From 2e69ab14d8270fd87052ae73fe994271a9780dd1 Mon Sep 17 00:00:00 2001 From: daniel Date: Sat, 20 Feb 2016 23:59:33 +0100 Subject: [PATCH] fix corpora handling --- corpkit/corpus.py | 8 ++++---- corpkit/interrogation.py | 8 ++++---- corpkit/interrogator.py | 11 +++++------ 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/corpkit/corpus.py b/corpkit/corpus.py index 1a27bf69..2bf2702d 100644 --- a/corpkit/corpus.py +++ b/corpkit/corpus.py @@ -61,7 +61,7 @@ def _lazyprop(self): return getattr(self, attr_name) return _lazyprop -class Corpus(object): +class Corpus: """ A class representing a linguistic text corpus, which contains files, optionally within subcorpus folders. @@ -663,17 +663,17 @@ def __next__(self): # Python 3: def __next__(self) def interrogate(self, *args, **kwargs): """Interrogate the corpus using :func:`~corpkit.corpus.Corpus.interrogate`""" from interrogator import interrogator - return interrogator([s for s in self], *args, **kwargs) + return interrogator(self, *args, **kwargs) def concordance(self, *args, **kwargs): """Concordance the corpus using :func:`~corpkit.corpus.Corpus.concordance`""" from interrogator import interrogator - return interrogator([s for s in self], do_concordancing = 'only', *args, **kwargs) + return interrogator(self, do_concordancing = 'only', *args, **kwargs) def configurations(self, search, **kwargs): """Get a configuration using :func:`~corpkit.corpus.Corpus.configurations`""" from configurations import configurations - return configurations([s for s in self], search, **kwargs) + return configurations(self, search, **kwargs) from corpus import Datalist class Corpora(Datalist): diff --git a/corpkit/interrogation.py b/corpkit/interrogation.py index ac6abebf..355fc27f 100644 --- a/corpkit/interrogation.py +++ b/corpkit/interrogation.py @@ -379,10 +379,10 @@ def format(self, kind = 'string', n = 100, window = 35, columns = 'all', **kwarg """ Print concordance lines nicely, to string, LaTeX or CSV - :param kind: output format - :type kind: str (``'string'``/``'latex'``/``'csv'``) - :param n: Print first ``n`` lines only - :type n: int/'all' + :param kind: output format: `'string'`/`'latex'`/`'csv'` + :type kind: str + :param n: Print first `n` lines only + :type n: int/`'all'` :param window: how many characters to show to left and right :type window: int :param columns: which columns to show diff --git a/corpkit/interrogator.py b/corpkit/interrogator.py index fe6063fc..ef8d90a7 100644 --- a/corpkit/interrogator.py +++ b/corpkit/interrogator.py @@ -52,13 +52,12 @@ def interrogator(corpus, import corpkit from interrogation import Interrogation - from corpus import Datalist - from process import tregex_engine + from corpus import Datalist, Corpora, Corpus + from process import tregex_engine, get_deps import pandas as pd from pandas import DataFrame, Series from collections import Counter from other import as_regex - from process import get_deps from time import localtime, strftime from textprogressbar import TextProgressBar from process import animator @@ -94,9 +93,7 @@ def signal_handler(signal, frame): note = kwargs.get('note') # convert path to corpus object - from corpus import Corpus - - if corpus.__class__ != Corpus: + if corpus.__class__ not in [Corpus, Corpora]: if not multiprocess and not kwargs.get('outname'): corpus = Corpus(corpus, print_info = False) @@ -590,6 +587,8 @@ def plaintext_simple_search(pattern, plaintext_data, concordancing = False, **kw corpus = Corpus(corpus) if hasattr(corpus, '__iter__') and not im: im = True + if corpus.__class__ == Corpora: + im = True if not im and multiprocess: im = True