From dee181e61467a9a1c73378690cbae2f814adcb02 Mon Sep 17 00:00:00 2001
From: daniel <mcdonaldd@unimelb.edu.au>
Date: Mon, 16 Nov 2015 22:46:41 +0100
Subject: [PATCH] documentation

Former-commit-id: 65107b2a39f1b3be65f0d1217c2ff8d4c1f1c02c [formerly 097ec3295e6096963a7871c8c94583eef0e7eb11]
Former-commit-id: 4b67d67200c6da11023318c1ac8bbabb80a51396
---
 corpkit/other.py           | 320 ++++++++-----------------------------
 corpkit/textprogressbar.py |   3 +-
 2 files changed, 67 insertions(+), 256 deletions(-)
diff --git a/corpkit/other.py b/corpkit/other.py
index c204ea75..ab1b31c0 100755
--- a/corpkit/other.py
+++ b/corpkit/other.py
@@ -149,16 +149,26 @@ def concprinter(df, kind = 'string', n = 100):
     print ''
 
 def save_result(interrogation, savename, savedir = 'saved_interrogations', print_info = True):
-    """Save an interrogation as pickle to *savedir*.
+    """
+    Save an interrogation as pickle to *savedir*.
+
+       >>> interro = interrogator(corpus, 'words', 'any')
+       >>> save_result(interro, 'savename')
+
+    will create saved_interrogations/savename.p
 
     :param interrogation: Corpus interrogation to save
     :type interrogation: corpkit interogation/edited result
+    
     :param savename: A name for the saved file
     :type savename: str
+    
     :param savedir: Relative path to directory in which to save file
     :type savedir: str
+    
     :param print_info: Show/hide stdout
     :type print_info: bool
+    
     :returns: None
     """
     import corpkit
@@ -246,14 +256,23 @@ def urlify(s):
         f.close()
 
 def load_result(savename, loaddir = 'saved_interrogations', only_concs = False):
-    """Load saved data into memory
+    """
+    Load saved data into memory:
+
+        >>> loaded = load_result('interro')
+
+    will load saved_interrogations/interro.p as loaded
 
     :param savename: Filename with or without extension
     :type savename: str
+    
     :param loaddir: Relative path to the directory containg *savename*
     :type loaddir: str
+    
     :param only_concs: Set to True if loading concordance lines
     :type only_concs: bool
+
+    :returns: loaded data
     """
     import corpkit
     import collections
@@ -348,98 +367,6 @@ def make_into_namedtuple(unpickled):
             outs[f.replace('.p', '')] = make_into_namedtuple(unpickled)
         return outs
 
-def report_display():
-    import corpkit
-    """Displays/downloads the risk report in Jupyter Notebook, 
-       depending on your browser settings"""
-    class PDF(object):
-        def __init__(self, pdf, size=(200,200)):
-            import corpkit
-            self.pdf = pdf
-            self.size = size
-        def _repr_html_(self):
-            import corpkit
-            return '<iframe src={0} width={1[0]} height={1[1]}></iframe>'.format(self.pdf, self.size)
-        def _repr_latex_(self):
-            import corpkit
-            return r'\includegraphics[width=1.0\textwidth]{{{0}}}'.format(self.pdf)
-    return PDF('report/risk_report.pdf',size=(800,650))
-
-def ipyconverter(inputfile, outextension):
-    import corpkit
-    """ipyconverter converts ipynb files to various formats.
-
-    This function calls a shell script, rather than using an API. 
-    The first argument is the ipynb file. 
-    The second argument is the file extension of the output format, which may be 'py', 'html', 'tex' or 'md'.
-
-    Example usage: ipyconverter('infile.ipynb', 'tex')
-
-    This creates a .tex file called infile-converted.tex
-    """
-    import os
-    if outextension == 'py':
-        outargument = '--to python ' # the trailing space is important!
-    if outextension == 'tex':
-        outargument = '--to latex '
-    if outextension == 'html':
-        outargument = '--to html '
-    if outextension == 'md':
-        outargument = '--to md '
-    outbasename = os.path.splitext(inputfile)[0]
-    output = outbasename + '-converted.' + outextension
-    shellscript = 'ipython nbconvert ' + outargument + inputfile + ' --stdout > ' + output
-    print "Shell command: " + shellscript
-    os.system(shellscript)
-
-def conv(inputfile, loadme = True):
-    import corpkit
-    """A .py to .ipynb converter that relies on old code from IPython.
-
-    You shouldn't use this: I only am while I'm on a deadline.
-    """
-    import os, sys
-    import pycon.current as nbf
-    import IPython
-    outbasename = os.path.splitext(inputfile)[0]
-    output = outbasename + '.ipynb'
-    badname = outbasename + '.nbconvert.ipynb'
-    print '\nConverting ' + inputfile + ' ---> ' + output + ' ...'
-    nb = nbf.read(open(inputfile, 'r'), 'py')
-    nbf.write(nb, open(output, 'w'), 'ipynb')
-    os.system('ipython nbconvert --to=notebook --nbformat=4 %s' % output)
-    os.system('mv %s %s' % (badname, output))
-    if loadme:
-        os.system('ipython notebook %s' % output)
-    #nbnew = open(output, 'r')
-    #IPython.nbformat.v4.convert.upgrade(nbnew, from_version=3, from_minor=0)
-    print 'Done!\n'
-
-def pytoipy(inputfile):
-    import corpkit
-    """A .py to .ipynb converter.
-
-    This function converts .py files to ipynb.
-    Comments in the .py file can be used to delimit cells, headings, etc. For example:
-
-    # <headingcell level=1>
-    # A heading 
-    # <markdowncell>
-    # *This text is in markdown*
-    # <codecell>
-    # print 'hello'
-
-    Example usage: pytoipy('filename.py')
-    """
-    import os
-    import IPython.nbformat.current as nbf
-    outbasename = os.path.splitext(inputfile)[0]
-    output = outbasename + '.ipynb'
-    print '\nConverting ' + inputfile + ' ---> ' + output + ' ...'
-    nb = nbf.read(open(inputfile, 'r'), 'py')
-    nbf.write(nb, open(output, 'w'), 'ipynb')
-    print 'Done!\n'
-
 def new_project(name, loc = '.', root = False):
     """Make a new project in ./*loc*
 
@@ -505,124 +432,20 @@ def resource_path(relative):
         time = strftime("%H:%M:%S", localtime())
         print '%s: New project created: "%s"' % (time, name)
 
-def searchtree(tree, query, options = ['-t', '-o']):
-    import corpkit
-    "Searches a tree with Tregex and returns matching terminals"
-    import os
-    from other import tregex_engine
-    from tests import check_dit
-    try:
-        get_ipython().getoutput()
-    except TypeError:
-        have_ipython = True
-    except NameError:
-        import subprocess
-        have_ipython = False
-    fo = open('tree.tmp',"w")
-    fo.write(tree + '\n')
-    fo.close()
-    result = tregex_engine(query = query, check_query = True)
-    result = tregex_engine(query = query, options = options, corpus = "tree.tmp")
-    os.remove("tree.tmp")
-    return result
-
-def quicktree(sentence):
-    import corpkit
-    """Parse a sentence and return a visual representation in IPython"""
-    import os
-    from nltk import Tree
-    from nltk.draw.util import CanvasFrame
-    from nltk.draw import TreeWidget
-    try:
-        from stat_parser import Parser
-    except:
-        raise ValueError('PyStatParser not found.')
-    try:
-        from IPython.display import display
-        from IPython.display import Image
-    except:
-        pass
-    try:
-        get_ipython().getoutput()
-    except TypeError:
-        have_ipython = True
-    except NameError:
-        import subprocess
-        have_ipython = False
-    parser = Parser()
-    parsed = parser.parse(sentence)
-    cf = CanvasFrame()
-    tc = TreeWidget(cf.canvas(),parsed)
-    cf.add_widget(tc,10,10) # (10,10) offsets
-    cf.print_to_file('tree.ps')
-    cf.destroy()
-    if have_ipython:
-        tregex_command = 'convert tree.ps tree.png'
-        result = get_ipython().getoutput(tregex_command)
-    else:
-        tregex_command = ["convert", "tree.ps", "tree.png"]
-        result = subprocess.check_output(tregex_command)    
-    os.remove("tree.ps")
-    return Image(filename='tree.png')
-    os.remove("tree.png")
-
-def multiquery(corpus, query, sort_by = 'total', quicksave = False):
-    import corpkit
-    """Creates a named tuple for a list of named queries to count.
-
-    Pass in something like:
-
-    [[u'NPs in corpus', r'NP'], [u'VPs in corpus', r'VP']]"""
-
-    import collections
-    import os
-    import pandas
-    import pandas as pd
-    from time import strftime, localtime
-    from interrogator import interrogator
-    from editor import editor
-
-    if quicksave:
-        savedir = 'saved_interrogations'
-        if not quicksave.endswith('.p'):
-            quicksave = quicksave + '.p'
-        fullpath = os.path.join(savedir, quicksave)
-        while os.path.isfile(fullpath):
-            selection = raw_input("\nSave error: %s already exists in %s.\n\nPick a new name: " % (savename, savedir))
-            if not selection.endswith('.p'):
-                selection = selection + '.p'
-                fullpath = os.path.join(savedir, selection)
-
-    results = []
-    for name, pattern in query:
-        result = interrogator(corpus, 'count', pattern)
-        result.totals.name = name # rename count
-        results.append(result.totals)
-    results = pd.concat(results, axis = 1)
-
-    results = editor(results, sort_by = sort_by, print_info = False, keep_stats = False)
-    time = strftime("%H:%M:%S", localtime())
-    print '%s: Finished! %d unique results, %d total.' % (time, len(results.results.columns), results.totals.sum())
-    if quicksave:
-        from other import save_result
-        save_result(results, quicksave)
-    return results
-
 def interroplot(path, query):
     """Demo function for interrogator/plotter.
 
-    1. Interrogates path with Tregex query, 
-    2. Gets relative frequencies
-    3. Plots the top seven results
+        1. Interrogates path with Tregex query, 
+        2. Gets relative frequencies
+        3. Plots the top seven results
 
-    :param path: Path to corpus
+    :param path: path to corpus
     :type path: str
+    
     :param query: Tregex query
     :type query: str
 
     """
-    
-    
     import corpkit
     from corpkit import interrogator, editor, plotter
     quickstart = interrogator(path, 'words', query, show = ['w'])
@@ -752,12 +575,27 @@ def tregex_engine(corpus = False,
                   root = False,
                   preserve_case = False,
                   **kwargs):
-    """This does a tregex query.
-    query: tregex query
-    options: list of tregex options
-    corpus: place to search
-    check_query: just make sure query ok
-    check_for_trees: find out if corpus contains parse trees"""
+    """
+    Run a Java Tregex query
+    
+    :param query: tregex query
+    :type query: str
+    
+    :param options: list of tregex options
+    :type options: list of strs -- ['-t', '-o']
+    
+    :param corpus: place to search
+    :type corpus: str
+    
+    :param check_query: just check that the query is valid
+    :type check_query: bool
+    
+    :param check_for_trees: find out if corpus contains parse trees
+    :type check_for_trees: bool
+
+    :returns: list of search results
+
+    """
     import corpkit
     from other import add_corpkit_to_path
     add_corpkit_to_path()
@@ -1000,8 +838,17 @@ def find_wordnet_tag(tag):
     return res
 
 def load_all_results(data_dir = 'saved_interrogations', only_concs = False, **kwargs):
+    """
+    Load every saved interrogation in data_dir into a dict:
+
+        >>> r = load_all_results()
+
+    :param data_dir: path to saved data
+    :type data_dir: str
+
+    :returns: dict with filenames as keys
+    """
     import corpkit
-    """load every saved interrogation in data_dir into a dict"""
     import os
     import time
     from other import load_result
@@ -1058,8 +905,8 @@ def get_root_note(kwargs):
     return r
 
 def texify(series, n = 20, colname = 'Keyness', toptail = False, sort_by = False):
-    import corpkit
     """turn a series into a latex table"""
+    import corpkit
     import pandas as pd
     if sort_by:
         df = pd.DataFrame(series.order(ascending = False))
@@ -1090,7 +937,8 @@ def make_nltk_text(directory,
                    tagged = False, 
                    lemmatise = False, 
                    just_content_words = False):
-    """turn a lot of trees into an nltk style text"""
+    """
+    Turn a lot of trees into an nltk style text"""
     import nltk
     import os
     from other import tregex_engine
@@ -1141,41 +989,18 @@ def make_nltk_text(directory,
             textx[os.path.basename(name)] = t
     return textx
 
-def get_synonyms(word, pos = False):
-    import corpkit
-    import nltk
-    from nltk.corpus import wordnet
-    if pos:
-        syns = wordnet.synsets(word, pos = pos)
-    else:
-        syns = wordnet.synsets(word)
-    return list(set([l.name().replace('_', ' ').lower() for s in syns for l in s.lemmas()]))
-
-def synonym_dictmaker(df):
-    import corpkit
-    syn_dict = {}
-    text = make_nltk_text(d)
-    for w in list(df.columns):
-        if w not in syn_dict.keys() and w not in syn_dict.values():
-            wds = get_synonyms(w, pos = pos) + text.similar(w)[:10]
-            sel = raw_input('Enter the indexes to remove from this list of proposed synonyms, or type "exit" to quit:\n\n%s\n') % '\n'.join(wds)
-            if sel.startswith('e'):
-                return
-            for i in sel:
-                del wds[i]
-            for word in wds:
-                syn_dict[word] = w
-    return syn_dict
-
 def as_regex(lst, boundaries = 'w', case_sensitive = False, inverse = False):
     """Turns a wordlist into an uncompiled regular expression
 
     :param lst: A wordlist to convert
     :type lst: list
+
     :param boundaries:
     :type boundaries: str -- 'word'/'line'/'space'; tuple -- (leftboundary, rightboundary)
+    
     :param case_sensitive: Make regular expression case sensitive
     :type case_sensitive: bool
+    
     :param inverse: Make regular expression inverse matching
     :type inverse: bool
 
@@ -1212,17 +1037,7 @@ def as_regex(lst, boundaries = 'w', case_sensitive = False, inverse = False):
     else:
         inverser1 = r''
         inverser2 = r''
-    #if no_punctuation:
-    #    if not inverse:
-    #        # not needed
-    #        punct = r''
-    #    else:
-    #        punct = r'|[^A-Za-z0-9]+'
-    #else:
-    #    if not inverse:
-    #        punct = r''
-    #    else:
-    #        punct = r''
+
     if inverse:
         joinbit = r'%s|%s' % (boundary2, boundary1)
         return case + inverser1 + r'(' + boundary1 + joinbit.join(sorted(list(set([re.escape(w) for w in lst])))) + boundary2 + r')' + inverser2
@@ -1231,12 +1046,11 @@ def as_regex(lst, boundaries = 'w', case_sensitive = False, inverse = False):
 
 
 def show(lines, index, show = 'thread'):
-    import corpkit
     """show lines.ix[index][link] as frame"""
+    import corpkit
     url = lines.ix[index]['link'].replace('<a href=', '').replace('>link</a>', '')
     return HTML('<iframe src=%s width=1000 height=500></iframe>' % url)
 
-
 def add_corpkit_to_path():
     import sys
     import os
@@ -1264,9 +1078,6 @@ def add_nltk_data_to_nltk_path(**kwargs):
             if path_within_gui.replace('/nltk/', '/', 1) not in nltk.data.path:
                 nltk.data.path.append(path_within_gui.replace('/nltk/', '/', 1))
 
-    # very temporary! -- for using .py
-    #nltk.data.path.append('/users/daniel/work/corpkit/nltk_data')
-
 def get_gui_resource_dir():
     import inspect
     import os
@@ -1369,7 +1180,6 @@ def determine_datatype(path):
     elif mc == '.p':
         return 'tokens'
 
-
 def make_multi(interrogation, indexnames = None):    
     """
     make pd.multiindex version of an interrogation (for pandas geeks)
diff --git a/corpkit/textprogressbar.py b/corpkit/textprogressbar.py
index 10a6bd48..812d6541 100755
--- a/corpkit/textprogressbar.py
+++ b/corpkit/textprogressbar.py
@@ -1,7 +1,8 @@
 #!/usr/bin/python
 
 class TextProgressBar:
-    """a text progress bar for CLI operations"""
+    """a text progress bar for CLI operations
+       (users do not need to call this directly)"""
     from time import localtime, strftime
     try:
         from IPython.display import display, clear_output