Merge pull request #3219 from purificant/code_linting

nltk · Dec 18, 2023 · bc32be0 · bc32be0
2 parents b718276 + a7788e9
commit bc32be0
Show file tree

Hide file tree

Showing 93 changed files with 153 additions and 229 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -11,9 +11,9 @@ repos:
     rev: v3.15.0
     hooks:
     - id: pyupgrade
-      args: ["--py37-plus"]
+      args: ["--py38-plus"]
   - repo: https://github.com/ambv/black
-    rev: 22.3.0
+    rev: 23.12.0
     hooks:
     - id: black
   - repo: local

diff --git a/nltk/app/chunkparser_app.py b/nltk/app/chunkparser_app.py
@@ -713,7 +713,7 @@ def _eval_demon(self):
         if self.normalized_grammar != self._eval_normalized_grammar:
             # Check if we've seen this grammar already.  If so, then
             # just use the old evaluation values.
-            for (g, p, r, f) in self._history:
+            for g, p, r, f in self._history:
                 if self.normalized_grammar == self.normalize_grammar(g):
                     self._history.append((g, p, r, f))
                     self._history_index = len(self._history) - 1
@@ -850,7 +850,7 @@ def _init_widgets(self, top):
             ).grid(column=i * 2 + 1, row=0)
         self.helptabs[self.HELP[0][0]].configure(font=self._font)
         self.helpbox.tag_config("elide", elide=True)
-        for (tag, params) in self.HELP_AUTOTAG:
+        for tag, params in self.HELP_AUTOTAG:
             self.helpbox.tag_config("tag-%s" % tag, **params)
         self.show_help(self.HELP[0][0])
 
@@ -1047,7 +1047,7 @@ def show_trace(self, *e):
     def show_help(self, tab):
         self.helpbox["state"] = "normal"
         self.helpbox.delete("1.0", "end")
-        for (name, tabstops, text) in self.HELP:
+        for name, tabstops, text in self.HELP:
             if name == tab:
                 text = text.replace(
                     "<<TAGSET>>",
@@ -1066,7 +1066,7 @@ def show_help(self, tab):
                 self.helpbox.config(tabs=tabstops)
                 self.helpbox.insert("1.0", text + "\n" * 20)
                 C = "1.0 + %d chars"
-                for (tag, params) in self.HELP_AUTOTAG:
+                for tag, params in self.HELP_AUTOTAG:
                     pattern = f"(?s)(<{tag}>)(.*?)(</{tag}>)"
                     for m in re.finditer(pattern, text):
                         self.helpbox.tag_add("elide", C % m.start(1), C % m.end(1))

diff --git a/nltk/app/wordnet_app.py b/nltk/app/wordnet_app.py
@@ -281,6 +281,7 @@ def run():
 # Main logic for wordnet browser.
 #
 
+
 # This is wrapped inside a function since wn is only available if the
 # WordNet corpus is installed.
 def _pos_tuples():
@@ -983,7 +984,7 @@ def app():
     server_mode = False
     help_mode = False
     logfilename = None
-    for (opt, value) in opts:
+    for opt, value in opts:
         if (opt == "-l") or (opt == "--logfile"):
             logfilename = str(value)
         elif (opt == "-p") or (opt == "--port"):

diff --git a/nltk/ccg/api.py b/nltk/ccg/api.py
@@ -125,7 +125,7 @@ def substitute(self, substitutions):
         """If there is a substitution corresponding to this variable,
         return the substituted category.
         """
-        for (var, cat) in substitutions:
+        for var, cat in substitutions:
             if var == self:
                 return cat
         return self
@@ -197,7 +197,7 @@ def substitute(self, subs):
         if not self.is_variable():
             return self
 
-        for (var, restrs) in subs:
+        for var, restrs in subs:
             if var == "_":
                 return Direction(self._dir, restrs)
         return self

diff --git a/nltk/ccg/chart.py b/nltk/ccg/chart.py
@@ -380,7 +380,7 @@ def printCCGDerivation(tree):
 
     # Construct a string with both the leaf word and corresponding
     # category aligned.
-    for (leaf, cat) in leafcats:
+    for leaf, cat in leafcats:
         str_cat = "%s" % cat
         nextlen = 2 + max(len(leaf), len(str_cat))
         lcatlen = (nextlen - len(str_cat)) // 2

diff --git a/nltk/ccg/combinator.py b/nltk/ccg/combinator.py
@@ -128,6 +128,7 @@ def __str__(self):
 
 # Predicates for function application.
 
+
 # Ensures the left functor takes an argument on the right
 def forwardOnly(left, right):
     return left.dir().is_forward()

diff --git a/nltk/chat/util.py b/nltk/chat/util.py
@@ -94,7 +94,7 @@ def respond(self, str):
         """
 
         # check each pattern
-        for (pattern, response) in self._pairs:
+        for pattern, response in self._pairs:
             match = pattern.match(str)
 
             # did the pattern match?

diff --git a/nltk/chunk/named_entity.py b/nltk/chunk/named_entity.py
@@ -140,7 +140,7 @@ def _tagged_to_parse(self, tagged_tokens):
         """
         sent = Tree("S", [])
 
-        for (tok, tag) in tagged_tokens:
+        for tok, tag in tagged_tokens:
             if tag == "O":
                 sent.append(tok)
             elif tag.startswith("B-"):
@@ -260,7 +260,7 @@ def subfunc(m):
     if fmt == "binary":
         i = 0
         toks = Tree("S", [])
-        for (s, e, typ) in sorted(entities):
+        for s, e, typ in sorted(entities):
             if s < i:
                 s = i  # Overlapping!  Deal with this better?
             if e <= s:
@@ -275,7 +275,7 @@ def subfunc(m):
     elif fmt == "multiclass":
         i = 0
         toks = Tree("S", [])
-        for (s, e, typ) in sorted(entities):
+        for s, e, typ in sorted(entities):
             if s < i:
                 s = i  # Overlapping!  Deal with this better?
             if e <= s:

diff --git a/nltk/chunk/regexp.py b/nltk/chunk/regexp.py
@@ -165,7 +165,6 @@ def to_chunkstruct(self, chunk_label="CHUNK"):
         index = 0
         piece_in_chunk = 0
         for piece in re.split("[{}]", self._str):
-
             # Find the list of tokens contained in this piece.
             length = piece.count("<")
             subsequence = self._pieces[index : index + length]

diff --git a/nltk/chunk/util.py b/nltk/chunk/util.py
@@ -457,7 +457,7 @@ def conlltags2tree(
     Convert the CoNLL IOB format to a tree.
     """
     tree = Tree(root_label, [])
-    for (word, postag, chunktag) in sentence:
+    for word, postag, chunktag in sentence:
         if chunktag is None:
             if strict:
                 raise ValueError("Bad conll tag sequence")
@@ -592,7 +592,6 @@ def ieerstr2tree(
 
 
 def demo():
-
     s = "[ Pierre/NNP Vinken/NNP ] ,/, [ 61/CD years/NNS ] old/JJ ,/, will/MD join/VB [ the/DT board/NN ] ./."
     import nltk
 

diff --git a/nltk/classify/decisiontree.py b/nltk/classify/decisiontree.py
@@ -108,7 +108,7 @@ def pseudocode(self, prefix="", depth=4):
         if self._fname is None:
             return f"{prefix}return {self._label!r}\n"
         s = ""
-        for (fval, result) in sorted(
+        for fval, result in sorted(
             self._decisions.items(),
             key=lambda item: (item[0] in [None, False, True], str(item[0]).lower()),
         ):

diff --git a/nltk/classify/maxent.py b/nltk/classify/maxent.py
@@ -146,13 +146,13 @@ def prob_classify(self, featureset):
 
             if self._logarithmic:
                 total = 0.0
-                for (f_id, f_val) in feature_vector:
+                for f_id, f_val in feature_vector:
                     total += self._weights[f_id] * f_val
                 prob_dict[label] = total
 
             else:
                 prod = 1.0
-                for (f_id, f_val) in feature_vector:
+                for f_id, f_val in feature_vector:
                     prod *= self._weights[f_id] ** f_val
                 prob_dict[label] = prod
 
@@ -182,7 +182,7 @@ def explain(self, featureset, columns=4):
             feature_vector.sort(
                 key=lambda fid__: abs(self._weights[fid__[0]]), reverse=True
             )
-            for (f_id, f_val) in feature_vector:
+            for f_id, f_val in feature_vector:
                 if self._logarithmic:
                     score = self._weights[f_id] * f_val
                 else:
@@ -598,18 +598,18 @@ def describe(self, f_id):
             self._inv_mapping
         except AttributeError:
             self._inv_mapping = [-1] * len(self._mapping)
-            for (info, i) in self._mapping.items():
+            for info, i in self._mapping.items():
                 self._inv_mapping[i] = info
 
         if f_id < len(self._mapping):
             (fname, fval, label) = self._inv_mapping[f_id]
             return f"{fname}=={fval!r} and label is {label!r}"
         elif self._alwayson and f_id in self._alwayson.values():
-            for (label, f_id2) in self._alwayson.items():
+            for label, f_id2 in self._alwayson.items():
                 if f_id == f_id2:
                     return "label is %r" % label
         elif self._unseen and f_id in self._unseen.values():
-            for (fname, f_id2) in self._unseen.items():
+            for fname, f_id2 in self._unseen.items():
                 if f_id == f_id2:
                     return "%s is unseen" % fname
         else:
@@ -655,14 +655,13 @@ def train(cls, train_toks, count_cutoff=0, labels=None, **options):
         seen_labels = set()  # The set of labels we've encountered
         count = defaultdict(int)  # maps (fname, fval) -> count
 
-        for (tok, label) in train_toks:
+        for tok, label in train_toks:
             if labels and label not in labels:
                 raise ValueError("Unexpected label %s" % label)
             seen_labels.add(label)
 
             # Record each of the features.
-            for (fname, fval) in tok.items():
-
+            for fname, fval in tok.items():
                 # If a count cutoff is given, then only add a joint
                 # feature once the corresponding (fname, fval, label)
                 # tuple exceeds that cutoff.
@@ -764,7 +763,7 @@ def labels(self):
         return self._labels
 
     def describe(self, fid):
-        for (feature, label) in self._mapping:
+        for feature, label in self._mapping:
             if self._mapping[(feature, label)] == fid:
                 return (feature, label)
 
@@ -780,11 +779,11 @@ def train(cls, train_toks, count_cutoff=0, labels=None, **options):
         # This gets read twice, so compute the values in case it's lazy.
         train_toks = list(train_toks)
 
-        for (featureset, label) in train_toks:
+        for featureset, label in train_toks:
             if label not in labels:
                 labels.append(label)
 
-        for (featureset, label) in train_toks:
+        for featureset, label in train_toks:
             for label in labels:
                 for feature in featureset:
                     if (feature, label) not in mapping:
@@ -939,18 +938,18 @@ def describe(self, f_id):
             self._inv_mapping
         except AttributeError:
             self._inv_mapping = [-1] * len(self._mapping)
-            for (info, i) in self._mapping.items():
+            for info, i in self._mapping.items():
                 self._inv_mapping[i] = info
 
         if f_id < len(self._mapping):
             (fname, fval, label) = self._inv_mapping[f_id]
             return f"{fname}=={fval!r} and label is {label!r}"
         elif self._alwayson and f_id in self._alwayson.values():
-            for (label, f_id2) in self._alwayson.items():
+            for label, f_id2 in self._alwayson.items():
                 if f_id == f_id2:
                     return "label is %r" % label
         elif self._unseen and f_id in self._unseen.values():
-            for (fname, f_id2) in self._unseen.items():
+            for fname, f_id2 in self._unseen.items():
                 if f_id == f_id2:
                     return "%s is unseen" % fname
         else:
@@ -999,13 +998,13 @@ def train(cls, train_toks, count_cutoff=0, labels=None, **options):
         seen_labels = set()  # The set of labels we've encountered
         count = defaultdict(int)  # maps (fname, fval) -> count
 
-        for (tok, label) in train_toks:
+        for tok, label in train_toks:
             if labels and label not in labels:
                 raise ValueError("Unexpected label %s" % label)
             seen_labels.add(label)
 
             # Record each of the features.
-            for (fname, fval) in tok.items():
+            for fname, fval in tok.items():
                 if type(fval) in (int, float):
                     fval = type(fval)
                 # If a count cutoff is given, then only add a joint
@@ -1128,7 +1127,7 @@ def calculate_empirical_fcount(train_toks, encoding):
     fcount = numpy.zeros(encoding.length(), "d")
 
     for tok, label in train_toks:
-        for (index, val) in encoding.encode(tok, label):
+        for index, val in encoding.encode(tok, label):
             fcount[index] += val
 
     return fcount
@@ -1141,7 +1140,7 @@ def calculate_estimated_fcount(classifier, train_toks, encoding):
         pdist = classifier.prob_classify(tok)
         for label in pdist.samples():
             prob = pdist.prob(label)
-            for (fid, fval) in encoding.encode(tok, label):
+            for fid, fval in encoding.encode(tok, label):
                 fcount[fid] += prob * fval
 
     return fcount
@@ -1364,7 +1363,7 @@ def calculate_deltas(
             # Find the number of active features
             nf = sum(val for (id, val) in feature_vector)
             # Update the A matrix
-            for (id, val) in feature_vector:
+            for id, val in feature_vector:
                 A[nfmap[nf], id] += dist.prob(label) * val
     A /= len(train_toks)
 
@@ -1402,6 +1401,7 @@ def calculate_deltas(
 # { Classifier Trainer: megam
 ######################################################################
 
+
 # [xx] possible extension: add support for using implicit file format;
 # this would need to put requirements on what encoding is used.  But
 # we may need this for other maxent classifier trainers that require

diff --git a/nltk/classify/megam.py b/nltk/classify/megam.py
@@ -146,7 +146,7 @@ def _write_megam_features(vector, stream, bernoulli):
         raise ValueError(
             "MEGAM classifier requires the use of an " "always-on feature."
         )
-    for (fid, fval) in vector:
+    for fid, fval in vector:
         if bernoulli:
             if fval == 1:
                 stream.write(" %s" % fid)

diff --git a/nltk/classify/naivebayes.py b/nltk/classify/naivebayes.py
@@ -109,7 +109,7 @@ def prob_classify(self, featureset):
 
         # Then add in the log probability of features given labels.
         for label in self._labels:
-            for (fname, fval) in featureset.items():
+            for fname, fval in featureset.items():
                 if (label, fname) in self._feature_probdist:
                     feature_probs = self._feature_probdist[label, fname]
                     logprob[label] += feature_probs.logprob(fval)
@@ -126,7 +126,7 @@ def show_most_informative_features(self, n=10):
         cpdist = self._feature_probdist
         print("Most Informative Features")
 
-        for (fname, fval) in self.most_informative_features(n):
+        for fname, fval in self.most_informative_features(n):
 
             def labelprob(l):
                 return cpdist[l, fname].prob(fval)
@@ -237,7 +237,7 @@ def train(cls, labeled_featuresets, estimator=ELEProbDist):
 
         # Create the P(fval|label, fname) distribution
         feature_probdist = {}
-        for ((label, fname), freqdist) in feature_freqdist.items():
+        for (label, fname), freqdist in feature_freqdist.items():
             probdist = estimator(freqdist, bins=len(feature_values[fname]))
             feature_probdist[label, fname] = probdist
 

diff --git a/nltk/classify/senna.py b/nltk/classify/senna.py
@@ -45,7 +45,6 @@
 
 
 class Senna(TaggerI):
-
     SUPPORTED_OPERATIONS = ["pos", "chk", "ner"]
 
     def __init__(self, senna_path, operations, encoding="utf-8"):

diff --git a/nltk/classify/textcat.py b/nltk/classify/textcat.py
@@ -45,7 +45,6 @@
 
 
 class TextCat:
-
     _corpus = None
     fingerprints = {}
     _START_CHAR = "<"