Misc spellings flagged by codespell (#4716)

Note that the childs ==> children changes in Bio/Phylo/Consensus.py only touch comments and internal variable names. Not all of the catergories ==> categories fixes were applied to the (deprecated) EMBOSS wrappers, as some would be functional changes. Also, LaTeX files are no longer included in the codespell check.
biopython · May 1, 2024 · a47460e
1 parent f6c6c6a
commit a47460e
Show file tree

Hide file tree

Showing 44 changed files with 86 additions and 91 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -74,7 +74,7 @@ repos:
     rev: v2.2.6
     hooks:
     -   id: codespell
-        files: \.(rst|md|tex)$
+        files: \.(rst|md)$
         args: [
             --ignore-regex,
             '(^|\W)([A-Z]{2,3})(\W|$)',

diff --git a/Bio/Align/__init__.py b/Bio/Align/__init__.py
@@ -4290,7 +4290,7 @@ def parse(source, fmt):
      - source - File or file-like object to read from, or filename as string.
      - fmt    - String describing the file format (case-insensitive).
 
-    Typical usage, opening a file to read in, and looping over the aligments:
+    Typical usage, opening a file to read in, and looping over the alignments:
 
     >>> from Bio import Align
     >>> filename = "Exonerate/exn_22_m_ner_cigar.exn"

diff --git a/Bio/Align/exonerate.py b/Bio/Align/exonerate.py
@@ -571,7 +571,7 @@ def _parse_vulgar(words):
                         % (query_step, target_step)
                     )
             elif operation == "N":  # Non-equivalenced (unaligned) region
-                operation = "U"  # 'N' is alread used for introns in SAM/BAM
+                operation = "U"  # 'N' is already used for introns in SAM/BAM
                 if target_step > 0:
                     ts += target_step
                     coordinates[0, i + 1] = ts

diff --git a/Bio/Alphabet/__init__.py b/Bio/Alphabet/__init__.py
@@ -8,7 +8,7 @@
 # package.
 """Alphabets were previously used to declare sequence type and letters (OBSOLETE).
 
-The design of Bio.Aphabet included a number of historic design choices
+The design of Bio.Alphabet included a number of historic design choices
 which, with the benefit of hindsight, were regretable. Bio.Alphabet was
 therefore removed from Biopython in release 1.78. Instead, the molecule type is
 included as an annotation on SeqRecords where appropriate.

diff --git a/Bio/Data/PDBData.py b/Bio/Data/PDBData.py
@@ -291,7 +291,7 @@
     "8MG": "G", "8OG": "G", "8PY": "G", "8AA": "G", "85Y": "U", "8OS": "G",
 }
 
-# Solvent accesibility scales
+# Solvent accessibility scales
 residue_sasa_scales = {
     # Ahmad: Ahmad et al. 2003 https://doi.org/10.1002/prot.10328
     "Ahmad": {

diff --git a/Bio/Emboss/Applications.py b/Bio/Emboss/Applications.py
@@ -358,9 +358,7 @@ def __init__(self, cmd="fdnadist", **kwargs):
             ),
             _Option(["-method", "method"], "sub. model [f,k,j,l,s]", is_required=True),
             _Option(["-gamma", "gamma"], "gamma [g, i,n]"),
-            _Option(
-                ["-ncategories", "ncategories"], "number of rate catergories (1-9)"
-            ),
+            _Option(["-ncategories", "ncategories"], "number of rate categories (1-9)"),
             _Option(["-rate", "rate"], "rate for each category"),
             _Option(
                 ["-categories", "categories"], "File of substitution rate categories"
@@ -371,7 +369,7 @@ def __init__(self, cmd="fdnadist", **kwargs):
             ),
             _Option(["-invarfrac", "invarfrac"], "proportoin of invariant sites"),
             _Option(["-ttratio", "ttratio"], "ts/tv ratio"),
-            _Option(["-freqsfrom", "freqsfrom"], "use emprical base freqs"),
+            _Option(["-freqsfrom", "freqsfrom"], "use empirical base freqs"),
             _Option(["-basefreq", "basefreq"], "specify basefreqs"),
             _Option(["-lower", "lower"], "lower triangle matrix (y/N)"),
         ]
@@ -579,9 +577,7 @@ def __init__(self, cmd="fprotdist", **kwargs):
                 filename=True,
                 is_required=True,
             ),
-            _Option(
-                ["-ncategories", "ncategories"], "number of rate catergories (1-9)"
-            ),
+            _Option(["-ncategories", "ncategories"], "number of rate categories (1-9)"),
             _Option(["-rate", "rate"], "rate for each category"),
             _Option(["-catergories", "catergories"], "file of rates"),
             _Option(["-weights", "weights"], "weights file"),
@@ -596,7 +592,7 @@ def __init__(self, cmd="fprotdist", **kwargs):
             ),
             _Option(["-aacateg", "aacateg"], "Choose the category to use [G,C,H]"),
             _Option(["-whichcode", "whichcode"], "genetic code [c,m,v,f,y]"),
-            _Option(["-ease", "ease"], "Pob change catergory (float between -0 and 1)"),
+            _Option(["-ease", "ease"], "Pob change category (float between -0 and 1)"),
             _Option(["-ttratio", "ttratio"], "Transition/transversion ratio (0-1)"),
             _Option(
                 ["-basefreq", "basefreq"], "DNA base frequencies (space separated list)"

diff --git a/Bio/File.py b/Bio/File.py
@@ -325,7 +325,7 @@ def __init__(
             self._build_index()
 
     def _load_index(self):
-        """Call from __init__ to re-use an existing index (PRIVATE)."""
+        """Call from __init__ to reuse an existing index (PRIVATE)."""
         index_filename = self._index_filename
         relative_path = self._relative_path
         filenames = self._filenames

diff --git a/Bio/HMM/MarkovModel.py b/Bio/HMM/MarkovModel.py
@@ -527,12 +527,12 @@ def get_blank_transitions(self):
         return self._transition_pseudo
 
     def get_blank_emissions(self):
-        """Get the starting default emmissions for each sequence.
+        """Get the starting default emissions for each sequence.
 
-        This returns a dictionary of the default emmissions for each
+        This returns a dictionary of the default emissions for each
         letter. The dictionary is structured with keys as
-        (seq_letter, emmission_letter) and values as the starting number
-        of emmissions.
+        (seq_letter, emission_letter) and values as the starting number
+        of emissions.
         """
         return self._emission_pseudo
 

diff --git a/Bio/HMM/Trainer.py b/Bio/HMM/Trainer.py
@@ -80,13 +80,13 @@ def log_likelihood(self, probabilities):
         return total_likelihood
 
     def estimate_params(self, transition_counts, emission_counts):
-        """Get a maximum likelihood estimation of transition and emmission.
+        """Get a maximum likelihood estimation of transition and emission.
 
         Arguments:
          - transition_counts -- A dictionary with the total number of counts
            of transitions between two states.
          - emissions_counts -- A dictionary with the total number of counts
-           of emmissions of a particular emission letter by a state letter.
+           of emissions of a particular emission letter by a state letter.
 
         This then returns the maximum likelihood estimators for the
         transitions and emissions, estimated by formulas 3.18 in

diff --git a/Bio/LogisticRegression.py b/Bio/LogisticRegression.py
@@ -16,7 +16,7 @@
  - classify     Classify an observation into a class.
 
 This module has been deprecated, please consider an alternative like scikit-learn
-insead.
+instead.
 """
 
 import warnings

diff --git a/Bio/NaiveBayes.py b/Bio/NaiveBayes.py
@@ -50,7 +50,7 @@
 
 
 def _contents(items):
-    """Return a dictionary where the key is the item and the value is the probablity associated (PRIVATE)."""
+    """Return a dictionary where the key is the item and the value is the probability associated (PRIVATE)."""
     term = 1.0 / len(items)
     counts = {}
     for item in items:

diff --git a/Bio/PDB/DSSP.py b/Bio/PDB/DSSP.py
@@ -143,7 +143,7 @@ def dssp_dict_from_pdb_file(in_file, DSSP="dssp", dssp_version="3.9.9"):
         DSSP executable (argument to subprocess)
 
     dssp_version : string
-        Version of DSSP excutable
+        Version of DSSP executable
 
     Returns
     -------

diff --git a/Bio/PDB/PICIO.py b/Bio/PDB/PICIO.py
@@ -203,8 +203,8 @@ def process_hedron(
         """Create Hedron on current (sbcic) Chain.internal_coord."""
         ek = (akcache(a1), akcache(a2), akcache(a3))
         atmNdx = AtomKey.fields.atm
-        accpt = IC_Residue.accept_atoms
-        if not all(ek[i].akl[atmNdx] in accpt for i in range(3)):
+        accept = IC_Residue.accept_atoms
+        if not all(ek[i].akl[atmNdx] in accept for i in range(3)):
             return
         hl12[ek] = float(l12)
         ha[ek] = float(ang)
@@ -295,8 +295,8 @@ def process_dihedron(
             akcache(a4),
         )
         atmNdx = AtomKey.fields.atm
-        accpt = IC_Residue.accept_atoms
-        if not all(ek[i].akl[atmNdx] in accpt for i in range(4)):
+        accept = IC_Residue.accept_atoms
+        if not all(ek[i].akl[atmNdx] in accept for i in range(4)):
             return
         dangle = float(dangle)
         dangle = dangle if (dangle <= 180.0) else dangle - 360.0
@@ -454,7 +454,7 @@ def dihedra_check(ric: IC_Residue) -> None:
 
         # rnext should be set
         def ake_recurse(akList: List) -> List:
-            """Bulid combinatorics of AtomKey lists."""
+            """Build combinatorics of AtomKey lists."""
             car = akList[0]
             if len(akList) > 1:
                 retList = []
@@ -1104,9 +1104,9 @@ def write_PIC(
                             hdr.upper(), (dd or ""), (pdbid or "")
                         )
                     )
-                nam = entity.header.get("name", None)
-                if nam:
-                    fp.write("TITLE     " + nam.upper() + "\n")
+                name = entity.header.get("name", None)
+                if name:
+                    fp.write("TITLE     " + name.upper() + "\n")
                 for mdl in entity:
                     write_PIC(
                         mdl,

diff --git a/Bio/PDB/ic_rebuild.py b/Bio/PDB/ic_rebuild.py
@@ -134,8 +134,8 @@ def report_IC(
                 hdr = entity.header.get("head", None)
                 if hdr:
                     reportDict["hdr"] += 1
-                nam = entity.header.get("name", None)
-                if nam:
+                name = entity.header.get("name", None)
+                if name:
                     reportDict["hdr"] += 1
             for mdl in entity:
                 reportDict = report_IC(mdl, reportDict)
@@ -492,9 +492,9 @@ def write_PDB(
                             hdr.upper(), (dd or ""), (pdbid or "")
                         )
                     )
-                nam = entity.header.get("name", None)
-                if nam:
-                    fp.write("TITLE     " + nam.upper() + "\n")
+                name = entity.header.get("name", None)
+                if name:
+                    fp.write("TITLE     " + name.upper() + "\n")
             io = PDBIO()
             io.set_structure(entity)
             io.save(fp, preserve_atom_numbering=True)

diff --git a/Bio/PDB/internal_coords.py b/Bio/PDB/internal_coords.py
@@ -1347,7 +1347,7 @@ def init_atom_coords(self) -> None:
         """
 
         if np.any(self.hAtoms_needs_update):
-            # hedra inital coords
+            # hedra initial coords
 
             # sar = supplementary angle radian: angles which add to 180
             sar = np.deg2rad(180.0 - self.hedraAngle[self.hAtoms_needs_update])  # angle
@@ -2058,7 +2058,7 @@ def dihedral_signs(self) -> np.ndarray:
     def distplot_to_dh_arrays(
         self, distplot: np.ndarray, dihedra_signs: np.ndarray
     ) -> None:
-        """Load di/hedra distance arays from distplot.
+        """Load di/hedra distance arrays from distplot.
 
         Fill :class:`IC_Chain` arrays hedraL12, L23, L13 and dihedraL14
         distance value arrays from input distplot, dihedra_signs array from
@@ -2069,7 +2069,7 @@ def distplot_to_dh_arrays(
         Call :meth:`atom_to_internal_coordinates` (or at least :meth:`init_edra`)
         to generate a2ha_map and d2a_map before running this.
 
-        Explcitly removed from :meth:`.distance_to_internal_coordinates` so
+        Explicitly removed from :meth:`.distance_to_internal_coordinates` so
         user may populate these chain di/hedra arrays by other
         methods.
         """
@@ -2116,7 +2116,7 @@ def distance_to_internal_coordinates(
 
         :param bool resetAtoms: default True.
             Mark all atoms in di/hedra and atomArray for updating by
-            :meth:`.internal_to_atom_coordinates`.  Alternatvely set this to
+            :meth:`.internal_to_atom_coordinates`.  Alternatively set this to
             False and manipulate `atomArrayValid`, `dAtoms_needs_update` and
             `hAtoms_needs_update` directly to reduce computation.
         """  # noqa
@@ -3727,12 +3727,12 @@ def set_angle(self, angle_key: Union[EKT, str], v: float, overlap=True):
         protein chain definitions in :mod:`.ic_data` and :meth:`_create_edra`
         (e.g. psi overlaps N-CA-C-O).
 
-        Te default overlap=True is probably what you want for:
+        The default overlap=True is probably what you want for:
         `set_angle("chi1", val)`
 
         The default is probably NOT what you want when processing all dihedrals
         in a chain or residue (such as copying from another structure), as the
-        overlaping dihedra will likely be in the set as well.
+        overlapping dihedra will likely be in the set as well.
 
         N.B. setting e.g. PRO chi2 is permitted without error or warning!
 
@@ -3775,7 +3775,7 @@ def bond_rotate(self, angle_key: Union[EKT, str], delta: float):
 
         Changes a dihedral angle by a given delta, i.e.
         new_angle = current_angle + delta
-        Values are adjusted so new_angle iwll be within +/-180.
+        Values are adjusted so new_angle will be within +/-180.
 
         Changes overlapping dihedra as in :meth:`.set_angle`
 
@@ -3911,7 +3911,7 @@ class Edron:
     re_class: str
         sequence of residue, atoms comprising di/hedron for statistics
     cre_class: str
-        sequence of covalent radii classses comprising di/hedron for statistics
+        sequence of covalent radii classes comprising di/hedron for statistics
     edron_re: compiled regex (Class Attribute)
         A compiled regular expression matching string IDs for Hedron
         and Dihedron objects

diff --git a/Bio/Phylo/Applications/_Raxml.py b/Bio/Phylo/Applications/_Raxml.py
@@ -110,7 +110,7 @@ def __init__(self, cmd="raxmlHPC", **kwargs):
                         e: Optimize model+branch lengths for given input tree under
                         GAMMA/GAMMAI only.
 
-                        g: Compute per site log Likelihoods for one ore more trees
+                        g: Compute per site log Likelihoods for one or more trees
                         passed via '-z' and write them to a file that can be read
                         by CONSEL.
 

diff --git a/Bio/Phylo/Consensus.py b/Bio/Phylo/Consensus.py
@@ -257,9 +257,8 @@ def strict_consensus(trees):
             if bs.contains(bitstr):
                 # remove old bitstring
                 del bitstr_clades[bs]
-                # update clade childs
-                new_childs = [child for child in c.clades if child not in clade_terms]
-                c.clades = new_childs
+                # update clade children
+                c.clades = [child for child in c.clades if child not in clade_terms]
                 # set current clade as child of c
                 c.clades.append(clade)
                 # update bitstring
@@ -324,7 +323,7 @@ def majority_consensus(trees, cutoff=0):
         # record its possible parent and child clades.
         compatible = True
         parent_bitstr = None
-        child_bitstrs = []  # multiple independent childs
+        child_bitstrs = []  # multiple independent children
         for bs in bsckeys:
             if not bs.iscompatible(bitstr):
                 compatible = False
@@ -347,7 +346,7 @@ def majority_consensus(trees, cutoff=0):
         if parent_bitstr:
             # insert current clade; remove old bitstring
             parent_clade = bitstr_clades.pop(parent_bitstr)
-            # update parent clade childs
+            # update parent clade children
             parent_clade.clades = [
                 c for c in parent_clade.clades if c not in clade_terms
             ]
@@ -457,15 +456,15 @@ def _sub_clade(clade, term_names):
         for c in sub_clade.find_clades(terminal=False, order="preorder"):
             if c == sub_clade.root:
                 continue
-            childs = set(c.find_clades(terminal=True)) & set(term_clades)
-            if childs:
+            children = set(c.find_clades(terminal=True)) & set(term_clades)
+            if children:
                 for tc in temp_clade.find_clades(terminal=False, order="preorder"):
-                    tc_childs = set(tc.clades)
-                    tc_new_clades = tc_childs - childs
-                    if childs.issubset(tc_childs) and tc_new_clades:
+                    tc_children = set(tc.clades)
+                    tc_new_clades = tc_children - children
+                    if children.issubset(tc_children) and tc_new_clades:
                         tc.clades = list(tc_new_clades)
                         child_clade = BaseTree.Clade()
-                        child_clade.clades.extend(list(childs))
+                        child_clade.clades.extend(list(children))
                         tc.clades.append(child_clade)
         sub_clade = temp_clade
     return sub_clade

diff --git a/Bio/PopGen/GenePop/Controller.py b/Bio/PopGen/GenePop/Controller.py
@@ -323,7 +323,7 @@ def test_pop_hz_deficiency(
     ):
         """Use Hardy-Weinberg test for heterozygote deficiency.
 
-        Returns a population iterator containing a dictionary wehre
+        Returns a population iterator containing a dictionary where
         dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps).
 
         Some loci have a None if the info is not available.

diff --git a/Bio/Restriction/Restriction.py b/Bio/Restriction/Restriction.py
@@ -949,7 +949,7 @@ def _search(cls):
 
     @classmethod
     def is_palindromic(cls):
-        """Return if the enzyme has a palindromic recoginition site."""
+        """Return if the enzyme has a palindromic recognition site."""
         return True
 
 
@@ -991,7 +991,7 @@ def _search(cls):
 
     @classmethod
     def is_palindromic(cls):
-        """Return if the enzyme has a palindromic recoginition site."""
+        """Return if the enzyme has a palindromic recognition site."""
         return False
 
 

diff --git a/Bio/SeqFeature.py b/Bio/SeqFeature.py
@@ -1392,7 +1392,7 @@ def extract(self, parent_sequence, references=None):
         return f_seq
 
 
-FeatureLocation = SimpleLocation  # OBSOLETE; for backward compatability only.
+FeatureLocation = SimpleLocation  # OBSOLETE; for backward compatibility only.
 
 
 class CompoundLocation(Location):

diff --git a/Bio/SeqIO/PhdIO.py b/Bio/SeqIO/PhdIO.py
@@ -82,7 +82,7 @@ def PhdIterator(source: _TextIOSource) -> Iterator[SeqRecord]:
         seq_record = SeqRecord(
             phd_record.seq, id=name, name=name, description=phd_record.file_name
         )
-        # Just re-use the comments dictionary as the SeqRecord's annotations
+        # Just reuse the comments dictionary as the SeqRecord's annotations
         seq_record.annotations = phd_record.comments
         seq_record.annotations["molecule_type"] = "DNA"
         # And store the qualities and peak locations as per-letter-annotation