Fix typos in docs (explosion#13466)

* fix typos
* prettier formatting

---------

Co-authored-by: svlandeg <svlandeg@github.com>
jordankanter · May 3, 2024 · bce9b02
1 parent 8c75003
commit bce9b02
Show file tree

Hide file tree

Showing 14 changed files with 349 additions and 128 deletions.
diff --git a/spacy/cli/find_threshold.py b/spacy/cli/find_threshold.py
@@ -39,7 +39,7 @@ def find_threshold_cli(
     # fmt: on
 ):
     """
-    Runs prediction trials for a trained model with varying tresholds to maximize
+    Runs prediction trials for a trained model with varying thresholds to maximize
     the specified metric. The search space for the threshold is traversed linearly
     from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`
     (the corresponding API call to `spacy.cli.find_threshold.find_threshold()`
@@ -81,7 +81,7 @@ def find_threshold(
     silent: bool = True,
 ) -> Tuple[float, float, Dict[float, float]]:
     """
-    Runs prediction trials for models with varying tresholds to maximize the specified metric.
+    Runs prediction trials for models with varying thresholds to maximize the specified metric.
     model (Union[str, Path]): Pipeline to evaluate. Can be a package or a path to a data directory.
     data_path (Path): Path to file with DocBin with docs to use for threshold search.
     pipe_name (str): Name of pipe to examine thresholds for.

diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py
@@ -424,7 +424,7 @@ def test_language_pipe_error_handler(n_process):
         nlp.set_error_handler(raise_error)
         with pytest.raises(ValueError):
             list(nlp.pipe(texts, n_process=n_process))
-        # set explicitely to ignoring
+        # set explicitly to ignoring
         nlp.set_error_handler(ignore_error)
         docs = list(nlp.pipe(texts, n_process=n_process))
         assert len(docs) == 0

diff --git a/website/docs/api/attributes.mdx b/website/docs/api/attributes.mdx
@@ -45,33 +45,33 @@ For attributes that represent string values, the internal integer ID is accessed
 as `Token.attr`, e.g. `token.dep`, while the string value can be retrieved by
 appending `_` as in `token.dep_`.
 
-| Attribute    | Description                                                                                                                                                   |
-| ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `DEP`        | The token's dependency label. ~~str~~                                                                                                                         |
-| `ENT_ID`     | The token's entity ID (`ent_id`). ~~str~~                                                                                                                     |
-| `ENT_IOB`    | The IOB part of the token's entity tag. Uses custom integer vaues rather than the string store: unset is `0`, `I` is `1`, `O` is `2`, and `B` is `3`. ~~str~~ |
-| `ENT_KB_ID`  | The token's entity knowledge base ID. ~~str~~                                                                                                                 |
-| `ENT_TYPE`   | The token's entity label. ~~str~~                                                                                                                             |
-| `IS_ALPHA`   | Token text consists of alphabetic characters. ~~bool~~                                                                                                        |
-| `IS_ASCII`   | Token text consists of ASCII characters. ~~bool~~                                                                                                             |
-| `IS_DIGIT`   | Token text consists of digits. ~~bool~~                                                                                                                       |
-| `IS_LOWER`   | Token text is in lowercase. ~~bool~~                                                                                                                          |
-| `IS_PUNCT`   | Token is punctuation. ~~bool~~                                                                                                                                |
-| `IS_SPACE`   | Token is whitespace. ~~bool~~                                                                                                                                 |
-| `IS_STOP`    | Token is a stop word. ~~bool~~                                                                                                                                |
-| `IS_TITLE`   | Token text is in titlecase. ~~bool~~                                                                                                                          |
-| `IS_UPPER`   | Token text is in uppercase. ~~bool~~                                                                                                                          |
-| `LEMMA`      | The token's lemma. ~~str~~                                                                                                                                    |
-| `LENGTH`     | The length of the token text. ~~int~~                                                                                                                         |
-| `LIKE_EMAIL` | Token text resembles an email address. ~~bool~~                                                                                                               |
-| `LIKE_NUM`   | Token text resembles a number. ~~bool~~                                                                                                                       |
-| `LIKE_URL`   | Token text resembles a URL. ~~bool~~                                                                                                                          |
-| `LOWER`      | The lowercase form of the token text. ~~str~~                                                                                                                 |
-| `MORPH`      | The token's morphological analysis. ~~MorphAnalysis~~                                                                                                         |
-| `NORM`       | The normalized form of the token text. ~~str~~                                                                                                                |
-| `ORTH`       | The exact verbatim text of a token. ~~str~~                                                                                                                   |
-| `POS`        | The token's universal part of speech (UPOS). ~~str~~                                                                                                          |
-| `SENT_START` | Token is start of sentence. ~~bool~~                                                                                                                          |
-| `SHAPE`      | The token's shape. ~~str~~                                                                                                                                    |
-| `SPACY`      | Token has a trailing space. ~~bool~~                                                                                                                          |
-| `TAG`        | The token's fine-grained part of speech. ~~str~~                                                                                                              |
+| Attribute    | Description                                                                                                                                                    |
+| ------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `DEP`        | The token's dependency label. ~~str~~                                                                                                                          |
+| `ENT_ID`     | The token's entity ID (`ent_id`). ~~str~~                                                                                                                      |
+| `ENT_IOB`    | The IOB part of the token's entity tag. Uses custom integer values rather than the string store: unset is `0`, `I` is `1`, `O` is `2`, and `B` is `3`. ~~str~~ |
+| `ENT_KB_ID`  | The token's entity knowledge base ID. ~~str~~                                                                                                                  |
+| `ENT_TYPE`   | The token's entity label. ~~str~~                                                                                                                              |
+| `IS_ALPHA`   | Token text consists of alphabetic characters. ~~bool~~                                                                                                         |
+| `IS_ASCII`   | Token text consists of ASCII characters. ~~bool~~                                                                                                              |
+| `IS_DIGIT`   | Token text consists of digits. ~~bool~~                                                                                                                        |
+| `IS_LOWER`   | Token text is in lowercase. ~~bool~~                                                                                                                           |
+| `IS_PUNCT`   | Token is punctuation. ~~bool~~                                                                                                                                 |
+| `IS_SPACE`   | Token is whitespace. ~~bool~~                                                                                                                                  |
+| `IS_STOP`    | Token is a stop word. ~~bool~~                                                                                                                                 |
+| `IS_TITLE`   | Token text is in titlecase. ~~bool~~                                                                                                                           |
+| `IS_UPPER`   | Token text is in uppercase. ~~bool~~                                                                                                                           |
+| `LEMMA`      | The token's lemma. ~~str~~                                                                                                                                     |
+| `LENGTH`     | The length of the token text. ~~int~~                                                                                                                          |
+| `LIKE_EMAIL` | Token text resembles an email address. ~~bool~~                                                                                                                |
+| `LIKE_NUM`   | Token text resembles a number. ~~bool~~                                                                                                                        |
+| `LIKE_URL`   | Token text resembles a URL. ~~bool~~                                                                                                                           |
+| `LOWER`      | The lowercase form of the token text. ~~str~~                                                                                                                  |
+| `MORPH`      | The token's morphological analysis. ~~MorphAnalysis~~                                                                                                          |
+| `NORM`       | The normalized form of the token text. ~~str~~                                                                                                                 |
+| `ORTH`       | The exact verbatim text of a token. ~~str~~                                                                                                                    |
+| `POS`        | The token's universal part of speech (UPOS). ~~str~~                                                                                                           |
+| `SENT_START` | Token is start of sentence. ~~bool~~                                                                                                                           |
+| `SHAPE`      | The token's shape. ~~str~~                                                                                                                                     |
+| `SPACY`      | Token has a trailing space. ~~bool~~                                                                                                                           |
+| `TAG`        | The token's fine-grained part of speech. ~~str~~                                                                                                               |
diff --git a/website/docs/api/cli.mdx b/website/docs/api/cli.mdx
@@ -565,7 +565,7 @@ New: 'ORG' (23860), 'PERSON' (21395), 'GPE' (21193), 'DATE' (18080), 'CARDINAL'
 'LOC' (2113), 'TIME' (1616), 'WORK_OF_ART' (1229), 'QUANTITY' (1150), 'FAC'
 (1134), 'EVENT' (974), 'PRODUCT' (935), 'LAW' (444), 'LANGUAGE' (338)
 ✔ Good amount of examples for all labels
-✔ Examples without occurences available for all labels
+✔ Examples without occurrences available for all labels
 ✔ No entities consisting of or starting/ending with whitespace
 
 =========================== Part-of-speech Tagging ===========================
@@ -1318,7 +1318,7 @@ $ python -m spacy apply [model] [data-path] [output-file] [--code] [--text-key]
 
 ## find-threshold {id="find-threshold",version="3.5",tag="command"}
 
-Runs prediction trials for a trained model with varying tresholds to maximize
+Runs prediction trials for a trained model with varying thresholds to maximize
 the specified metric. The search space for the threshold is traversed linearly
 from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`
 (the corresponding API call to `spacy.cli.find_threshold.find_threshold()`