Merge pull request #12626 from adrianeboyd/backport/v3.5.3-1

Backports for v3.5.3
explosion · May 12, 2023
commit 424e917
2 parents: aea4a96 + 9beaec6
Show file tree

Hide file tree

Showing 20 changed files with 298 additions and 311 deletions.
diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -165,6 +165,7 @@ jobs:
       - name: "Run CPU tests"
         run: |
           python -m pytest --pyargs spacy -W error
+        if: "!(startsWith(matrix.os, 'macos') && matrix.python_version == '3.11')"
 
       - name: "Run CPU tests with thinc-apple-ops"
         run: |

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
@@ -336,7 +336,7 @@ def debug_data(
                 show=verbose,
             )
         else:
-            msg.good("Examples without ocurrences available for all labels")
+            msg.good("Examples without occurrences available for all labels")
 
     if "ner" in factory_names:
         # Get all unique NER labels present in the data

diff --git a/spacy/cli/download.py b/spacy/cli/download.py
@@ -81,11 +81,8 @@ def download(
 
 def get_model_filename(model_name: str, version: str, sdist: bool = False) -> str:
     dl_tpl = "{m}-{v}/{m}-{v}{s}"
-    egg_tpl = "#egg={m}=={v}"
     suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX
     filename = dl_tpl.format(m=model_name, v=version, s=suffix)
-    if sdist:
-        filename += egg_tpl.format(m=model_name, v=version)
     return filename
 
 

diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
@@ -122,13 +122,16 @@ def evaluate(
         docs = list(nlp.pipe(ex.reference.text for ex in dev_dataset[:displacy_limit]))
         render_deps = "parser" in factory_names
         render_ents = "ner" in factory_names
+        render_spans = "spancat" in factory_names
+
         render_parses(
             docs,
             displacy_path,
             model_name=model,
             limit=displacy_limit,
             deps=render_deps,
             ents=render_ents,
+            spans=render_spans,
         )
         msg.good(f"Generated {displacy_limit} parses as HTML", displacy_path)
 
@@ -182,6 +185,7 @@ def render_parses(
     limit: int = 250,
     deps: bool = True,
     ents: bool = True,
+    spans: bool = True,
 ):
     docs[0].user_data["title"] = model_name
     if ents:
@@ -195,6 +199,11 @@ def render_parses(
         with (output_path / "parses.html").open("w", encoding="utf8") as file_:
             file_.write(html)
 
+    if spans:
+        html = displacy.render(docs[:limit], style="span", page=True)
+        with (output_path / "spans.html").open("w", encoding="utf8") as file_:
+            file_.write(html)
+
 
 def print_prf_per_type(
     msg: Printer, scores: Dict[str, Dict[str, float]], name: str, type: str

diff --git a/spacy/matcher/dependencymatcher.pyx b/spacy/matcher/dependencymatcher.pyx
@@ -432,22 +432,22 @@ cdef class DependencyMatcher:
         return [doc[child.i] for child in doc[node].head.children if child.i < node]
 
     def _imm_right_child(self, doc, node):
-        for child in doc[node].children:
+        for child in doc[node].rights:
             if child.i == node + 1:
                 return [doc[child.i]]
         return []
 
     def _imm_left_child(self, doc, node):
-        for child in doc[node].children:
+        for child in doc[node].lefts:
             if child.i == node - 1:
                 return [doc[child.i]]
         return []
 
     def _right_child(self, doc, node):
-        return [doc[child.i] for child in doc[node].children if child.i > node]
+        return [child for child in doc[node].rights]
 
     def _left_child(self, doc, node):
-        return [doc[child.i] for child in doc[node].children if child.i < node]
+        return [child for child in doc[node].lefts]
 
     def _imm_right_parent(self, doc, node):
         if doc[node].head.i == node + 1:

diff --git a/spacy/ml/extract_spans.py b/spacy/ml/extract_spans.py
@@ -1,4 +1,4 @@
-from typing import Tuple, Callable
+from typing import List, Tuple, Callable
 from thinc.api import Model, to_numpy
 from thinc.types import Ragged, Ints1d
 
@@ -52,14 +52,14 @@ def _get_span_indices(ops, spans: Ragged, lengths: Ints1d) -> Ints1d:
     indices will be [5, 6, 7, 8, 8, 9].
     """
     spans, lengths = _ensure_cpu(spans, lengths)
-    indices = []
+    indices: List[int] = []
     offset = 0
     for i, length in enumerate(lengths):
         spans_i = spans[i].dataXd + offset
         for j in range(spans_i.shape[0]):
-            indices.append(ops.xp.arange(spans_i[j, 0], spans_i[j, 1]))  # type: ignore[call-overload, index]
+            indices.extend(range(spans_i[j, 0], spans_i[j, 1]))  # type: ignore[arg-type, call-overload]
         offset += length
-    return ops.flatten(indices, dtype="i", ndim_if_empty=1)
+    return ops.asarray1i(indices)
 
 
 def _ensure_cpu(spans: Ragged, lengths: Ints1d) -> Tuple[Ragged, Ints1d]:

diff --git a/spacy/tests/doc/test_morphanalysis.py b/spacy/tests/doc/test_morphanalysis.py
@@ -33,6 +33,8 @@ def test_token_morph_key(i_has):
 def test_morph_props(i_has):
     assert i_has[0].morph.get("PronType") == ["prs"]
     assert i_has[1].morph.get("PronType") == []
+    assert i_has[1].morph.get("AsdfType", ["asdf"]) == ["asdf"]
+    assert i_has[1].morph.get("AsdfType", default=["asdf", "qwer"]) == ["asdf", "qwer"]
 
 
 def test_morph_iter(i_has):