Skip to content

Commit

Permalink
Merge pull request #12626 from adrianeboyd/backport/v3.5.3-1
Browse files Browse the repository at this point in the history
Backports for v3.5.3
  • Loading branch information
adrianeboyd committed May 12, 2023
2 parents aea4a96 + 9beaec6 commit 424e917
Show file tree
Hide file tree
Showing 20 changed files with 298 additions and 311 deletions.
118 changes: 0 additions & 118 deletions .github/azure-steps.yml

This file was deleted.

1 change: 1 addition & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ jobs:
- name: "Run CPU tests"
run: |
python -m pytest --pyargs spacy -W error
if: "!(startsWith(matrix.os, 'macos') && matrix.python_version == '3.11')"

- name: "Run CPU tests with thinc-apple-ops"
run: |
Expand Down
120 changes: 0 additions & 120 deletions azure-pipelines.yml

This file was deleted.

2 changes: 1 addition & 1 deletion spacy/cli/debug_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ def debug_data(
show=verbose,
)
else:
- msg.good("Examples without ocurrences available for all labels")
+ msg.good("Examples without occurrences available for all labels")

if "ner" in factory_names:
# Get all unique NER labels present in the data
Expand Down
3 changes: 0 additions & 3 deletions spacy/cli/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,8 @@ def download(

def get_model_filename(model_name: str, version: str, sdist: bool = False) -> str:
dl_tpl = "{m}-{v}/{m}-{v}{s}"
- egg_tpl = "#egg={m}=={v}"
suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX
filename = dl_tpl.format(m=model_name, v=version, s=suffix)
- if sdist:
- filename += egg_tpl.format(m=model_name, v=version)
return filename


Expand Down
9 changes: 9 additions & 0 deletions spacy/cli/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,16 @@ def evaluate(
docs = list(nlp.pipe(ex.reference.text for ex in dev_dataset[:displacy_limit]))
render_deps = "parser" in factory_names
render_ents = "ner" in factory_names
render_spans = "spancat" in factory_names

render_parses(
docs,
displacy_path,
model_name=model,
limit=displacy_limit,
deps=render_deps,
ents=render_ents,
spans=render_spans,
)
msg.good(f"Generated {displacy_limit} parses as HTML", displacy_path)

Expand Down Expand Up @@ -182,6 +185,7 @@ def render_parses(
limit: int = 250,
deps: bool = True,
ents: bool = True,
spans: bool = True,
):
docs[0].user_data["title"] = model_name
if ents:
Expand All @@ -195,6 +199,11 @@ def render_parses(
with (output_path / "parses.html").open("w", encoding="utf8") as file_:
file_.write(html)

if spans:
html = displacy.render(docs[:limit], style="span", page=True)
with (output_path / "spans.html").open("w", encoding="utf8") as file_:
file_.write(html)


def print_prf_per_type(
msg: Printer, scores: Dict[str, Dict[str, float]], name: str, type: str
Expand Down
8 changes: 4 additions & 4 deletions spacy/matcher/dependencymatcher.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -432,22 +432,22 @@ cdef class DependencyMatcher:
return [doc[child.i] for child in doc[node].head.children if child.i < node]

def _imm_right_child(self, doc, node):
- for child in doc[node].children:
+ for child in doc[node].rights:
if child.i == node + 1:
return [doc[child.i]]
return []

def _imm_left_child(self, doc, node):
- for child in doc[node].children:
+ for child in doc[node].lefts:
if child.i == node - 1:
return [doc[child.i]]
return []

def _right_child(self, doc, node):
- return [doc[child.i] for child in doc[node].children if child.i > node]
+ return [child for child in doc[node].rights]

def _left_child(self, doc, node):
- return [doc[child.i] for child in doc[node].children if child.i < node]
+ return [child for child in doc[node].lefts]

def _imm_right_parent(self, doc, node):
if doc[node].head.i == node + 1:
Expand Down
8 changes: 4 additions & 4 deletions spacy/ml/extract_spans.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
- from typing import Tuple, Callable
+ from typing import List, Tuple, Callable
from thinc.api import Model, to_numpy
from thinc.types import Ragged, Ints1d

Expand Down Expand Up @@ -52,14 +52,14 @@ def _get_span_indices(ops, spans: Ragged, lengths: Ints1d) -> Ints1d:
indices will be [5, 6, 7, 8, 8, 9].
"""
spans, lengths = _ensure_cpu(spans, lengths)
- indices = []
+ indices: List[int] = []
offset = 0
for i, length in enumerate(lengths):
spans_i = spans[i].dataXd + offset
for j in range(spans_i.shape[0]):
- indices.append(ops.xp.arange(spans_i[j, 0], spans_i[j, 1])) # type: ignore[call-overload, index]
+ indices.extend(range(spans_i[j, 0], spans_i[j, 1])) # type: ignore[arg-type, call-overload]
offset += length
- return ops.flatten(indices, dtype="i", ndim_if_empty=1)
+ return ops.asarray1i(indices)


def _ensure_cpu(spans: Ragged, lengths: Ints1d) -> Tuple[Ragged, Ints1d]:
Expand Down
2 changes: 2 additions & 0 deletions spacy/tests/doc/test_morphanalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ def test_token_morph_key(i_has):
def test_morph_props(i_has):
assert i_has[0].morph.get("PronType") == ["prs"]
assert i_has[1].morph.get("PronType") == []
+ assert i_has[1].morph.get("AsdfType", ["asdf"]) == ["asdf"]
+ assert i_has[1].morph.get("AsdfType", default=["asdf", "qwer"]) == ["asdf", "qwer"]


def test_morph_iter(i_has):
Expand Down

0 comments on commit 424e917

Please sign in to comment.