Skip to content

Commit

Permalink
Merge pull request #11958 from adrianeboyd/backport/v3.3.2
Browse files: browse the repository at this point in the history
Backport bug fixes to v3.3.x
  • Loading branch information
adrianeboyd committed Dec 14, 2022
2 parents 1cb197e + 5b3b18d commit 4e032da
Show file tree
Hide file tree
Showing 29 changed files with 206 additions and 110 deletions.
70 changes: 32 additions & 38 deletions .github/azure-steps.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
parameters:
python_version: ''
architecture: ''
prefix: ''
gpu: false
num_build_jobs: 1
architecture: 'x64'

steps:
- task: UsePythonVersion@0
Expand All @@ -16,16 +13,16 @@ steps:
displayName: 'Set variables'
- script: |
${{ parameters.prefix }} python -m pip install -U pip setuptools
${{ parameters.prefix }} python -m pip install -U -r requirements.txt
python -m pip install -U build pip setuptools
python -m pip install -U -r requirements.txt
displayName: "Install dependencies"
- script: |
${{ parameters.prefix }} python setup.py build_ext --inplace -j ${{ parameters.num_build_jobs }}
${{ parameters.prefix }} python setup.py sdist --formats=gztar
displayName: "Compile and build sdist"
python -m build --sdist
displayName: "Build sdist"
- script: python -m mypy spacy
- script: |
python -m mypy spacy
displayName: 'Run mypy'
condition: ne(variables['python_version'], '3.10')
Expand All @@ -34,35 +31,24 @@ steps:
contents: "spacy"
displayName: "Delete source directory"

- task: DeleteFiles@1
inputs:
contents: "*.egg-info"
displayName: "Delete egg-info directory"

- script: |
${{ parameters.prefix }} python -m pip freeze --exclude torch --exclude cupy-cuda110 > installed.txt
${{ parameters.prefix }} python -m pip uninstall -y -r installed.txt
python -m pip freeze > installed.txt
python -m pip uninstall -y -r installed.txt
displayName: "Uninstall all packages"
- bash: |
${{ parameters.prefix }} SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
${{ parameters.prefix }} python -m pip install dist/$SDIST
SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
python -m pip install dist/$SDIST
displayName: "Install from sdist"
- script: |
${{ parameters.prefix }} python -m pip install -U -r requirements.txt
displayName: "Install test requirements"
- script: |
${{ parameters.prefix }} python -m pip install -U cupy-cuda110 -f https://github.com/cupy/cupy/releases/v9.0.0
${{ parameters.prefix }} python -m pip install "torch==1.7.1+cu110" -f https://download.pytorch.org/whl/torch_stable.html
displayName: "Install GPU requirements"
condition: eq(${{ parameters.gpu }}, true)
- script: |
${{ parameters.prefix }} python -m pytest --pyargs spacy
displayName: "Run CPU tests"
condition: eq(${{ parameters.gpu }}, false)
- script: |
${{ parameters.prefix }} python -m pytest --pyargs spacy -p spacy.tests.enable_gpu
displayName: "Run GPU tests"
condition: eq(${{ parameters.gpu }}, true)
python -W error -c "import spacy"
displayName: "Test import"
- script: |
python -m spacy download ca_core_news_sm
Expand Down Expand Up @@ -106,12 +92,20 @@ steps:
condition: eq(variables['python_version'], '3.8')
- script: |
python .github/validate_universe_json.py website/meta/universe.json
displayName: 'Test website/meta/universe.json'
condition: eq(variables['python_version'], '3.8')
python -m pip install -U -r requirements.txt
displayName: "Install test requirements"
- script: |
python -m pytest --pyargs spacy -W error
displayName: "Run CPU tests"
- script: |
${{ parameters.prefix }} python -m pip install thinc-apple-ops
${{ parameters.prefix }} python -m pytest --pyargs spacy
python -m pip install 'spacy[apple]'
python -m pytest --pyargs spacy
displayName: "Run CPU tests with thinc-apple-ops"
condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.9'))
condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.10'))
- script: |
python .github/validate_universe_json.py website/meta/universe.json
displayName: 'Test website/meta/universe.json'
condition: eq(variables['python_version'], '3.8')
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ repos:
language_version: python3.7
additional_dependencies: ['click==8.0.4']
- repo: https://gitlab.com/pycqa/flake8
rev: 3.9.2
rev: 5.0.4
hooks:
- id: flake8
args:
Expand Down
23 changes: 3 additions & 20 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
inputs:
versionSpec: "3.7"
- script: |
pip install flake8==3.9.2
pip install flake8==5.0.4
python -m flake8 spacy --count --select=E901,E999,F821,F822,F823 --show-source --statistics
displayName: "flake8"
Expand All @@ -41,7 +41,7 @@ jobs:
matrix:
# We're only running one platform per Python version to speed up builds
Python36Linux:
imageName: "ubuntu-latest"
imageName: "ubuntu-20.04"
python.version: "3.6"
# Python36Windows:
# imageName: "windows-latest"
Expand All @@ -50,7 +50,7 @@ jobs:
# imageName: "macos-latest"
# python.version: "3.6"
# Python37Linux:
# imageName: "ubuntu-latest"
# imageName: "ubuntu-20.04"
# python.version: "3.7"
Python37Windows:
imageName: "windows-latest"
Expand Down Expand Up @@ -92,20 +92,3 @@ jobs:
- template: .github/azure-steps.yml
parameters:
python_version: '$(python.version)'
architecture: 'x64'

# - job: "TestGPU"
# dependsOn: "Validate"
# strategy:
# matrix:
# Python38LinuxX64_GPU:
# python.version: '3.8'
# pool:
# name: "LinuxX64_GPU"
# steps:
# - template: .github/azure-steps.yml
# parameters:
# python_version: '$(python.version)'
# architecture: 'x64'
# gpu: true
# num_build_jobs: 24
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0
typer>=0.3.0,<0.5.0
pathy>=0.3.5
smart-open>=5.2.1,<7.0.0
# Third party dependencies
numpy>=1.15.0
requests>=2.13.0,<3.0.0
Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ install_requires =
wasabi>=0.9.1,<1.1.0
srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0
# Third-party dependencies
typer>=0.3.0,<0.5.0
pathy>=0.3.5
# Third-party dependencies
smart-open>=5.2.1,<7.0.0
tqdm>=4.38.0,<5.0.0
numpy>=1.15.0
requests>=2.13.0,<3.0.0
Expand Down
2 changes: 1 addition & 1 deletion spacy/about.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# fmt: off
__title__ = "spacy"
__version__ = "3.3.1"
__version__ = "3.3.2"
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
__projects__ = "https://github.com/explosion/projects"
Expand Down
2 changes: 1 addition & 1 deletion spacy/cli/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def download_file(src: Union[str, "Pathy"], dest: Path, *, force: bool = False)
if dest.exists() and not force:
return None
src = str(src)
with smart_open.open(src, mode="rb", ignore_ext=True) as input_file:
with smart_open.open(src, mode="rb", compression="disable") as input_file:
with dest.open(mode="wb") as output_file:
shutil.copyfileobj(input_file, output_file)

Expand Down
5 changes: 3 additions & 2 deletions spacy/displacy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,12 +227,13 @@ def parse_spans(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
"kb_id": span.kb_id_ if span.kb_id_ else "",
"kb_url": kb_url_template.format(span.kb_id_) if kb_url_template else "#",
}
for span in doc.spans[spans_key]
for span in doc.spans.get(spans_key, [])
]
tokens = [token.text for token in doc]

if not spans:
warnings.warn(Warnings.W117.format(spans_key=spans_key))
keys = list(doc.spans.keys())
warnings.warn(Warnings.W117.format(spans_key=spans_key, keys=keys))
title = doc.user_data.get("title", None) if hasattr(doc, "user_data") else None
settings = get_doc_settings(doc)
return {
Expand Down
7 changes: 6 additions & 1 deletion spacy/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ class Warnings(metaclass=ErrorsWithCodes):
W117 = ("No spans to visualize found in Doc object with spans_key: '{spans_key}'. If this is "
"surprising to you, make sure the Doc was processed using a model "
"that supports span categorization, and check the `doc.spans[spans_key]` "
"property manually if necessary.")
"property manually if necessary.\n\nAvailable keys: {keys}")
W118 = ("Term '{term}' not found in glossary. It may however be explained in documentation "
"for the corpora used to train the language. Please check "
"`nlp.meta[\"sources\"]` for any relevant links.")
Expand Down Expand Up @@ -335,6 +335,11 @@ class Errors(metaclass=ErrorsWithCodes):
"clear the existing vectors and resize the table.")
E074 = ("Error interpreting compiled match pattern: patterns are expected "
"to end with the attribute {attr}. Got: {bad_attr}.")
E079 = ("Error computing states in beam: number of predicted beams "
"({pbeams}) does not equal number of gold beams ({gbeams}).")
E080 = ("Duplicate state found in beam: {key}.")
E081 = ("Error getting gradient in beam: number of histories ({n_hist}) "
"does not equal number of losses ({losses}).")
E082 = ("Error deprojectivizing parse: number of heads ({n_heads}), "
"projective heads ({n_proj_heads}) and labels ({n_labels}) do not "
"match.")
Expand Down
2 changes: 1 addition & 1 deletion spacy/lang/ko/punctuation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


_infixes = (
["·", "ㆍ", "\(", "\)"]
["·", "ㆍ", r"\(", r"\)"]
+ [r"(?<=[0-9])~(?=[0-9-])"]
+ LIST_QUOTES
+ BASE_TOKENIZER_INFIXES
Expand Down
10 changes: 8 additions & 2 deletions spacy/ml/_precomputable_affine.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,15 @@ def forward(model, X, is_train):
nP = model.get_dim("nP")
nI = model.get_dim("nI")
W = model.get_param("W")
Yf = model.ops.gemm(X, W.reshape((nF * nO * nP, nI)), trans2=True)
# Preallocate array for layer output, including padding.
Yf = model.ops.alloc2f(X.shape[0] + 1, nF * nO * nP)
model.ops.gemm(X, W.reshape((nF * nO * nP, nI)), trans2=True, out=Yf[1:])
Yf = Yf.reshape((Yf.shape[0], nF, nO, nP))
Yf = model.ops.xp.vstack((model.get_param("pad"), Yf))

# Set padding. Padding has shape (1, nF, nO, nP). Unfortunately, we cannot
# change its shape to (nF, nO, nP) without breaking existing models. So
# we'll squeeze the first dimension here.
Yf[0] = model.ops.xp.squeeze(model.get_param("pad"), 0)

def backward(dY_ids):
# This backprop is particularly tricky, because we get back a different
Expand Down
4 changes: 2 additions & 2 deletions spacy/pipeline/edit_tree_lemmatizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,9 +331,9 @@ def _add_labels(self, labels: Dict):

tree = dict(tree)
if "orig" in tree:
tree["orig"] = self.vocab.strings[tree["orig"]]
tree["orig"] = self.vocab.strings.add(tree["orig"])
if "orig" in tree:
tree["subst"] = self.vocab.strings[tree["subst"]]
tree["subst"] = self.vocab.strings.add(tree["subst"])

trees.append(tree)

Expand Down
5 changes: 4 additions & 1 deletion spacy/pipeline/spancat.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,10 @@ def predict(self, docs: Iterable[Doc]):
DOCS: https://spacy.io/api/spancategorizer#predict
"""
indices = self.suggester(docs, ops=self.model.ops)
scores = self.model.predict((docs, indices)) # type: ignore
if indices.lengths.sum() == 0:
scores = self.model.ops.alloc2f(0, 0)
else:
scores = self.model.predict((docs, indices)) # type: ignore
return indices, scores

def set_candidates(
Expand Down
4 changes: 2 additions & 2 deletions spacy/tests/doc/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,14 @@ def test_doc_from_array_heads_in_bounds(en_vocab):

# head before start
arr = doc.to_array(["HEAD"])
arr[0] = -1
arr[0] = numpy.int32(-1).astype(numpy.uint64)
doc_from_array = Doc(en_vocab, words=words)
with pytest.raises(ValueError):
doc_from_array.from_array(["HEAD"], arr)

# head after end
arr = doc.to_array(["HEAD"])
arr[0] = 5
arr[0] = numpy.int32(5).astype(numpy.uint64)
doc_from_array = Doc(en_vocab, words=words)
with pytest.raises(ValueError):
doc_from_array.from_array(["HEAD"], arr)
5 changes: 3 additions & 2 deletions spacy/tests/doc/test_doc_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy
from numpy.testing import assert_array_equal
import pytest
import warnings
from thinc.api import NumpyOps, get_current_ops

from spacy.attrs import DEP, ENT_IOB, ENT_TYPE, HEAD, IS_ALPHA, MORPH, POS
Expand Down Expand Up @@ -529,9 +530,9 @@ def test_doc_from_array_sent_starts(en_vocab):
# no warning using default attrs
attrs = doc._get_array_attrs()
arr = doc.to_array(attrs)
with pytest.warns(None) as record:
with warnings.catch_warnings():
warnings.simplefilter("error")
new_doc.from_array(attrs, arr)
assert len(record) == 0
# only SENT_START uses SENT_START
attrs = [SENT_START]
arr = doc.to_array(attrs)
Expand Down
3 changes: 3 additions & 0 deletions spacy/tests/lang/ru/test_lemmatizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
from spacy.tokens import Doc


pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")


def test_ru_doc_lemmatization(ru_lemmatizer):
words = ["мама", "мыла", "раму"]
pos = ["NOUN", "VERB", "NOUN"]
Expand Down
4 changes: 4 additions & 0 deletions spacy/tests/lang/uk/test_lemmatizer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import pytest
from spacy.tokens import Doc


pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")


def test_uk_lemmatizer(uk_lemmatizer):
"""Check that the default uk lemmatizer runs."""
doc = Doc(uk_lemmatizer.vocab, words=["a", "b", "c"])
Expand Down
9 changes: 5 additions & 4 deletions spacy/tests/matcher/test_phrase_matcher.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest
import warnings
import srsly
from mock import Mock

Expand Down Expand Up @@ -344,13 +345,13 @@ def test_phrase_matcher_validation(en_vocab):
matcher.add("TEST1", [doc1])
with pytest.warns(UserWarning):
matcher.add("TEST2", [doc2])
with pytest.warns(None) as record:
with warnings.catch_warnings():
warnings.simplefilter("error")
matcher.add("TEST3", [doc3])
assert not record.list
matcher = PhraseMatcher(en_vocab, attr="POS", validate=True)
with pytest.warns(None) as record:
with warnings.catch_warnings():
warnings.simplefilter("error")
matcher.add("TEST4", [doc2])
assert not record.list


def test_attr_validation(en_vocab):
Expand Down

0 comments on commit 4e032da

Please sign in to comment.