From 15466c9c1dcaf87210c31cb4c3ccedf017a107ed Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Wed, 24 May 2023 16:41:14 +0200 Subject: [PATCH 1/6] Switch from azure to GHA --- .github/azure-steps.yml | 111 ----------------------- .github/workflows/tests.yml | 170 ++++++++++++++++++++++++++++++++++++ azure-pipelines.yml | 94 -------------------- 3 files changed, 170 insertions(+), 205 deletions(-) delete mode 100644 .github/azure-steps.yml create mode 100644 .github/workflows/tests.yml delete mode 100644 azure-pipelines.yml diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml deleted file mode 100644 index 7e3f94df6ba..00000000000 --- a/.github/azure-steps.yml +++ /dev/null @@ -1,111 +0,0 @@ -parameters: - python_version: '' - architecture: 'x64' - -steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: ${{ parameters.python_version }} - architecture: ${{ parameters.architecture }} - - - bash: | - echo "##vso[task.setvariable variable=python_version]${{ parameters.python_version }}" - displayName: 'Set variables' - - - script: | - python -m pip install -U build pip setuptools - python -m pip install -U -r requirements.txt - displayName: "Install dependencies" - - - script: | - python -m build --sdist - displayName: "Build sdist" - - - script: | - python -m mypy spacy - displayName: 'Run mypy' - condition: ne(variables['python_version'], '3.10') - - - task: DeleteFiles@1 - inputs: - contents: "spacy" - displayName: "Delete source directory" - - - task: DeleteFiles@1 - inputs: - contents: "*.egg-info" - displayName: "Delete egg-info directory" - - - script: | - python -m pip freeze > installed.txt - python -m pip uninstall -y -r installed.txt - displayName: "Uninstall all packages" - - - bash: | - SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1) - python -m pip install dist/$SDIST - displayName: "Install from sdist" - - - script: | - python -W error -c "import spacy" - displayName: "Test import" - - - script: | - python -m spacy download ca_core_news_sm - python -m spacy download ca_core_news_md - python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')" - displayName: 'Test download CLI' - condition: eq(variables['python_version'], '3.8') - - - script: | - python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json . - displayName: 'Test convert CLI' - condition: eq(variables['python_version'], '3.8') - - - script: | - python -m spacy init config -p ner -l ca ner.cfg - python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy - displayName: 'Test debug config CLI' - condition: eq(variables['python_version'], '3.8') - - - script: | - # will have errors due to sparse data, check for summary in output - python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary - displayName: 'Test debug data CLI' - condition: eq(variables['python_version'], '3.8') - - - script: | - python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1 - displayName: 'Test train CLI' - condition: eq(variables['python_version'], '3.8') - - - script: | - python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')" - PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir - displayName: 'Test assemble CLI' - condition: eq(variables['python_version'], '3.8') - - - script: | - python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')" - python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113 - displayName: 'Test assemble CLI vectors warning' - condition: eq(variables['python_version'], '3.8') - - - script: | - python -m pip install -U -r requirements.txt - displayName: "Install test requirements" - - - script: | - python -m pytest --pyargs spacy -W error - displayName: "Run CPU tests" - - - script: | - python -m pip install 'spacy[apple]' - python -m pytest --pyargs spacy - displayName: "Run CPU tests with thinc-apple-ops" - condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.10')) - - - script: | - python .github/validate_universe_json.py website/meta/universe.json - displayName: 'Test website/meta/universe.json' - condition: eq(variables['python_version'], '3.8') diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000000..7bedbe8c6ae --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,170 @@ +name: tests + +on: + push: + branches-ignore: + - "spacy.io" + - "nightly.spacy.io" + - "v2.spacy.io" + paths-ignore: + - "*.md" + - "*.mdx" + - "website/**" + - ".github/workflows/**" + pull_request: + types: [opened, synchronize, reopened, edited] + paths-ignore: + - "*.md" + - "*.mdx" + - "website/**" + +jobs: + validate: + name: Validate + if: github.repository_owner == 'explosion' + runs-on: ubuntu-latest + steps: + - name: Check out repo + uses: actions/checkout@v3 + + - name: Configure Python version + uses: actions/setup-python@v4 + with: + python-version: "3.7" + architecture: x64 + + - name: black + run: | + python -m pip install black -c requirements.txt + python -m black spacy --check + - name: flake8 + run: | + python -m pip install flake8==5.0.4 + python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics + tests: + name: Test + needs: Validate + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + python_version: ["3.10"] + include: + - os: ubuntu-20.04 + python_version: "3.6" + - os: windows-latest + python_version: "3.7" + - os: macos-latest + python_version: "3.8" + - os: ubuntu-latest + python_version: "3.9" + + runs-on: ${{ matrix.os }} + + steps: + - name: Check out repo + uses: actions/checkout@v3 + + - name: Configure Python version + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python_version }} + architecture: x64 + + - name: Install dependencies + run: | + python -m pip install -U build pip setuptools + python -m pip install -U -r requirements.txt + + - name: Build sdist + run: | + python -m build --sdist + + - name: Run mypy + run: | + # Install older numpy for mypy (bug with newer numpy+mypy not fixed + # until mypy 0.981) + python -m pip install "numpy<1.22" + python -m mypy spacy + if: matrix.python_version != '3.6' + + - name: Delete source directory and .egg-info + run: | + rm -rf spacy *.egg-info + shell: bash + + - name: Uninstall all packages + run: | + python -m pip freeze + python -m pip freeze --exclude pywin32 > installed.txt + python -m pip uninstall -y -r installed.txt + + - name: Install from sdist + run: | + SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1) + SPACY_NUM_BUILD_JOBS=2 python -m pip install dist/$SDIST + shell: bash + + - name: Test import + run: python -W error -c "import spacy" + + - name: "Test download CLI" + run: | + python -m spacy download ca_core_news_sm + python -m spacy download ca_core_news_md + python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')" + if: matrix.python_version == '3.9' + + - name: "Test no warnings on load (#11713)" + run: | + python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')" + if: matrix.python_version == '3.9' + + - name: "Test convert CLI" + run: | + python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json . + if: matrix.python_version == '3.9' + + - name: "Test debug config CLI" + run: | + python -m spacy init config -p ner -l ca ner.cfg + python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy + if: matrix.python_version == '3.9' + + - name: "Test debug data CLI" + run: | + # will have errors due to sparse data, check for summary in output + python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary + if: matrix.python_version == '3.9' + + - name: "Test train CLI" + run: | + python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1 + if: matrix.python_version == '3.9' + + - name: "Test assemble CLI" + run: | + python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')" + PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir + if: matrix.python_version == '3.9' + + - name: "Test assemble CLI vectors warning" + run: | + python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')" + python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113 + if: matrix.python_version == '3.9' + + - name: "Install test requirements" + run: | + python -m pip install -U -r requirements.txt + + - name: "Run CPU tests" + run: | + python -m pytest --pyargs spacy -W error + if: "!(startsWith(matrix.os, 'macos') && matrix.python_version == '3.10')" + + - name: "Run CPU tests with thinc-apple-ops" + run: | + python -m pip install 'spacy[apple]' + python -m pytest --pyargs spacy + if: startsWith(matrix.os, 'macos') && matrix.python_version == '3.10' diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index a26d269747a..00000000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,94 +0,0 @@ -trigger: - batch: true - branches: - include: - - "*" - exclude: - - "spacy.io" - - "nightly.spacy.io" - - "v2.spacy.io" - paths: - exclude: - - "website/*" - - "*.md" - - ".github/workflows/*" -pr: - paths: - exclude: - - "*.md" - - "website/docs/*" - - "website/src/*" - - ".github/workflows/*" - -jobs: - # Perform basic checks for most important errors (syntax etc.) Uses the config - # defined in .flake8 and overwrites the selected codes. - - job: "Validate" - pool: - vmImage: "ubuntu-latest" - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: "3.7" - - script: | - pip install flake8==5.0.4 - python -m flake8 spacy --count --select=E901,E999,F821,F822,F823 --show-source --statistics - displayName: "flake8" - - - job: "Test" - dependsOn: "Validate" - strategy: - matrix: - # We're only running one platform per Python version to speed up builds - Python36Linux: - imageName: "ubuntu-20.04" - python.version: "3.6" - # Python36Windows: - # imageName: "windows-latest" - # python.version: "3.6" - # Python36Mac: - # imageName: "macos-latest" - # python.version: "3.6" - # Python37Linux: - # imageName: "ubuntu-20.04" - # python.version: "3.7" - Python37Windows: - imageName: "windows-latest" - python.version: "3.7" - # Python37Mac: - # imageName: "macos-latest" - # python.version: "3.7" - # Python38Linux: - # imageName: "ubuntu-latest" - # python.version: "3.8" - # Python38Windows: - # imageName: "windows-latest" - # python.version: "3.8" - Python38Mac: - imageName: "macos-latest" - python.version: "3.8" - Python39Linux: - imageName: "ubuntu-latest" - python.version: "3.9" - # Python39Windows: - # imageName: "windows-latest" - # python.version: "3.9" - # Python39Mac: - # imageName: "macos-latest" - # python.version: "3.9" - Python310Linux: - imageName: "ubuntu-latest" - python.version: "3.10" - Python310Windows: - imageName: "windows-latest" - python.version: "3.10" - Python310Mac: - imageName: "macos-latest" - python.version: "3.10" - maxParallel: 4 - pool: - vmImage: $(imageName) - steps: - - template: .github/azure-steps.yml - parameters: - python_version: '$(python.version)' From 2f2c23384c6e1172a89f050ea3869ce4292051a5 Mon Sep 17 00:00:00 2001 From: kadarakos Date: Thu, 27 Apr 2023 15:27:13 +0200 Subject: [PATCH 2/6] Spancat speed improvement (#12577) * avoid nesting then flattening * mypy fix * Apply suggestions from code review * Add type for indices * Run full matrix for mypy * Add back modified type: ignore * Revert "Run full matrix for mypy" This reverts commit e218873d049d5634e6faa0341ada9af5d53b5a29. --------- Co-authored-by: Adriane Boyd --- spacy/ml/extract_spans.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/spacy/ml/extract_spans.py b/spacy/ml/extract_spans.py index d5e9bc07cba..adea6cda0b9 100644 --- a/spacy/ml/extract_spans.py +++ b/spacy/ml/extract_spans.py @@ -1,4 +1,4 @@ -from typing import Tuple, Callable +from typing import List, Tuple, Callable from thinc.api import Model, to_numpy from thinc.types import Ragged, Ints1d @@ -52,14 +52,14 @@ def _get_span_indices(ops, spans: Ragged, lengths: Ints1d) -> Ints1d: indices will be [5, 6, 7, 8, 8, 9]. """ spans, lengths = _ensure_cpu(spans, lengths) - indices = [] + indices: List[int] = [] offset = 0 for i, length in enumerate(lengths): spans_i = spans[i].dataXd + offset for j in range(spans_i.shape[0]): - indices.append(ops.xp.arange(spans_i[j, 0], spans_i[j, 1])) # type: ignore[call-overload, index] + indices.extend(range(spans_i[j, 0], spans_i[j, 1])) # type: ignore offset += length - return ops.flatten(indices, dtype="i", ndim_if_empty=1) + return ops.asarray1i(indices) def _ensure_cpu(spans: Ragged, lengths: Ints1d) -> Tuple[Ragged, Ints1d]: From 515c2394c43a1df896b15632a99c418303994616 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Wed, 24 May 2023 16:53:32 +0200 Subject: [PATCH 3/6] Remove #egg from download URLs The current URLs will become invalid in pip 25.0. According to the pip docs, the egg= URLs are currently only needed for editable VCS installs. --- spacy/cli/download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 4ea9a8f0e71..e2bb17ef72d 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -50,7 +50,7 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) - ) pip_args = pip_args + ("--no-deps",) suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX - dl_tpl = "{m}-{v}/{m}-{v}{s}#egg={m}=={v}" + dl_tpl = "{m}-{v}/{m}-{v}{s}" if direct: components = model.split("-") model_name = "".join(components[:-1]) From f82950657ef89c43dd5753dad919127749dc016f Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Wed, 24 May 2023 17:31:47 +0200 Subject: [PATCH 4/6] Add typing_extensions requirement for pydantic --- requirements.txt | 4 +++- setup.cfg | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 783e6f0f8f0..81de4ce1427 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,9 @@ langcodes>=3.2.0,<4.0.0 # Official Python utilities setuptools packaging>=20.0 -typing_extensions>=3.7.4.1,<4.2.0; python_version < "3.8" +# Require and pin typing_extensions for all python versions as a workaround +# for pydantic incompatibility with typing_extensions>=4.6.0 +typing_extensions>=3.7.4.1,<4.6.0 # Development dependencies pre-commit>=2.13.0 cython>=0.25,<3.0 diff --git a/setup.cfg b/setup.cfg index 97e6efc213e..97170384475 100644 --- a/setup.cfg +++ b/setup.cfg @@ -63,7 +63,9 @@ install_requires = # Official Python utilities setuptools packaging>=20.0 - typing_extensions>=3.7.4,<4.2.0; python_version < "3.8" + # Require and pin typing_extensions for all python versions as a workaround + # for pydantic incompatibility with typing_extensions>=4.6.0 + typing_extensions>=3.7.4.1,<4.6.0 langcodes>=3.2.0,<4.0.0 [options.entry_points] From b4117be7d06d63b50acf8de08f254ad03057ec23 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Wed, 24 May 2023 20:56:56 +0200 Subject: [PATCH 5/6] Format --- spacy/tests/pipeline/test_entity_ruler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/spacy/tests/pipeline/test_entity_ruler.py b/spacy/tests/pipeline/test_entity_ruler.py index bbd537cef75..6851e2a7c20 100644 --- a/spacy/tests/pipeline/test_entity_ruler.py +++ b/spacy/tests/pipeline/test_entity_ruler.py @@ -491,7 +491,6 @@ def test_entity_ruler_remove_nonexisting_pattern(nlp, entity_ruler_factory): ruler.remove_by_id("nepattern") - @pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS) def test_entity_ruler_remove_several_patterns(nlp, entity_ruler_factory): ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler") From f645747553bd7fd2fe4a5044dc37d85b7d8b3666 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Wed, 24 May 2023 20:21:05 +0200 Subject: [PATCH 6/6] Set version to v3.3.3 --- spacy/about.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/about.py b/spacy/about.py index b4ef29260f1..8a5390d7734 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,6 +1,6 @@ # fmt: off __title__ = "spacy" -__version__ = "3.3.2" +__version__ = "3.3.3" __download_url__ = "https://github.com/explosion/spacy-models/releases/download" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" __projects__ = "https://github.com/explosion/projects"