Skip to content

Commit

Permalink
Merge branch 'develop' into grammar-fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
stefkauf committed Apr 12, 2024
2 parents cfe33b8 + 8c233dc commit a38da33
Show file tree
Hide file tree
Showing 407 changed files with 6,629 additions and 2,836 deletions.
64 changes: 64 additions & 0 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Label rules for the actions/labeler GitHub Action.
# https://github.com/actions/labeler
#
# Each top-level key is a label name. A label is applied to a pull request
# when any changed file matches any of the globs listed under
# `changed-files` -> `any-glob-to-any-file`. The glob entry must be nested
# under `changed-files`; written as a sibling it is not part of that rule.

# Changes to the CI configuration itself (workflows and this file).
CI:
  - changed-files:
      - any-glob-to-any-file: [".github/workflows/*", ".github/labeler.yml"]

classifier:
  - changed-files:
      - any-glob-to-any-file: "nltk/classify/**/*"

cli:
  - changed-files:
      - any-glob-to-any-file: "nltk/cli.py"

cluster:
  - changed-files:
      - any-glob-to-any-file: "nltk/cluster/**/*"

corpus:
  - changed-files:
      - any-glob-to-any-file: "nltk/corpus/**/*"

GUI:
  - changed-files:
      - any-glob-to-any-file: "nltk/app/**/*"

internals:
  - changed-files:
      - any-glob-to-any-file: "nltk/internals.py"

language-model:
  - changed-files:
      - any-glob-to-any-file: "nltk/lm/**/*"

metrics:
  - changed-files:
      - any-glob-to-any-file: "nltk/metrics/**/*"

parsing:
  - changed-files:
      - any-glob-to-any-file: "nltk/parse/**/*"

sentiment:
  - changed-files:
      - any-glob-to-any-file: "nltk/sentiment/**/*"

stem/lemma:
  - changed-files:
      - any-glob-to-any-file: "nltk/stem/**/*"

tagger:
  - changed-files:
      - any-glob-to-any-file: "nltk/tag/**/*"

tokenizer:
  - changed-files:
      - any-glob-to-any-file: "nltk/tokenize/**/*"

twitter:
  - changed-files:
      - any-glob-to-any-file: "nltk/twitter/**/*"

# NOTE(review): confirm that an nltk/wordnet/ directory exists in the
# repository; a glob that matches no files means this label is never applied.
wordnet:
  - changed-files:
      - any-glob-to-any-file: "nltk/wordnet/**/*"
20 changes: 20 additions & 0 deletions .github/workflows/cffconvert.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Validates the citation metadata in CITATION.cff.
name: cffconvert

# Run only when CITATION.cff changes on a push, or when started manually.
on:
  push:
    paths:
      - CITATION.cff
  workflow_dispatch:

jobs:
  validate:
    name: "validate"
    runs-on: ubuntu-latest
    steps:
      - name: Check out a copy of the repository
        uses: actions/checkout@v4

      # `--validate` only checks the file; no converted output is requested.
      - name: Check whether the citation metadata from CITATION.cff is valid
        uses: citation-file-format/cffconvert-github-action@2.0.0
        with:
          args: "--validate"
59 changes: 32 additions & 27 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,13 @@ jobs:
name: Run pre-commit
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- uses: pre-commit/action@v2.0.0
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.x' # run with latest python version
- run: |
pip install pre-commit
pre-commit run --all-files
cache_nltk_data:
name: Cache nltk_data
Expand All @@ -34,10 +38,10 @@ jobs:
runs-on: ${{ matrix.os }}
steps:
- name: Checkout code
uses: actions/checkout@v2
uses: actions/checkout@v4

- name: Cache nltk data
uses: actions/cache@v2
uses: actions/cache@v4
id: restore-cache
with:
path: ~/nltk_data
Expand All @@ -56,14 +60,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
uses: actions/checkout@v4

- name: Cache third party tools
uses: actions/cache@v2
uses: actions/cache@v4
id: restore-cache
with:
path: ~/third
key: third_${{ secrets.CACHE_VERSION }}
key: third_${{ hashFiles('tools/github_actions/third-party.sh') }}_${{ secrets.CACHE_VERSION }}

- name: Download third party data
run: |
Expand All @@ -76,52 +80,53 @@ jobs:
needs: [cache_nltk_data, cache_third_party]
strategy:
matrix:
python-version: [3.6, 3.7, 3.8, 3.9]
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
os: [ubuntu-latest, macos-latest, windows-latest]
fail-fast: false
runs-on: ${{ matrix.os }}
steps:
- name: Checkout code
uses: actions/checkout@v2
uses: actions/checkout@v4

- name: Setup python
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Set up JDK 16
uses: actions/setup-java@v1
with:
java-version: 16
if: runner.os == 'Linux'

- name: Cache dependencies
uses: actions/cache@v2
- name: Restore cached dependencies
uses: actions/cache@v4
id: restore-cache
with:
path: ${{ env.pythonLocation }}
key: python-dependencies-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('requirements-ci.txt') }}-${{ env.pythonLocation }}

- name: Install dependencies on cache miss
- name: Install dependencies
run: |
pip install --no-cache-dir --upgrade pip
pip install --no-cache-dir --upgrade --requirement requirements-ci.txt
if: steps.restore-cache.outputs.cache-hit != 'true'
pip install --upgrade pip
pip install --upgrade --requirement requirements-ci.txt
#if: steps.restore-cache.outputs.cache-hit != 'true' # disabled due to a persistent issue with restoring cache on macos runner

- name: Use cached nltk data
uses: actions/cache@v2
uses: actions/cache@v4
with:
path: ~/nltk_data
key: nltk_data_${{ secrets.CACHE_VERSION }}

- name: Use cached third party tools
uses: actions/cache@v2
uses: actions/cache@v4
with:
path: ~/third
key: third_${{ secrets.CACHE_VERSION }}
key: third_${{ hashFiles('tools/github_actions/third-party.sh') }}_${{ secrets.CACHE_VERSION }}
if: runner.os == 'Linux'

- name: Set up JDK 16
uses: actions/setup-java@v4
with:
distribution: 'zulu'
java-version: '16'
if: runner.os == 'Linux'

- name: Run pytest
shell: bash
run: |
pytest --numprocesses auto -rsx --doctest-modules nltk/test
pytest --numprocesses auto -rsx --doctest-modules nltk
15 changes: 15 additions & 0 deletions .github/workflows/labeler.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Applies labels to pull requests based on the rules in .github/labeler.yml.
name: "Pull Request Labeler"
# pull_request_target runs in the context of the base repository, so the
# token can write labels even for pull requests opened from forks.
on:
  - pull_request_target

jobs:
  triage:
    # Least privilege: read the repository, write only to pull requests.
    permissions:
      contents: read
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      - uses: actions/labeler@v5
        with:
          repo-token: "${{ secrets.GITHUB_TOKEN }}"
          # Also remove a label once its rule no longer matches the PR.
          sync-labels: true
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ web/_build
# Test artifacts and coverage reports
*.tox
*.errs
.hypothesis
.noseids
.coverage*
nltk/test/*.html
Expand Down Expand Up @@ -43,7 +44,6 @@ web/api/*.rst
web/howto/*.rst

# iPython notebooks

.ipynb_checkpoints

# pyenv files
Expand All @@ -66,3 +66,6 @@ venv.bak/

# Mypy
.mypy_cache

# macOS
.DS_Store
21 changes: 15 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,21 +1,30 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.0.1
rev: v4.5.0
hooks:
- id: fix-byte-order-marker
- id: trailing-whitespace
- id: end-of-file-fixer
- id: requirements-txt-fixer
- id: check-yaml
- repo: https://github.com/asottile/pyupgrade
rev: v2.23.3
rev: v3.15.0
hooks:
- id: pyupgrade
args: ["--py36-plus"]
args: ["--py38-plus"]
- repo: https://github.com/ambv/black
rev: 21.7b0
rev: 23.12.0
hooks:
- id: black
- repo: https://github.com/pycqa/isort
rev: 5.9.3
- repo: local
hooks:
- id: isort
name: isort
entry: isort
require_serial: true
language: python
language_version: python3
types_or: [cython, pyi, python]
args: ['--filter-files']
minimum_pre_commit_version: '2.9.2'
additional_dependencies: ['isort==5.13.2']
52 changes: 0 additions & 52 deletions .travis.yml

This file was deleted.

16 changes: 16 additions & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@
- Tom Lippincott
- Peter Ljunglöf
- Alex Louden
- David Lukeš
- Joseph Lynch
- Nitin Madnani
- Felipe Madrigal
Expand All @@ -128,6 +129,7 @@
- David McClosky
- Xinfan Meng
- Dmitrijs Milajevs
- Matt Miller
- Margaret Mitchell
- Tomonori Nagano
- Jason Narad
Expand Down Expand Up @@ -202,6 +204,7 @@
- Álvaro Justen <https://github.com/turicas>
- bjut-hz
- Sergio Oller
- Izam Mohammed <https://github.com/izam-mohammed>
- Will Monroe
- Elijah Rippeth
- Emil Manukyan
Expand Down Expand Up @@ -287,6 +290,19 @@
- Ahmet Yildirim <https://github.com/RnDevelover>
- Yuta Nakamura <https://github.com/yutanakamura-tky>
- Adam Hawley <https://github.com/adamjhawley>
- Panagiotis Simakis <https://github.com/sp1thas>
- Richard Wang <https://github.com/richarddwang>
- Alexandre Perez-Lebel <https://github.com/aperezlebel>
- Fernando Carranza <https://github.com/fernandocar86>
- Martin Kondratzky <https://github.com/martinkondra>
- Heungson Lee <https://github.com/heungson>
- M.K. Pawelkiewicz <https://github.com/hamiltonianflow>
- Steven Thomas Smith <https://github.com/essandess>
- Jan Lennartz <https://github.com/Madnex>
- Tim Sockel <https://github.com/TiMauzi>
- Akihiro Yamazaki <https://github.com/zakkie>
- Ron Urbach <https://github.com/sharpblade4>
- Vivek Kalyan <https://github.com/vivekkalyan>

## Others whose work we've taken and included in NLTK, but who didn't directly contribute it:

Expand Down

0 comments on commit a38da33

Please sign in to comment.