Skip to content

Commit

Permalink
Merge pull request #91 from microsoft/omri/master_updates
Browse files Browse the repository at this point in the history
  • Loading branch information
omri374 committed Dec 25, 2023
2 parents d07ceee + 1819181 commit e55c173
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 90 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Expand Up @@ -189,4 +189,5 @@ datasets/
/data

*.spacy
*.pickle
*.pickle
/poetry.lock
45 changes: 0 additions & 45 deletions .pipelines/ci.yml

This file was deleted.

6 changes: 4 additions & 2 deletions README.md
Expand Up @@ -36,8 +36,10 @@ conda create --name presidio python=3.9
conda activate presidio

# Install package+dependencies
pip install -r requirements.txt
python setup.py install
pip install poetry
poetry install
# To install with all additional NER dependencies (e.g. Flair, Stanza, CRF), run:
# poetry install --with ner

# Download a spaCy model used by presidio-analyzer
python -m spacy download en_core_web_lg
Expand Down
35 changes: 35 additions & 0 deletions azure-pipelines.yml
@@ -0,0 +1,35 @@
# CI for pull requests targeting master or any feature/* branch.
pr:
  branches:
    include:
      - master
      - feature/*

pool:
  vmImage: 'ubuntu-latest'

# Test every Python version permitted by pyproject.toml (python = "^3.9").
# Python 3.8 is deliberately NOT in the matrix: `poetry install` refuses to
# run when the interpreter falls outside the project's declared range, so a
# 3.8 job would fail at the install step before any test executes.
strategy:
  matrix:
    Python39:
      python.version: '3.9'
    Python310:
      python.version: '3.10'
    Python311:
      python.version: '3.11'

steps:
  - task: UsePythonVersion@0
    inputs:
      versionSpec: '$(python.version)'
    displayName: 'Use Python $(python.version)'

  - script: |
      python -m pip install --upgrade pip
      pip install poetry
      # --with dev,ner pulls in pytest, flake8, pytest-azurepipelines and the
      # optional NER extras (flair, stanza, crfsuite) needed by --runslow tests.
      poetry install --with dev,ner
    displayName: 'Install dependencies'

  # pytest-azurepipelines is already a dev dependency installed above, so no
  # `poetry add` is needed here (it would also mutate pyproject.toml on the agent).
  - script: |
      poetry run pytest --runslow
    displayName: 'pytest'
5 changes: 4 additions & 1 deletion presidio_evaluator/models/presidio_recognizer_wrapper.py
Expand Up @@ -41,12 +41,15 @@ def __init__(
self.recognizer = recognizer
self.nlp_engine = nlp_engine

if not self.nlp_engine.is_loaded():
self.nlp_engine.load()

#
def __make_nlp_artifacts(self, text: str):
    """Run the wrapped NLP engine over ``text`` and return its NLP artifacts.

    :param text: Raw input text to process.
    :return: Whatever ``nlp_engine.process_text`` produces (engine-specific
        artifacts object) for the English ("en") pipeline.
    """
    # Language is hard-coded to English; this wrapper is only used for
    # English-language evaluation.
    engine = self.nlp_engine
    return engine.process_text(text, "en")

#
def predict(self, sample: InputSample) -> List[str]:
def predict(self, sample: InputSample, **kwargs) -> List[str]:
nlp_artifacts = None
if self.with_nlp_artifacts:
nlp_artifacts = self.__make_nlp_artifacts(sample.full_text)
Expand Down
48 changes: 48 additions & 0 deletions pyproject.toml
@@ -0,0 +1,48 @@
[tool.poetry]
name = "presidio_evaluator"
version = "0.1.0"
# Short summary shown on package indexes; was previously empty.
description = "Evaluation tools for PII detection models, plus fake-PII data generation"
authors = ["Omri Mendels <omri374@users.noreply.github.com>"]
readme = "README.md"

[tool.poetry.dependencies]
# NOTE(review): keep this range in sync with the CI matrix in azure-pipelines.yml.
python = "^3.9"
spacy = ">=3.2.0, <4.0.0"
numpy = ">=1.20.2,<2.0.0"
jupyter = ">=1"
pandas = ">=1.2.4,<2.0.0"
tqdm = ">=4.60.0,<5.0.0"
haikunator = ">=2.1.0,<3.0.0"
schwifty = ">=2023.11.2,<2024.0.0"
faker = ">=9.6.0,<10.0.0"
# Was ">1.3.2" (strict), which excludes 1.3.2 itself; ">=" is the intended
# inclusive lower bound.
scikit-learn = ">=1.3.2,<2.0.0"
presidio-analyzer = "^2.2.351"
presidio-anonymizer = "^2.2.351"
requests = ">=2.25.1"
xmltodict = ">=0.12.0"
python-dotenv = "^1.0.0"
plotly = "^5.18.0"
azure-ai-textanalytics = ">=5.3.0"
en_core_web_sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz"}
en_core_web_lg = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1.tar.gz"}

# optional dependencies for the different NLP approaches
[tool.poetry.group.ner]
optional=true

[tool.poetry.group.ner.dependencies]
flair = "^0.13.0"
spacy_stanza = "^1.0.0"
sklearn_crfsuite = "^0.3.6"
spacy_huggingface_pipelines = "^0.0.4"


[tool.poetry.group.dev.dependencies]
# ">=6.*" / ">=3.*" are invalid PEP 440 specifiers (wildcards are only legal
# with == / !=); use plain inclusive lower bounds instead. pytest was also
# duplicated in the runtime dependency list above — it is a test-only
# dependency, so it lives solely in this dev group now.
pytest = ">=6.2.3"
flake8 = ">=3.0"
pytest-azurepipelines = "^1.0.5"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
21 changes: 0 additions & 21 deletions requirements.txt

This file was deleted.

20 changes: 0 additions & 20 deletions requirements_all.txt

This file was deleted.

0 comments on commit e55c173

Please sign in to comment.