Skip to content

Commit

Permalink
Merge pull request #42 from zaibacu/shortcuts-module
Browse files Browse the repository at this point in the history
Shortcuts module
  • Loading branch information
zaibacu committed Jan 25, 2020
2 parents 1fdb95e + 8838668 commit 718ddfe
Show file tree
Hide file tree
Showing 9 changed files with 144 additions and 6 deletions.
20 changes: 19 additions & 1 deletion CHANGELOG.md
@@ -1,4 +1,22 @@
None 0.3.2 (2019-12-19)
0.4.0 (2020-01-25)
****************************

Features
--------

- Support for deaccent. In general, if an accented version of a word is given, both the deaccented and accented versions will be used to match. To turn it off - `!CONFIG("deaccent", "N")`
#38
- Added shortcuts module to simplify injecting into spaCy
#42

Fix
---

- Fix issue regarding spaCy rules with `IN_LIST` and using case-sensitive mode. It was creating a Regex pattern which is not a valid spaCy pattern
#40


0.3.2 (2019-12-19)
***********************

Features
Expand Down
46 changes: 46 additions & 0 deletions README.md
Expand Up @@ -20,3 +20,49 @@ This is a language, loosely based on language [Apache UIMA RUTA](https://uima.ap
## Support

[![reddit](https://img.shields.io/reddit/subreddit-subscribers/ritaDSL?style=social)](https://www.reddit.com/r/ritaDSL/)


## Simple Rules example

```python
rules = """
cuts = {"fitted", "wide-cut"}
lengths = {"short", "long", "calf-length", "knee-length"}
fabric_types = {"soft", "airy", "crinkled"}
fabrics = {"velour", "chiffon", "knit", "woven", "stretch"}
{IN_LIST(cuts)?, IN_LIST(lengths), WORD("dress")}->MARK("DRESS_TYPE")
{IN_LIST(lengths), IN_LIST(cuts), WORD("dress")}->MARK("DRESS_TYPE")
{IN_LIST(fabric_types)?, IN_LIST(fabrics)}->MARK("DRESS_FABRIC")
"""
```

### Loading in spaCy
```python
import spacy
from rita.shortcuts import setup_spacy


nlp = spacy.load("en")
setup_spacy(nlp, rules_string=rules)
```

And using it:
```
>>> r = nlp("She was wearing a short wide-cut dress")
>>> [{"label": e.label_, "text": e.text} for e in r.ents]
[{'label': 'DRESS_TYPE', 'text': 'short wide-cut dress'}]
```

### Loading using Regex (standalone)
```
import rita
patterns = rita.compile_string(rules, use_engine="standalone")
```

And using it:
```
>>> list(patterns.execute("She was wearing a short wide-cut dress"))
[{'end': 38, 'label': 'DRESS_TYPE', 'start': 18, 'text': 'short wide-cut dress'}]
```
1 change: 0 additions & 1 deletion changes/38.feature.rst

This file was deleted.

1 change: 0 additions & 1 deletion changes/40.fix.rst

This file was deleted.

1 change: 1 addition & 0 deletions changes/42.feature.rst
@@ -0,0 +1 @@
Added shortcuts module to simplify injecting into spaCy
17 changes: 17 additions & 0 deletions docs/quickstart.md
Expand Up @@ -22,6 +22,23 @@ Now you can compile these rules `rita -f <your-file>.rita output.jsonl`

## spaCy backend

### NEW in 0.4.0: Shortcuts to simplify life:
```
import spacy
from rita.shortcuts import setup_spacy
nlp = spacy.load("en")
setup_spacy(nlp, ...)
```

If compiling rules from string:
`setup_spacy(nlp, rules_string=rules)`
If loading rules from a `.rita` file:
`setup_spacy(nlp, rules_path="examples/color-car.rita")`
If loading from spaCy compiled rules:
`setup_spacy(nlp, patterns="rules.jsonl")`

### Doing it manually
```python
import spacy
from spacy.pipeline import EntityRuler
Expand Down
6 changes: 3 additions & 3 deletions rita/__init__.py
Expand Up @@ -9,7 +9,7 @@

logger = logging.getLogger(__name__)

__version__ = (0, 3, 4, os.getenv("VERSION_PATCH"))
__version__ = (0, 4, 0, os.getenv("VERSION_PATCH"))


def get_version():
Expand Down Expand Up @@ -39,8 +39,8 @@ def compile_string(raw, config, use_engine=None):
return result


def compile(fname, compile_fn=None):
def compile(fname, use_engine=None):
with open(fname, "r") as f:
raw = f.read()

return compile_string(raw)
return compile_string(raw, use_engine=use_engine)
20 changes: 20 additions & 0 deletions rita/shortcuts.py
@@ -0,0 +1,20 @@
import rita


def setup_spacy(model, patterns=None, rules_path=None, rules_string=None, override_ents=True):
    """Build an ``EntityRuler`` from RITA rules and add it to a spaCy pipeline.

    The rule source is picked in this order of precedence: ``patterns``,
    then ``rules_path``, then ``rules_string``. Only the first truthy one
    is used.

    Args:
        model: Pipeline object the ruler is constructed with and added to
            through ``model.add_pipe``.
        patterns: Location handed to ``EntityRuler.from_disk``.
        rules_path: File path compiled with ``rita.compile`` using the
            ``"spacy"`` engine.
        rules_string: Rules text compiled with ``rita.compile_string``
            using the ``"spacy"`` engine.
        override_ents: Forwarded to ``EntityRuler`` as ``overwrite_ents``.

    Returns:
        The same ``model`` object that was passed in.

    Raises:
        RuntimeError: If ``patterns``, ``rules_path`` and ``rules_string``
            are all falsy.
    """
    # The spaCy import happens at call time rather than at module import.
    from spacy.pipeline import EntityRuler

    entity_ruler = EntityRuler(model, overwrite_ents=override_ents)

    if patterns:
        # Already-compiled patterns: let the ruler read them itself.
        entity_ruler.from_disk(patterns)
    else:
        if rules_path:
            compiled = rita.compile(rules_path, use_engine="spacy")
        elif rules_string:
            compiled = rita.compile_string(rules_string, use_engine="spacy")
        else:
            raise RuntimeError("Please provides rules. Either `patterns`, `rules_path` or `rules_string`")
        entity_ruler.add_patterns(compiled)

    model.add_pipe(entity_ruler)
    return model
38 changes: 38 additions & 0 deletions tests/test_run.py
@@ -1,6 +1,12 @@
import sys
import json
import tempfile

import pytest
import rita

from rita.run import main
from rita.shortcuts import setup_spacy


def test_simple_compile(mocker):
Expand All @@ -11,3 +17,35 @@ def test_simple_compile(mocker):
"output.jsonl"
]
main()


def test_shortcuts_spacy_inline():
    """setup_spacy accepts rules given directly as a string."""
    spacy_mod = pytest.importorskip("spacy", minversion="2.1")
    pipeline = spacy_mod.load("en")
    inline_rules = '\n{WORD("TEST")}->MARK("TEST")\n'
    setup_spacy(pipeline, rules_string=inline_rules)


def test_shortcuts_spacy_file():
    """setup_spacy accepts a path to a ``.rita`` rules file."""
    spacy_mod = pytest.importorskip("spacy", minversion="2.1")
    pipeline = spacy_mod.load("en")
    rules_file = "examples/color-car.rita"
    setup_spacy(pipeline, rules_path=rules_file)


def test_shortcuts_spacy_compiled():
    """setup_spacy accepts a path to patterns already compiled to JSONL.

    The example rules are compiled, written one JSON object per line to a
    temporary ``.jsonl`` file, and that file's name is passed as
    ``patterns``.
    """
    spacy = pytest.importorskip("spacy", minversion="2.1")
    nlp = spacy.load("en")
    with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl") as f:
        patterns = rita.compile("examples/color-car.rita")
        for pattern in patterns:
            f.write(json.dumps(pattern) + "\n")
        # The file is re-opened by name below, so buffered writes must reach
        # disk first; otherwise the ruler may load an empty or partial file.
        f.flush()
        # Stay inside the ``with`` block: the temp file is removed on close.
        setup_spacy(nlp, patterns=f.name)


def test_shortcuts_spacy_giving_no_rules():
    """setup_spacy raises RuntimeError when no rule source is supplied."""
    spacy_mod = pytest.importorskip("spacy", minversion="2.1")
    pipeline = spacy_mod.load("en")
    with pytest.raises(RuntimeError):
        setup_spacy(pipeline)

0 comments on commit 718ddfe

Please sign in to comment.