Skip to content

Commit

Permalink
Merge pull request #26 from zaibacu/config-revamp
Browse files Browse the repository at this point in the history
Config revamp
  • Loading branch information
zaibacu committed Dec 14, 2019
2 parents 524781b + f7cfdc1 commit 487e63f
Show file tree
Hide file tree
Showing 19 changed files with 409 additions and 174 deletions.
20 changes: 20 additions & 0 deletions docs/config.md
@@ -0,0 +1,20 @@
# Config

Configuration is mostly applied on a per-rule basis, meaning that different rules can have different configurations while running in the same process.

## Syntax

Configuration is intended to be set from within the rules themselves, like so:

```
!CONFIG("ignore_case", "Y")
```

The first argument is the config key, the second is its value. The values `"1"`, `"Y"` and `"T"` result in `True`; `"0"`, `"N"` and `"F"` result in `False`. Any other value is stored as-is.

## Configurations

| Setting | Default | Description |
|--------------------|----------------------|----------------------------------------------------------------------------|
| implicit_punct |`T` |Automatically adds punctuation characters `,.!:\;` to the rules |
| ignore_case |`T` |All rules are case-insensitive |
3 changes: 1 addition & 2 deletions docs/quickstart.md
Expand Up @@ -85,8 +85,7 @@ Here's a test covering this case

```python
def test_standalone_simple():
from rita.engine.translate_standalone import compile_rules
patterns = rita.compile("examples/simple-match.rita", compile_fn=compile_rules)
patterns = rita.compile("examples/simple-match.rita", use_engine="standalone")
results = list(patterns.execute("Donald Trump was elected President in 2016 defeating Hilary Clinton."))
assert len(results) == 2
entities = list([(r["text"], r["label"]) for r in results])
Expand Down
1 change: 1 addition & 0 deletions mkdocs.yml
Expand Up @@ -8,6 +8,7 @@ nav:
- Syntax: syntax.md
- Macros: macros.md
- Extending: extend.md
- Config: config.md
theme: readthedocs
markdown_extensions:
- toc:
Expand Down
22 changes: 9 additions & 13 deletions rita/__init__.py
Expand Up @@ -2,30 +2,26 @@
import types

from rita import engine
from rita.config import Config
from rita.config import with_config
from rita.parser import RitaParser
from rita.preprocess import preprocess_rules


logger = logging.getLogger(__name__)


def config():
return Config()


def compile_string(raw, compile_fn=None):
parser = RitaParser()
@with_config
def compile_string(raw, config, use_engine=None):
parser = RitaParser(config)
parser.build()
root = parser.parse(raw)
logger.debug(root)
if compile_fn:
compile_rules = compile_fn
if use_engine:
compile_rules = config.get_engine(use_engine)
else:
compile_rules = engine.get_default()

rules = list(preprocess_rules(root))
result = compile_rules(rules)
compile_rules = config.default_engine
rules = list(preprocess_rules(root, config))
result = compile_rules(rules, config)
if isinstance(result, types.GeneratorType):
return list(result)
else:
Expand Down
105 changes: 90 additions & 15 deletions rita/config.py
@@ -1,17 +1,92 @@
class Config(object):
@property
def list_ignore_case(self):
"""
Ignore case while doing `IN_LIST` operation
"""
return True
import operator
import logging
from importlib import import_module

try:
import spacy
from rita.engine.translate_spacy import compile_rules as spacy_engine
except ImportError:
pass

from rita.engine.translate_standalone import compile_rules as standalone_engine

from rita.utils import SingletonMixin


logger = logging.getLogger(__name__)


class Config(SingletonMixin):
def __init__(self):
    """Create the engine registry and register the built-in engines.

    The spaCy engine is optional: it is imported at module level inside a
    ``try``/``except ImportError`` block, so the name ``spacy_engine`` may
    not exist. The standalone engine is always registered.
    """
    self.available_engines = []
    self.engines_by_key = {}

    # Probe only the optional name. Wrapping the whole register_engine()
    # call would also hide a NameError raised from inside it.
    try:
        optional_engine = spacy_engine
    except NameError:
        # spacy_engine is not imported
        optional_engine = None

    if optional_engine is not None:
        self.register_engine(1, "spacy", optional_engine)
    self.register_engine(2, "standalone", standalone_engine)

def register_engine(self, priority, key, compile_fn):
    """Register ``compile_fn`` as an engine reachable under ``key``.

    :param priority: sort key for the registry; lower numbers come first.
    :param key: name used to look the engine up by key.
    :param compile_fn: callable that compiles rules for this engine.
    """
    self.available_engines.append((priority, key, compile_fn))
    self.engines_by_key[key] = compile_fn
    # sorted() builds a new list and leaves the registry untouched, so the
    # registry has to be sorted in place to keep it ordered by priority.
    self.available_engines.sort(key=operator.itemgetter(0))

@property
def implicit_punct(self):
    """
    Automatically add optional Punctuation characters inside rule between macros.
    eg. `WORD(w1), WORD(w2)`
    would be converted into:
    `WORD(w1), PUNCT?, WORD(w2)`
    """
    return True


@property
def default_engine(self):
    """Compile function of the registered engine with the lowest priority.

    Exposed as a property because it is read as a plain attribute
    (``config.default_engine``), not called.

    :raises IndexError: if no engine has been registered.
    """
    if not self.available_engines:
        raise IndexError("no engines registered")
    # Pick by priority explicitly instead of trusting the list order.
    _, _, compile_fn = min(self.available_engines, key=operator.itemgetter(0))
    return compile_fn

def get_engine(self, key):
    """Return the compile function registered under ``key``.

    :raises KeyError: if no engine is registered under ``key``; the message
        lists the keys that are available.
    """
    try:
        return self.engines_by_key[key]
    except KeyError:
        available = ", ".join(sorted(self.engines_by_key))
        raise KeyError(
            "Unknown engine {!r}; available engines: {}".format(key, available)
        )



class SessionConfig(object):
    """Per-session settings layered on top of the shared ``Config``.

    Settings held in ``_data`` (``ignore_case``, ``implicit_punct`` and
    anything stored through :meth:`set_config`) belong to this instance.
    Any other attribute lookup falls through to the root ``Config``.
    """

    # Upper-cased spellings recognised as boolean setting values.
    _TRUE_VALUES = ("1", "T", "Y")
    _FALSE_VALUES = ("0", "F", "N")

    def __init__(self):
        self._root = Config()
        self.modules = []
        # Default config
        self._data = {
            "ignore_case": True,
            "implicit_punct": True,
        }
        self.variables = {}

    def register_module(self, mod_name):
        """Import ``mod_name`` and keep the module object in ``modules``."""
        logger.debug("Importing module: {}".format(mod_name))
        self.modules.append(import_module(mod_name))

    def set_variable(self, k, v):
        """Store variable ``k`` with value ``v``."""
        self.variables[k] = v

    def get_variable(self, k):
        """Return variable ``k``; raises ``KeyError`` if it was never set."""
        return self.variables[k]

    def __getattr__(self, name):
        """Resolve ``name`` from session settings, then from the root config.

        Python calls this only after normal attribute lookup has failed.
        """
        # If the backing attributes themselves are missing (instance not
        # initialised yet), looking them up here would re-enter __getattr__
        # without end. Report them as missing instead.
        if name in ("_root", "_data"):
            raise AttributeError(name)

        data = self._data
        if name in data:
            return data[name]

        return getattr(self._root, name)

    def set_config(self, k, v):
        """Store setting ``k``.

        String values ``1``/``T``/``Y`` and ``0``/``F``/``N`` (any case) are
        converted to booleans. Every other value, including non-strings, is
        stored unchanged.
        """
        # Handle booleans first
        if isinstance(v, str):
            flag = v.upper()
            if flag in self._TRUE_VALUES:
                v = True
            elif flag in self._FALSE_VALUES:
                v = False
        self._data[k] = v


def with_config(fn):
    """Decorator that supplies ``fn`` with a ``config`` keyword argument.

    A fresh ``SessionConfig`` is created for each call unless the caller
    passes its own ``config``. The wrapper keeps the name and docstring of
    ``fn``.
    """
    from functools import wraps

    @wraps(fn)
    def wrapper(*args, **kwargs):
        # Respect an explicitly supplied config; otherwise start a new session.
        if kwargs.get("config") is None:
            kwargs["config"] = SessionConfig()
        return fn(*args, **kwargs)

    return wrapper
9 changes: 0 additions & 9 deletions rita/engine/__init__.py
@@ -1,9 +0,0 @@
try:
import spacy
from .translate_spacy import compile_rules
except ImportError:
from .translate_standalone import compile_rules


def get_default():
return compile_rules
40 changes: 25 additions & 15 deletions rita/engine/translate_spacy.py
Expand Up @@ -7,52 +7,61 @@
logger = logging.getLogger(__name__)


def any_of_parse(lst, op=None):
base = {"LOWER": {"REGEX": r"({0})".format("|".join(sorted(lst)))}}
def any_of_parse(lst, config, op=None):
    """Yield one token pattern matching any of the items in ``lst``.

    :param lst: iterable of strings, joined into a regex alternation.
    :param config: object exposing ``ignore_case``.
    :param op: optional operator, stored under ``"OP"`` when truthy.
    """
    if config.ignore_case:
        attr = "LOWER"
        choices = sorted(item.lower() for item in lst)
    else:
        # REGEX has to be nested under a token attribute, as regex_parse
        # does with TEXT; a bare {"REGEX": ...} names no attribute.
        attr = "TEXT"
        choices = sorted(lst)

    base = {attr: {"REGEX": r"({0})".format("|".join(choices))}}
    if op:
        base["OP"] = op
    yield base


def regex_parse(r, op=None):
d = {"TEXT": {"REGEX": r}}
def regex_parse(r, config, op=None):
    """Yield one token pattern matching the regular expression ``r``.

    With ``config.ignore_case`` the pattern is matched against the
    lower-cased token, so its literal characters are lower-cased too.

    :param r: regular expression source.
    :param config: object exposing ``ignore_case``.
    :param op: optional operator, stored under ``"OP"`` when truthy.
    """
    import re

    if config.ignore_case:
        # Lower-case everything except backslash escapes: a plain r.lower()
        # would turn e.g. \W into \w and change what the pattern means.
        lowered = re.sub(
            r"(\\.)|([^\\]+)",
            lambda m: m.group(1) or m.group(2).lower(),
            r,
            flags=re.DOTALL,
        )
        d = {"LOWER": {"REGEX": lowered}}
    else:
        d = {"TEXT": {"REGEX": r}}

    if op:
        d["OP"] = op
    yield d


def fuzzy_parse(r, op=None):
def fuzzy_parse(r, config, op=None):
    """Yield one ``LOWER`` regex pattern for the alternatives in ``r``.

    The alternatives are joined with ``|`` and may be followed by a single
    trailing punctuation character. ``config`` is accepted but not used.
    """
    # TODO: build permutations
    alternatives = "|".join(r)
    pattern = {"LOWER": {"REGEX": "({0})[.,?;!]?".format(alternatives)}}
    if op:
        pattern["OP"] = op
    yield pattern


def generic_parse(tag, value, op=None):
def generic_parse(tag, value, config, op=None):
    """Yield one pattern mapping ``tag`` to ``value``.

    ``op`` is stored under ``"OP"`` when truthy. ``config`` is accepted but
    not used.
    """
    token = {tag: value}
    if op:
        token["OP"] = op
    yield token

def punct_parse(_, op=None):
def punct_parse(_, config, op=None):
    """Yield one pattern matching a punctuation token.

    The first argument and ``config`` are ignored. ``op`` is stored under
    ``"OP"`` when truthy.
    """
    token = {"IS_PUNCT": True}
    if op:
        token["OP"] = op
    yield token

def phrase_parse(value, op=None):
def phrase_parse(value, config, op=None):
    """Yield ``ORTH`` patterns for a hyphenated phrase.

    ``value`` is split on ``"-"``; each part is emitted as its own token,
    with a literal ``"-"`` token between consecutive parts. ``config`` is
    accepted but not used.

    TODO: Does not support operators
    """
    parts = value.split("-")
    # Emit each token exactly once. The stale pre-change calls that sat next
    # to the config-aware ones doubled every token.
    yield {"ORTH": parts[0]}
    for part in parts[1:]:
        yield {"ORTH": "-"}
        yield {"ORTH": part}


PARSERS = {
Expand All @@ -68,16 +77,17 @@ def phrase_parse(value, op=None):
}


def rules_to_patterns(label, data):
def rules_to_patterns(label, data, config):
    """Build one labelled pattern dict from parsed rule data.

    :param label: label stored under ``"label"``.
    :param data: iterable of ``(type, value, op)`` triples; ``type`` selects
        the parser from ``PARSERS``.
    :param config: passed through to every parser.
    :return: ``{"label": label, "pattern": [...]}``.
    """
    # Debug output goes through the module logger rather than stdout.
    logger.debug("Building pattern for %s: %s", label, data)

    pattern = []
    for (t, d, op) in data:
        pattern.extend(PARSERS[t](d, config=config, op=op))

    return {
        "label": label,
        "pattern": pattern,
    }


def compile_rules(rules):
def compile_rules(rules, config):
    """Compile rule groups into a list of labelled patterns.

    :param rules: iterable of groups, each unpacked positionally into
        ``rules_to_patterns``.
    :param config: passed through to ``rules_to_patterns``.
    """
    logger.info("Using spaCy rules implementation")
    return [rules_to_patterns(*group, config=config) for group in rules]
5 changes: 4 additions & 1 deletion rita/engine/translate_standalone.py
Expand Up @@ -87,6 +87,9 @@ def gen():
"""
Implicitly add spaces between rules
"""
if len(data) == 0:
return

yield data[0]
for (t, d, op) in data[1:]:
if t != "punct":
Expand Down Expand Up @@ -118,7 +121,7 @@ def execute(self, text):
}


def compile_rules(rules):
def compile_rules(rules, config):
logger.info("Using standalone rule implementation")
patterns = [rules_to_patterns(*group) for group in rules]
executor = RuleExecutor(patterns)
Expand Down

0 comments on commit 487e63f

Please sign in to comment.