Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
RobinL committed Oct 14, 2019
1 parent 5f0b19d commit 4dd25eb
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 3 deletions.
16 changes: 14 additions & 2 deletions fuzzymatcher/data_getter_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,17 +177,29 @@ def _tokens_to_matches(self, tokens, misspelling = False):
"""

# This fails if the special tokens 'and' or 'or' are in fts string! See issue 35!
tokens_to_remove = ["AND", "OR"]
tokens = [t for t in tokens if t not in tokens_to_remove]
tokens_to_escape = ["AND", "OR", "NEAR"]

def escape_token(t):
    """Return *t* wrapped in double quotes when it is one of
    ``tokens_to_escape``; any other token is returned unchanged."""
    # Quoting is intended to stop the word being read as a match-expression
    # keyword when the tokens are later joined into the query string.
    needs_quoting = t in tokens_to_escape
    return '"' + t + '"' if needs_quoting else t


tokens = [escape_token(t) for t in tokens]

fts_string = " ".join(tokens)


if misspelling:
table_name = "_concat_all_alternatives"
else:
table_name = "_concat_all"

sql = get_records_sql.format(table_name, fts_string, self.return_records_limit)


cur = self.con.cursor()
cur.execute(sql)
results = cur.fetchall()
Expand Down
5 changes: 5 additions & 0 deletions tests/data/left_token_escape.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
id,fname,mname,lname,dob,another_field
1,or,or and,and,20/05/1980,other data
2,or,or,or smith or,15/06/1990,more data
3,near,and,near,20/05/1960,another thing

4 changes: 4 additions & 0 deletions tests/data/right_token_escape.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
id,name,middlename,surname,date,other
1,or,or,or smith or,15/06/1990,more data
2,near,and,near,20/05/1960,another thing
3,or,or and,and,20/05/1980,other data
29 changes: 28 additions & 1 deletion tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,31 @@ def test_nulls_no_errors(self):

on = ["first_name", "surname", "dob", "city"]

flj = link_table(df_left, df_right, on, on)
flj = link_table(df_left, df_right, on, on)


class TestNulls(unittest.TestCase):
    """
    Test what happens when the user provides input data with
    fts4 match expression keywords like AND, OR, NEAR
    """

    # NOTE(review): the class and method names mention "nulls" although this
    # case is about keyword escaping. If an earlier class in this module is
    # also named TestNulls, this definition replaces it and the earlier tests
    # would not run - confirm and consider renaming.

    def test_nulls_no_errors(self):
        """
        Link two tables whose match columns contain the words 'or', 'and'
        and 'near'; the test passes if linking completes without raising.
        """
        df_left = pd.read_csv("tests/data/left_token_escape.csv")
        df_right = pd.read_csv("tests/data/right_token_escape.csv")

        # Columns to match on from df_left
        left_on = ["fname", "mname", "lname"]

        # Columns to match on from df_right
        right_on = ["name", "middlename", "surname"]

        # Both fixtures use "id" as their identifier column.
        flj = link_table(df_left, df_right, left_on, right_on,
                         left_id_col="id", right_id_col="id")

0 comments on commit 4dd25eb

Please sign in to comment.