Skip to content

Commit

Permalink
Mix format
Browse files (browse the repository at this point in the history)
  • Loading branch information
arjan committed Mar 7, 2024
1 parent 41a2b68 commit 9ef48e4
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 5 deletions.
2 changes: 1 addition & 1 deletion lib/bubble_match/parser.ex
Expand Up @@ -9,7 +9,7 @@ defmodule BubbleMatch.Parser do
@ws [9, 10, 11, 12, 13, 32]
ws = ignore(utf8_string(@ws, min: 1))

- special_chars = '`~!@#$%^&*()_+=-{}|\\][\';":?><,./' ++ @ws
+ special_chars = ~c"`~!@#$%^&*()_+=-{}|\\][';\":?><,./" ++ @ws

string = utf8_string(Enum.map(special_chars, &{:not, &1}), min: 1)

Expand Down
5 changes: 4 additions & 1 deletion lib/bubble_match/unidekode.ex
Expand Up @@ -44,6 +44,9 @@ defmodule BubbleMatch.Unidekode do
iex> BubbleMatch.Unidekode.drop_accented("código 👍")
"codigo 👍"
iex> BubbleMatch.Unidekode.drop_accented("éé немає 👍")
"ee немає 👍"
"""
@spec drop_accented(binary()) :: binary()
def drop_accented(string), do: drop_accented(string, <<>>)
Expand Down
Expand Up @@ -92,7 +95,7 @@ defmodule BubbleMatch.Unidekode do
end)
|> Stream.concat(
for x <-
- '!"#%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~\s\t\n',
+ ~c"!\"#%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~\s\t\n",
do: {x, <<x>>}
)
|> Enum.uniq()
Expand Down
6 changes: 3 additions & 3 deletions test/bubble_match/sentence_test.exs
Expand Up @@ -142,9 +142,9 @@ defmodule BubbleMatch.SentenceTest do
end

@spacy_json2 """
- {"detected_language": null, "detected_language_prob": 0.12450417876243591, "ents": [], "nlp_language": "en", "sents": [{"end": 8, "start": 0}], "text": "Thanks 8", "tokens": [{"dep": "compound", "end": 6, "head": 1, "id": 0, "lemma": "thanks", "norm": "thanks", "pos": "INTJ", "start": 0, "string": "Thanks ", "tag": "UH"}, {"dep": "ROOT", "end": 8, "head": 1, "id": 1, "lemma": "8", "norm": "8", "pos": "PROPN", "start": 7, "string": "8", "tag": "NNP"}]}
- """
- |> Jason.decode!()
+ {"detected_language": null, "detected_language_prob": 0.12450417876243591, "ents": [], "nlp_language": "en", "sents": [{"end": 8, "start": 0}], "text": "Thanks 8", "tokens": [{"dep": "compound", "end": 6, "head": 1, "id": 0, "lemma": "thanks", "norm": "thanks", "pos": "INTJ", "start": 0, "string": "Thanks ", "tag": "UH"}, {"dep": "ROOT", "end": 8, "head": 1, "id": 1, "lemma": "8", "norm": "8", "pos": "PROPN", "start": 7, "string": "8", "tag": "NNP"}]}
+ """
+ |> Jason.decode!()

test "Emoji false positive" do
s = %{tokenizations: [_]} = Sentence.from_spacy(@spacy_json2)
Expand Down

0 comments on commit 9ef48e4

Please sign in to comment.