-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Taqi Jaffri
committed
Mar 13, 2024
1 parent
9592d58
commit f958be1
Showing
1 changed file
with
50 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,54 @@ | ||
import pytest | ||
|
||
from docugami_dfm_benchmarks.utils.similarity import compute_f1 | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"text1,text2,expected_f1", | ||
[ | ||
("This is a test", "This is a test", 1.0), # Exact match | ||
("One two a three", "one two three", 1.0), # Exact match modulo article, whitespace and casing | ||
("One two a three", " four five a six", 0.0), # No match | ||
], | ||
) | ||
def test_compute_f1(text1: str, text2: str, expected_f1: float) -> None: | ||
def test_compute_f1_exact_match() -> None:
    """Check that two identical strings yield an F1 score of exactly 1.0."""
    sentence = "This is a test"
    # The same text is used for both arguments, so the score must be perfect.
    assert compute_f1(sentence, sentence) == 1.0
|
||
|
||
def test_compute_f1_normalized_match() -> None:
    """Check that inputs differing only superficially still score 1.0.

    The two strings differ in letter casing and in the presence of the
    article "a"; the test expects compute_f1 to ignore those differences
    and report a perfect match.
    """
    with_article, without_article = "One two a three", "one two three"
    score = compute_f1(with_article, without_article)
    assert score == 1.0
|
||
|
||
def test_compute_f1_no_match() -> None:
    """Check that strings with no shared content words score 0.0.

    The only token the inputs have in common is the article "a", which the
    test expects not to count as overlap.
    """
    left = "One two a three"
    right = " four five a six"
    score = compute_f1(left, right)
    assert score == 0.0
|
||
|
||
def test_compute_f1_partial_match() -> None:
    """Check the F1 score for inputs that share exactly one of three tokens.

    "quick brown fox" and "lazy brown dog" overlap only on "brown", so both
    precision and recall are 1/3 and their harmonic mean (F1) is also 1/3.
    """
    text1 = "quick brown fox"
    text2 = "lazy brown dog"

    precision = recall = 1 / 3
    expected_f1 = 2 * precision * recall / (precision + recall)

    # Compare with a tolerance: the expected value is a non-representable
    # float, and exact equality would depend on compute_f1 performing the
    # arithmetic in precisely the same order as the expression above.
    assert compute_f1(text1, text2) == pytest.approx(expected_f1)
|
||
|
||
def test_compute_f1_with_empty_strings() -> None:
    """Check the edge cases where one or both inputs are empty strings."""
    empty = ""

    # Two empty inputs are expected to count as a perfect match.
    both_empty_score = compute_f1(empty, empty)
    assert both_empty_score == 1.0

    # A non-empty input against an empty one is expected to score zero.
    one_empty_score = compute_f1("quick brown fox", empty)
    assert one_empty_score == 0.0