
Commit

added ignored columns handling
Taqi Jaffri committed Mar 13, 2024
1 parent f15cb12 commit ecc9f14
Showing 3 changed files with 74 additions and 23 deletions.
12 changes: 11 additions & 1 deletion docugami_dfm_benchmarks/cli.py
@@ -44,6 +44,7 @@ def eval_by_column(
 def eval_by_csv(
     ground_truth_csv: Path,
     model_output_csv: Path,
+    key_column: Optional[str] = None,
     output_format: OutputFormat = OutputFormat.GITHUB_MARKDOWN,
 ) -> None:

@@ -54,10 +55,19 @@ def eval_by_csv(
     model_output_reader = csv.DictReader(model_output_file)
     model_output_data = [row for row in model_output_reader]

-    scores = score_by_separate_csvs(gt_data, model_output_data)
+    scores, ignored_columns_gt, ignored_columns_model = score_by_separate_csvs(
+        gt_data, model_output_data
+    )
     table = tabulate_scores(scores, output_format)
     typer.echo(table)

+    typer.echo(
+        f"Ignored columns in ground truth CSV (no match in model output): {ignored_columns_gt}"
+    )
+    typer.echo(
+        f"Ignored columns in model output CSV (no match in ground truth): {ignored_columns_model}"
+    )
+

 def _version_callback(value: bool) -> None:
     """
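For illustration, with the change above, a run where each CSV contains one column the other lacks would end its output with two lines of the form below (the column names here are hypothetical, and the scores table itself is omitted):

Ignored columns in ground truth CSV (no match in model output): {'Unique GT column'}
Ignored columns in model output CSV (no match in ground truth): {'Unique MO column'}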
54 changes: 37 additions & 17 deletions docugami_dfm_benchmarks/utils/scorer.py
@@ -1,4 +1,4 @@
-from typing import Any
+from typing import Any, Optional

 import numpy as np
 from tqdm import tqdm
@@ -106,8 +106,10 @@ def score_by_column(data: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:


 def score_by_separate_csvs(
-    ground_truth_data: list[dict[str, Any]], model_output_data: list[dict[str, Any]]
-) -> dict:
+    ground_truth_data: list[dict[str, Any]],
+    model_output_data: list[dict[str, Any]],
+    key_column: Optional[str] = None,
+) -> tuple[dict, set, set]:
     """
     Scores model output against ground truth data when provided in separate CSVs.
     Each CSV should have columns with identical names for comparison. This function
@@ -124,22 +126,40 @@ def score_by_separate_csvs(
     Returns:
         - A dictionary of scores for each common column.
     """
-    gt_columns = set(ground_truth_data[0].keys())
-    model_columns = set(model_output_data[0].keys())
-    common_columns = gt_columns.intersection(model_columns)
-    ignored_columns = (gt_columns.union(model_columns)) - common_columns
-
-    if ignored_columns:
-        print(
-            f"Warning: Ignoring columns without matches in both CSVs: {ignored_columns}"
-        )
+    # Create mappings for ground truth and model output columns from normalized to original names
+    gt_columns_normalized = {normalize(key): key for key in ground_truth_data[0].keys()}
+    model_columns_normalized = {
+        normalize(key): key for key in model_output_data[0].keys()
+    }
+
+    # Identify common columns based on normalized names and keep track of the original names for later use
+    common_columns_normalized = set(gt_columns_normalized.keys()).intersection(
+        model_columns_normalized.keys()
+    )
+
+    # Initialize scores dictionary
     scores = {}
-    for column in tqdm(common_columns):
-        gt_annotations = [row[column] for row in ground_truth_data]
-        model_outputs = [row[column] for row in model_output_data]
+
+    # Prepare sets to track ignored columns based on their original names
+    ignored_columns_gt = set(ground_truth_data[0].keys()) - set(
+        gt_columns_normalized[norm] for norm in common_columns_normalized
+    )
+    ignored_columns_model = set(model_output_data[0].keys()) - set(
+        model_columns_normalized[norm] for norm in common_columns_normalized
+    )
+
+    # Iterate over common columns using the normalized names to facilitate comparison
+    for norm_col in common_columns_normalized:
+        original_gt_col = gt_columns_normalized[norm_col]
+        original_model_col = model_columns_normalized[norm_col]
+
+        gt_annotations = [row[original_gt_col] for row in ground_truth_data]
+        model_outputs = [row[original_model_col] for row in model_output_data]

         column_scores = _compute_scores_for_column(gt_annotations, model_outputs)
         _finalize_scores(column_scores, len(ground_truth_data))
-        scores[column] = column_scores
+        scores[original_gt_col] = (
+            column_scores  # Use the ground truth's original column name
+        )

-    return scores
+    return scores, ignored_columns_gt, ignored_columns_model
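For reference, a minimal usage sketch of the updated scorer (assuming the package is importable as docugami_dfm_benchmarks and that each row dict comes from csv.DictReader); the rows mirror the updated test below:

from docugami_dfm_benchmarks.utils.scorer import score_by_separate_csvs

# One column on each side has no counterpart in the other CSV, so each should be
# reported as ignored rather than scored.
ground_truth_data = [
    {"Column1": "Test sentence.", "Column2": "Another test.", "Unique GT column": "xyz"},
]
model_output_data = [
    {"Column1": "Test sentence.", "Column2": "", "Unique MO column": "abc"},
]

scores, ignored_gt, ignored_model = score_by_separate_csvs(ground_truth_data, model_output_data)
print(sorted(scores))    # expected: ['Column1', 'Column2'] (ground truth's original names)
print(ignored_gt)        # expected: {'Unique GT column'}
print(ignored_model)     # expected: {'Unique MO column'}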
31 changes: 26 additions & 5 deletions tests/utils/test_scorer.py
@@ -56,12 +56,28 @@ def test_score_by_column() -> None:

 def test_score_by_separate_csvs() -> None:
     ground_truth_data = [
-        {"Column1": "Test sentence.", "Column2": "Another test."},
-        {"Column1": "Second sentence.", "Column2": "Yet another test."},
+        {
+            "Column1": "Test sentence.",
+            "Column2": "Another test.",
+            "Unique GT column": "xyz",
+        },
+        {
+            "Column1": "Second sentence.",
+            "Column2": "Yet another test.",
+            "Unique GT column": "xyz",
+        },
     ]
     model_output_data = [
-        {"Column1": "Test sentence.", "Column2": ""},
-        {"Column1": "A different second sentence.", "Column2": "Yet another test."},
+        {
+            "Column1": "Test sentence.",
+            "Column2": "",
+            "Unique MO column": "abc",
+        },
+        {
+            "Column1": "A different second sentence.",
+            "Column2": "Yet another test.",
+            "Unique MO column": "abc",
+        },
     ]
     expected_scores = {
         "Column1": {
@@ -79,7 +95,12 @@ def test_score_by_separate_csvs() -> None:
f"{SIM_TITLE}0.6": 0.5,
},
}
scores = score_by_separate_csvs(ground_truth_data, model_output_data)
scores, ignored_columns_gt, ignored_columns_model = score_by_separate_csvs(
ground_truth_data, model_output_data
)

assert ignored_columns_gt == {"Unique GT column"}
assert ignored_columns_model == {"Unique MO column"}

for column in expected_scores:
for metric in expected_scores[column]:
