Skip to content

Commit

Permalink
Merge branch 'master' of github.com:wikilinks/neleval
Browse files Browse the repository at this point in the history
  • Loading branch information
jnothman committed Aug 8, 2014
2 parents 97c0bed + 86ec7a3 commit b4d87bd
Show file tree
Hide file tree
Showing 7 changed files with 200 additions and 14 deletions.
49 changes: 49 additions & 0 deletions scripts/run_report_confidence.sh
@@ -0,0 +1,49 @@
#!/usr/bin/env bash
#
# Prepare summary report per measure with confidence intervals
#
# Usage: run_report_confidence.sh OUT_DIR MEASURE ...
#
# For every MEASURE given, (re)writes OUT_DIR/00report.MEASURE: a header
# row followed by one tab-separated row per OUT_DIR/*.confidence file.
# A row holds whitespace-delimited fields 3-9 of the file's line(s) that
# mention both the measure and "fscore", then the system name (the file
# name without ".confidence"). Rows are sorted numerically on the 4th
# column (headed "score"), highest first.
#
# NOTE: pipefail is deliberately not enabled. A system with no matching
# line must still produce a row (holding just its name) instead of
# aborting the whole report when grep exits non-zero.
set -e

usage="Usage: $0 OUT_DIR MEASURE ..."

if [ "$#" -lt 2 ]; then
  echo "$usage" >&2
  exit 1
fi

outdir=$1; shift # directory to which results are written

# Reference list of measure names. It is not read by this script: the
# measures processed are exactly the ones given on the command line.
# shellcheck disable=SC2034
MEASURES=(
  "strong_mention_match"
  "strong_link_match"
  "strong_nil_match"
  "strong_all_match"
  "strong_typed_all_match"
  "entity_ceaf"
)

# "$@" is quoted so each measure stays one word and is never globbed.
for measure in "$@"; do
  echo "INFO preparing $measure report.."

  # INITIALISE REPORT HEADER
  report=$outdir/00report.$measure
  printf '%s\n' $'90%(\t95%(\t99%(\tscore\t)99%\t)95%\t)90%\tsystem' \
    > "$report"

  # ADD SYSTEM SCORES
  for sys_eval in "$outdir"/*.confidence; do
    # An unmatched glob stays literal; skip it rather than emit a "*" row.
    [ -e "$sys_eval" ] || continue

    # -w/-F: match the measure as a whole literal word, so that a name
    # which is a prefix of another (e.g. b_cubed vs b_cubed_plus) does
    # not pull in the longer measure's line as well.
    grep -wF -- "$measure" "$sys_eval" \
      | grep "fscore" \
      | awk 'BEGIN{OFS="\t"} {print $3,$4,$5,$6,$7,$8,$9}' \
      | tr '\n' '\t'

    # System name terminates the row.
    basename -- "$sys_eval" .confidence
  done \
    | sort -t$'\t' -k4,4 -nr \
    >> "$report"

done
29 changes: 20 additions & 9 deletions scripts/run_tac13_report.sh
Expand Up @@ -14,21 +14,32 @@ outdir=$1; shift # directory to which results are written

# INITIALISE REPORT HEADER
report=$outdir/00report.tab
echo -e "system\tKBP2010 micro-average\tB^3 Precision\tB^3 Recall\tB^3 F1" \
> $report
(
echo -en "system" # run name
echo -en "\tKBP2010 micro-average" # overall linking score
echo -en "\tB^3 Precision\tB^3 Recall\tB^3 F1" # B^3 clustering scores
echo -e "\tB^3+ Precision\tB^3+ Recall\tB^3+ F1" # B^3+ clustering scores
) > $report

# ADD SYSTEM SCORES
# TODO add B^3+
for eval in $outdir/*.evaluation
for sys_eval in $outdir/*.evaluation
do
basename $eval \
basename $sys_eval \
| sed 's/\.evaluation//' \
| tr '\n' '\t' \
>> $report
cat $eval \
| egrep '(strong_all_match|b_cubed)' \
| cut -f5,6,7,8 \
cat $sys_eval \
| grep -P '\tstrong_all_match$' \
| cut -f 7 \
| tr '\n' '\t' \
| cut -f2,5,6,7 \
>> $report
cat $sys_eval \
| grep -P '\tb_cubed$' \
| cut -f 5,6,7 \
| tr '\n' '\t' \
>> $report
cat $sys_eval \
| grep -P '\tb_cubed_plus$' \
| cut -f 5,6,7 \
>> $report
done
2 changes: 1 addition & 1 deletion scripts/run_tac14_all.sh
@@ -1,6 +1,6 @@
#!/usr/bin/env bash
#
# Run TAC13 evaluation and analysis
# Run TAC14 evaluation and analysis
set -e

usage="Usage: $0 GOLD_XML GOLD_TAB SYSTEMS_DIR OUT_DIR"
Expand Down
5 changes: 5 additions & 0 deletions scripts/run_tac14_evaluation.sh
Expand Up @@ -36,3 +36,8 @@ ls $outdir/*.combined.tsv \
| grep -v "gold\.combined\.tsv$" \
| xargs -n 1 -P $jobs $SCR/run_evaluate.sh $gold


# PREPARE SUMMARY REPORT
echo "INFO Preparing summary report.."
$SCR/run_tac14_report.sh $outdir

87 changes: 87 additions & 0 deletions scripts/run_tac14_filtered.sh
@@ -0,0 +1,87 @@
#!/usr/bin/env bash
#
# Run TAC14 filtered evaluation and analysis
#
# Usage: run_tac14_filtered.sh GOLD_XML GOLD_TAB SYSTEMS_DIR OUT_DIR
#
# Steps:
#   1. run the overall evaluation via run_tac14_evaluation.sh;
#   2. for each FILTERS entry, call run_filtrate.sh once per system output
#      (OUT_DIR/*.combined.tsv, gold excluded) with the entry's regex,
#      writing under OUT_DIR/00filtered/<subset>;
#   3. summarise each subset with run_tac14_report.sh.
#
# Exits 1 on wrong usage, when OUT_DIR/gold.combined.tsv is missing, or
# when no system output is found. Diagnostics go to stderr.
set -e

usage="Usage: $0 GOLD_XML GOLD_TAB SYSTEMS_DIR OUT_DIR"

if [ "$#" -ne 4 ]; then
  echo "$usage" >&2
  exit 1
fi

goldx=$1; shift  # gold standard queries/mentions (XML)
goldt=$1; shift  # gold standard link annotations (tab-separated)
sysdir=$1; shift # directory containing output from systems
outdir=$1; shift # directory to which results are written

# Sibling scripts are looked up next to this one.
SCR=$(dirname -- "$0")

JOBS=8 # number of jobs for parallel mode (set to number of CPUs if possible)


# CONFIGURE FILTERS
# Each entry is "<subset name>:::<regex>". The subset name becomes a
# directory name; the regex is handed unchanged to run_filtrate.sh.
FILTERS=(
  # NE type filters
  "PER:::PER$"
  "ORG:::ORG$"
  "GPE:::GPE$"
  # genre filters
  "NW:::^(AFP|APW|CNA|LTW|NYT|WPB|XIN)_ENG_"
  "WB:::^eng-(NG|WL)-"
  "DF:::^bolt-eng-DF-"
  # combined filters
  "PER_NW:::^(AFP|APW|CNA|LTW|NYT|WPB|XIN)_ENG_.*PER$"
  "PER_WB:::^eng-(NG|WL)-.*PER$"
  "PER_DF:::^bolt-eng-DF-.*PER$"
  "ORG_NW:::^(AFP|APW|CNA|LTW|NYT|WPB|XIN)_ENG_.*ORG$"
  "ORG_WB:::^eng-(NG|WL)-.*ORG$"
  "ORG_DF:::^bolt-eng-DF-.*ORG$"
  "GPE_NW:::^(AFP|APW|CNA|LTW|NYT|WPB|XIN)_ENG_.*GPE$"
  "GPE_WB:::^eng-(NG|WL)-.*GPE$"
  "GPE_DF:::^bolt-eng-DF-.*GPE$"
)


# RUN OVERALL EVALUATION
"$SCR/run_tac14_evaluation.sh" "$goldx" "$goldt" "$sysdir" "$outdir" "$JOBS"


# GET GOLD STANDARD PATH
gold=$outdir/gold.combined.tsv
if [ ! -e "$gold" ]; then
  echo "ERROR $gold does not exist" >&2
  exit 1
fi


# GET LIST OF SYSTEM OUTPUT PATHS
# Built from a glob instead of parsing `ls | grep -v`: paths survive
# whitespace, and an empty result reaches the error message below rather
# than tripping `set -e` on grep's non-zero "nothing selected" status.
systems=()
for candidate in "$outdir"/*.combined.tsv; do
  # An unmatched glob stays literal; ignore it.
  [ -e "$candidate" ] || continue
  case $candidate in
    *gold.combined.tsv) continue ;; # anything ending in gold.combined.tsv
  esac
  systems+=("$candidate")
done
if [ "${#systems[@]}" -eq 0 ]; then
  echo "ERROR did not find any system output" >&2
  exit 1
fi


# RUN FILTERED EVALUATION
# The array expansion is quoted: the entries contain glob metacharacters
# ("*", "(", "|") that must not be expanded against the current directory.
for filter in "${FILTERS[@]}"; do
  subset=${filter%%:::*} # text before the first ":::"
  regex=${filter##*:::}  # text after the last ":::"

  # MAKE DIRECTORY FOR FILTERED EVALUATION
  subdir=$outdir/00filtered/$subset
  mkdir -p -- "$subdir"

  # FILTER AND EVALUATE
  # One run_filtrate.sh invocation per system path, up to $JOBS at once.
  # NUL-delimited so xargs does not split or unquote the paths.
  echo "INFO Evaluating on $subset subset.."
  printf '%s\0' "${systems[@]}" \
    | xargs -0 -n 1 -P "$JOBS" \
      "$SCR/run_filtrate.sh" "$subdir" "$regex" "$gold"

  # PREPARE SUMMARY REPORT
  echo "INFO Preparing summary report.."
  "$SCR/run_tac14_report.sh" "$subdir"

done
36 changes: 36 additions & 0 deletions scripts/run_tac14_report.sh
@@ -0,0 +1,36 @@
#!/usr/bin/env bash
#
# Prepare score summary in TAC 2014 format
#
# Usage: run_tac14_report.sh OUT_DIR
#
# (Re)writes OUT_DIR/00report.tab: a header row, then one tab-separated
# row per OUT_DIR/*.evaluation file made of
#   - field 7 of the strong_typed_all_match line   (WikiF1)
#   - fields 5, 6, 7 of the entity_ceaf line       (CEAFe P / R / F1)
#   - the system name (file name without ".evaluation").
# Lines are selected by comparing the LAST tab-separated field with the
# measure name exactly, the same anchoring run_tac13_report.sh uses, so a
# measure whose name merely contains the wanted one is not picked up.
set -e

usage="Usage: $0 OUT_DIR"

if [ "$#" -ne 1 ]; then
  echo "$usage" >&2
  exit 1
fi

outdir=$1; shift # directory to which results are written

report=$outdir/00report.tab

{
  # INITIALISE REPORT HEADER
  printf 'WikiF1\tCEAFeP\tCEAFeR\tCEAFeF1\tSystem\n'

  # ADD SYSTEM SCORES
  for sys_eval in "$outdir"/*.evaluation; do
    # An unmatched glob stays literal; skip it rather than emit a "*" row.
    [ -e "$sys_eval" ] || continue

    # Each value is followed by a tab so the row continues on one line.
    awk -F '\t' '
      NF > 1 && $NF == "strong_typed_all_match" { printf "%s\t", $7 }
    ' "$sys_eval"

    awk -F '\t' '
      NF > 1 && $NF == "entity_ceaf" { printf "%s\t%s\t%s\t", $5, $6, $7 }
    ' "$sys_eval"

    # System name (with its newline) terminates the row.
    basename -- "$sys_eval" .evaluation
  done
} > "$report"
6 changes: 2 additions & 4 deletions scripts/test_tac13_evaluation.sh
Expand Up @@ -38,12 +38,10 @@ official=$outdir/00official.tab
cat $scores \
| egrep -v '^[0-9]* queries' \
| head -1 \
| cut -f1,2,3,4,5 \
> $official
cat $scores \
| egrep -v '^[0-9]* queries' \
| awk '{if (NR>1) print}' \
| cut -f1,2,3,4,5 \
| tail -n +2 \
| sort \
>> $official

Expand All @@ -54,7 +52,7 @@ if [ "" != "`diff $official $report`" ]
then
difff=$outdir/00diff.txt
diff -y $official $report \
> $difff
> $difff
echo "FAIL see $difff"
else
echo "PASS"
Expand Down

0 comments on commit b4d87bd

Please sign in to comment.