-
Notifications
You must be signed in to change notification settings - Fork 20
/
predict.sh
executable file
·154 lines (134 loc) · 3.15 KB
/
predict.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/bin/bash
# Runs `allennlp predict` for a model archive on a test file, extracts the
# reference summaries from the same test file with target_to_lines.py, and
# scores the predictions against them with evaluate.py.
# Both helper scripts are invoked by bare relative name, so the script expects
# to be started from the directory that contains them.

# Stop at the first command that exits with a non-zero status.
set -e
# Print a one-line synopsis of every accepted option to stderr.
# -m, -t, -p and -L are mandatory; bracketed options are optional.
# Outputs: the synopsis on stderr; nothing on stdout.
usage() {
  echo "Usage: $0 -m MODEL_ARCHIVE_PATH -t TEST_FILE -p PREDICTOR -L LANGUAGE" \
    "[-c CUDA_DEVICE] [-b BATCH_SIZE] [-M METEOR_JAR] [-P PRED_FILE]" \
    "[-T] [-R] [-l] [-D]" 1>&2
}
# Show the usage synopsis, then terminate the script with exit status 1.
exit_abnormal() {
  usage
  exit 1
}
# Option-presence flags: each holds "false" until its option is parsed and
# "true" afterwards.
# M_flag and T_flag get no default here: they stay unset unless -M / -T is
# given on the command line.
m_flag=false t_flag=false p_flag=false L_flag=false
b_flag=false c_flag=false P_flag=false
l_flag=false R_flag=false D_flag=false

# The leading ":" in the option string selects getopts' silent mode, so
# unknown options and missing arguments are reported by the "\?" and ":"
# arms at the bottom instead of by getopts itself.
while getopts ":m:t:p:b:M:TRDc:L:lP:" opt
do
  case "${opt}" in
    # ---- forwarded to `allennlp predict` ----
    m)  # model archive (tar.gz)
      MODEL_ARCHIVE_PATH="${OPTARG}"
      m_flag=true
      ;;
    t)  # file with the test data
      TEST_FILE="${OPTARG}"
      t_flag=true
      ;;
    p)  # registered AllenNLP predictor name
      PREDICTOR="${OPTARG}"
      p_flag=true
      ;;
    b)  # batch size (32 when omitted)
      BATCH_SIZE="${OPTARG}"
      b_flag=true
      ;;
    c)  # CUDA device (0 when omitted)
      CUDA_DEVICE=${OPTARG}
      c_flag=true
      ;;

    # ---- forwarded to evaluate.py ----
    M)  # path to the METEOR jar (--meteor-jar)
      METEOR_JAR="${OPTARG}"
      M_flag=true
      ;;
    T)  # --tokenize-after
      T_flag=true
      ;;
    R)  # --is-multiple-ref
      R_flag=true
      ;;
    L)  # language (--language)
      LANGUAGE="${OPTARG}"
      L_flag=true
      ;;
    l)  # --lower
      l_flag=true
      ;;

    # ---- handled by this script ----
    D)  # keep the temporary files instead of deleting them
      D_flag=true
      ;;
    P)  # where to write the predictions
      PRED_FILE="${OPTARG}"
      P_flag=true
      ;;

    # ---- parse errors ----
    \?)
      echo "Invalid option -$OPTARG" >&2
      exit_abnormal
      ;;
    :)
      echo "Missing option argument for -$OPTARG" >&2
      exit_abnormal
      ;;
  esac
done
# Mandatory options. The first one found missing is reported on stderr (like
# the other option errors), followed by the usage line and exit status 1.
# Each *_flag variable holds the word "true" or "false", which is executed as
# the command of the same name to obtain the test result.
if ! $m_flag; then
  echo "Missing -m option (path to model archive)" >&2
  exit_abnormal
fi
if ! $t_flag; then
  echo "Missing -t option (path to test dataset)" >&2
  exit_abnormal
fi
if ! $p_flag; then
  echo "Missing -p option (name of Predictor)" >&2
  exit_abnormal
fi
if ! $L_flag; then
  echo "Missing -L option (language, 'en' or 'ru')" >&2
  exit_abnormal
fi

# Defaults for the optional settings.
if ! $b_flag; then
  BATCH_SIZE=32
fi
if ! $c_flag; then
  CUDA_DEVICE=0
fi
if ! $P_flag; then
  # No -P given: predictions go to a fresh temporary file.
  PRED_FILE=$(mktemp)
fi
# Succeeds (status 0) only when the variable named by $1 holds the string
# "true". An unset variable and the value "false" both count as "off".
# A plain ${flag:+...} test cannot be used for this: it checks for a non-empty
# value, and "false" is non-empty.
flag_on() {
  [[ "${!1:-false}" == true ]]
}

# Temporary file that receives the reference (gold) summaries.
REF_FILE=$(mktemp)

# Run the helper scripts with the interpreter of the `allennlp` launcher:
# read the launcher's first line (its shebang) and drop the leading "#!".
ALLENNLP_FILE=$(command -v allennlp)
ALLENNLP_SHEBANG=$(head -n 1 "${ALLENNLP_FILE}")
PYTHON_STRING="${ALLENNLP_SHEBANG:2}"
# Split on whitespace so a shebang such as "/usr/bin/env python" becomes a
# command followed by its arguments.
read -r -a PYTHON_CMD <<< "${PYTHON_STRING}"

echo "Calling AllenNLP predict..."
allennlp predict \
  "${MODEL_ARCHIVE_PATH}" \
  "${TEST_FILE}" \
  --output-file "${PRED_FILE}" \
  --include-package summarus \
  --cuda-device "${CUDA_DEVICE}" \
  --use-dataset-reader \
  --predictor "${PREDICTOR}" \
  --silent \
  --batch-size "${BATCH_SIZE}"
echo "File with predictions: ${PRED_FILE}"

echo "Calling target_to_lines.py..."
"${PYTHON_CMD[@]}" target_to_lines.py \
  --input-file "${TEST_FILE}" \
  --output-file "${REF_FILE}"
echo "File with gold summaries: ${REF_FILE}"

echo "Calling evaluate.py..."
# Fixed arguments first, then one optional switch per option that was
# actually given on the command line.
EVAL_ARGS=(
  --predicted-path "${PRED_FILE}"
  --gold-path "${REF_FILE}"
  --metric all
  --language "${LANGUAGE}"
)
if flag_on l_flag; then
  EVAL_ARGS+=(--lower)
fi
if flag_on R_flag; then
  EVAL_ARGS+=(--is-multiple-ref)
fi
if flag_on M_flag; then
  EVAL_ARGS+=(--meteor-jar "${METEOR_JAR}")
fi
if flag_on T_flag; then
  EVAL_ARGS+=(--tokenize-after)
fi
"${PYTHON_CMD[@]}" evaluate.py "${EVAL_ARGS[@]}"

if flag_on D_flag; then
  # -D: leave both files in place and tell the caller where they are.
  echo "File with predicted summaries: ${PRED_FILE}"
  echo "File with gold summaries: ${REF_FILE}"
else
  echo "Removing temporary files..."
  # NOTE(review): this also deletes a predictions file named with -P unless
  # -D is given - confirm that is intended.
  rm -- "${PRED_FILE}"
  rm -- "${REF_FILE}"
fi