/
eval.py
102 lines (86 loc) · 4.23 KB
/
eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import texar as tx
import nltk
import numpy as np
from collections import Counter
import sys, os, pickle
sys.path.append('../..')
from LIB.EVAL.meteor import Meteor
from LIB.EVAL.bleu import compute_bleu, diverse_bleu
from LIB.EVAL.rouge import compute_rouge_L
from Reinforcement.rewards_v2 import compute_emotion_distance, get_emotion_prob, sent_vectorizer
#from bert_embedding import BertEmbedding
from sklearn.metrics.pairwise import cosine_similarity
#bert_embedding = BertEmbedding(model='bert_24_1024_16', dataset_name='book_corpus_wiki_en_cased')
SCORE_PATH = 'comet-commonsense/precomputed_similarities/'
with open(os.path.join(SCORE_PATH, 'word2index.pkl'), 'rb') as f:
word2ind = pickle.load(f)
with open(os.path.join(SCORE_PATH, 'index2word.pkl'), 'rb') as f:
ind2word = pickle.load(f)
with open(os.path.join(SCORE_PATH, 'glove/glove_embeddings.pkl'), 'rb') as f:
glove_embeddings = pickle.load(f)
def compute_edit_distance_v2(y_pred, y_true, batch_normalize=True):
# with open('computed_similarities.pickle', 'rb') as handle:
# sim_matrix = pickle.load(handle)
sim_matrix = np.load(os.path.join(SCORE_PATH, 'glove/similarity_matrix.npy'))
embedding_matrix = np.load(os.path.join(SCORE_PATH, 'glove/embedding_matrix.npy'))
basic2comet = {'anger': 'angry', 'fear': 'scared', 'joy': 'happy', 'sadness': 'sad', 'neutral': 'neutral'}
reward = []
# batch_size = len(y_pred)
flag_save = False
for k, v in y_pred.items():
y_hat = v #y_pred[j] #.split()
if len(y_hat) < 3:
reward.append(0)
continue
y = y_true[k] #.split()
y = [basic2comet[i] for i in y]
for i, react in enumerate(y_hat[:5]):
if react not in word2ind:
flag_save = True
word2ind[react] = len(ind2word)
ind2word[word2ind[react]] = react
# emb = np.mean(np.array(bert_embedding([react])[0][1]), axis=0).reshape(1,-1)
emb = sent_vectorizer(react.split(), glove_embeddings).reshape(1, -1)
### Bert version
# if not np.isnan(emb.any()) and np.isfinite(emb.all()):
# sim_matrix = np.append(sim_matrix, cosine_similarity(emb, embedding_matrix), axis=0)
# else:
# sim_matrix = np.append(sim_matrix, np.zeros((1, sim_matrix.shape[1])), axis=0)
sim_matrix = np.append(sim_matrix, cosine_similarity(emb, embedding_matrix), axis=0)
sub_reward = sim_matrix[word2ind[y_hat[0]]][word2ind[y[0]]] + \
max([sim_matrix[word2ind[mid_arc]][word2ind[y[1]]] for mid_arc in y_hat[1:-1] ]) + \
sim_matrix[word2ind[y_hat[-1]]][word2ind[y[2]]]
reward.append(sub_reward/3)
if flag_save:
np.save(os.path.join(SCORE_PATH, 'glove/similarity_matrix.npy'), sim_matrix)
with open(os.path.join(SCORE_PATH, 'word2index.pkl'), 'wb') as f:
pickle.dump(word2ind, f)
with open(os.path.join(SCORE_PATH, 'index2word.pkl'), 'wb') as f:
pickle.dump(ind2word, f)
reward = np.array(reward)
if batch_normalize:
reward = np.mean(reward)
return reward # shape (bs,)
def evaluate_full(gts, gen_res, gen_dict, gts_arc, emo_feat, method=None):
assert (len(gts) == len(gen_res))
reward = 0
if method == "comet":
assert (len(gen_dict) == len(gts_arc))
reward = compute_edit_distance_v2(gen_dict, gts_arc)
elif method == "clf":
pred_dist = np.zeros_like(emo_feat)
for k, v in gen_dict.items():
pred_dist[k,:] = v
reward = compute_emotion_distance(pred_dist, emo_feat, batch_normalize=True, method=method)
elif method == 'clf_prob':
pred_dist = np.zeros_like(emo_feat)
for k, v in gen_dict.items():
pred_dist[k,:] = v
reward = get_emotion_prob(pred_dist, gts_arc, batch_normalize=True)
translation_corpus = gen_res
reference_corpus = gts
rouges = []
meteor = Meteor()
bleu = compute_bleu([[j.split()] for j in reference_corpus], [i.split() for i in translation_corpus])
mete = meteor.compute_score([[r] for r in reference_corpus], translation_corpus)
return {"bleu": bleu[0] * 100, "meteor": mete[0] * 100, "best_reward": reward}