// iirc_qa_only.jsonnet
// Values shared across several sections of the configuration object below.
local
  // Passed as `transformer_model_name` to both dataset readers and the model.
  // NOTE(review): the identifier says "bert" but the value is a RoBERTa checkpoint.
  bert_model = "roberta-base",
  // Passed as the model's `hidden_size`.
  // Presumably the encoder's hidden width for the checkpoint above; verify if the checkpoint changes.
  bert_hidden_size = 768,
  // Passed as `wiki_file_path` to both dataset readers.
  preprocessed_wiki_file_path = "data/iirc/preprocessed_context_articles.json",
  // Used for the readers' `link_per_question` and the model's `beam_size_link`.
  top_k_link_per_question = 3;
{
// Reader settings common to training and validation. They are defined once so
// the two readers cannot drift apart when a value is tuned.
local shared_reader = {
  type: "iirc-joint-qa-reader",
  wiki_file_path: preprocessed_wiki_file_path,
  transformer_model_name: bert_model,
  q_max_tokens: 64,
  c_max_tokens: 384,
  skip_invalid_examples: false,
  sent_n: 1,
  padding_sent_n: 1,
  stride: 1,
  neg_n: 7,
  include_main: false,
  add_ctx_sep: false,
  add_init_context: false,
  link_per_question: top_k_link_per_question,
},
dataset_reader: shared_reader,
// The validation reader is the shared reader plus one extra setting, `max_neg_n`.
validation_dataset_reader: shared_reader + { max_neg_n: 500 },
// Input data files.
// NOTE(review): training and validation read the same file; confirm this is
// intended (e.g. a smoke-test setup) before using the reported metrics.
train_data_path: "data/iirc/preprocessed_iirc_tiny.json",
validation_data_path: "data/iirc/preprocessed_iirc_tiny.json",
vocabulary: { type: "empty" },
model: {
  type: "joint-qa",

  // ---- retrieval settings ----
  transformer_model_name: bert_model,
  beam_size_link: top_k_link_per_question,
  print_trajectory: false,
  use_joint_prob: false,

  // ---- question-answering settings ----
  skip_when_all_empty: [
    "passage_span",
    "question_span",
    "addition_subtraction",
    "counting",
    "none",
    "binary",
  ],
  relaxed_span_match_for_finding_labels: true,
  q_max_tokens: 64,
  // NOTE(review): the dataset readers in this file use c_max_tokens = 384;
  // confirm the larger value here is intentional.
  c_max_tokens: 463,
  hidden_size: bert_hidden_size,
  answering_abilities: [
    "passage_span_extraction",
    "question_span_extraction",
    "addition_subtraction",
    "counting",
    "none",
    "binary",
  ],
  use_gcn: true,
  gcn_steps: 3,
  dropout_prob: 0.1,
  top_m_context: 0,
  gold_link_for_retrieval_training: true,
  marginalization_loss_weight: 0.0,
  gold_context_loss_weight: 1.0,
  invalid_context_loss_weight: 0.0,
  // Only the QA sub-model flag is switched on in this configuration.
  use_link_prediction_model: false,
  use_context_retrieval_model: false,
  use_qa_model: true,
},
// Batching: both loaders use batches of 16; only the training loader shuffles.
data_loader: { batch_size: 16, shuffle: true },
validation_data_loader: { batch_size: 16, shuffle: false },
// Training-loop settings.
trainer: {
  num_epochs: 30,
  cuda_device: 0,
  grad_clipping: 1.0,
  // Presumably the leading "+" marks a metric where higher is better
  // (AllenNLP convention); confirm against the trainer documentation.
  validation_metric: "+qa_f1",
  optimizer: {
    type: "huggingface_adamw",
    lr: 1.0e-5,
    eps: 1e-6,
  },
},
}