-
Notifications
You must be signed in to change notification settings - Fork 13
/
ground_truth.csv-metadata.json
120 lines (120 loc) · 4.37 KB
/
ground_truth.csv-metadata.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
{
"title": "CrowdTruth ground truth for medical relation extraction",
"description": "A ground truth corpus for medical relation extraction, acquired with crowdsourcing and processed with CrowdTruth metrics.",
"fields": [{
"name": "SID",
"description": "An unique identifier of the data entry.",
"constraints": {
"required": true,
"unique": true
}
}, {
"name": "relation",
"description": "The medical relation for which the ground truth is collected.",
"constraints": {
"required": true
}
}, {
"name": "sentence_relation_score",
"description": "The sentence relation score of the medical relation; using cosine similarity over the aggregated crowd data, it computes the likelihood that the relation is expressed between the two terms in the sentence.",
"constraints": {
"required": true,
"type": "http://www.w3.org/2001/XMLSchema#double"
}
}, {
"name": "crowd",
"description": "The score used to train the relation extraction classifier by Chang et al. with crowd data; it is the sentence-relation score, with a threshold to select positive and negative examples equal to 0.5, and rescaled in [0.5, 1] for positives, and [-1, -0.5] for negatives.",
"constraints": {
"required": true,
"type": "http://www.w3.org/2001/XMLSchema#double"
}
}, {
"name": "baseline",
"description": "Discrete (positive or negative) labels are given for each data entry by the distant supervision method, based on whether the relation is expressed between the 2 terms in the sentence",
"constraints": {
"required": true,
"pattern": "(-1|1|)"
}
}, {
"name": "expert",
"description": "Discrete labels based on an expert’s judgment as to whether the distant supervision label is correct.",
"constraints": {
"pattern": "(-1|1|)"
}
}, {
"name": "test_partition",
"description": "Manual evaluation scores over the sentences where crowd and expert disagreed, used for evaluating the classifier; the sentence-relation score threshold was set at 0.7 for maximum agreement; sentences scored with 0 were determined to be unclear and were removed from testing.",
"constraints": {
"pattern": "(-1|0|1|)"
}
}, {
"name": "term1",
"description": "The first medical term, after correction with crowdsourcing; together with Term2, it expresses the relation: 'term1 relation term2'.",
"constraints": {
"required": true
}
}, {
"name": "b1",
"description": "The beginning position of Term1 in the sentence, measured in number of characters.",
"constraints": {
"required": true,
"type": "http://www.w3.org/2001/XMLSchema#int"
}
}, {
"name": "e1",
"description": "The ending position of Term1 in the sentence, measured in number of characters.",
"datatype": "number",
"constraints": {
"required": true,
"type": "http://www.w3.org/2001/XMLSchema#int"
}
}, {
"name": "term2",
"title": "Term2",
"description": "The second medical term, after correction with crowdsourcing; together with Term1, it expresses the relation: 'term1 relation term2'.",
"constraints": {
"required": true
}
}, {
"name": "b2",
"description": "The beginning position of Term2 in the sentence, measured in number of characters.",
"datatype": "number",
"constraints": {
"required": true,
"type": "http://www.w3.org/2001/XMLSchema#int"
}
}, {
"name": "e2",
"description": "The ending position of Term2 in the sentence, measured in number of characters.",
"datatype": "number",
"constraints": {
"required": true,
"type": "http://www.w3.org/2001/XMLSchema#int"
}
}, {
"name": "sentence",
"description": "The medical sentence in which the relation is expressed.",
"constraints": {
"required": true
}
}, {
"name": "term1_UMLS",
"description": "The original UMLS version of Term1, used for distant supervision, before correction with crowdsourcing.",
"constraints": {
"required": true
}
}, {
"name": "term2_UMLS",
"description": "The original UMLS version of Term2, used for distant supervision, before correction with crowdsourcing.",
"constraints": {
"required": true
}
}, {
"name": "UMLS_seed_relation",
"description": "The UMLS relation used as a seed in distant supervision to find the given entry.",
"constraints": {
"required": true
}
}
]
}