-
Notifications
You must be signed in to change notification settings - Fork 17
/
supersenses.py
143 lines (123 loc) · 4.45 KB
/
supersenses.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
"""
Information about and utilities for supersense categories for lexical expressions in the corpus.
@author: Nathan Schneider (@nschneid)
@since: 2017-12-31
"""
import sys
# Special labels; they are included in ALL_SS alongside the supersense sets.
SPECIAL_LABELS = {
    # a semantic supersense could not be assigned:
    # e.g. due to unintelligible/unclear context, missing word,
    # or marginal or nonnative usage
    '??',
    # opaque possessive slot in an idiom
    '`$',
}
# Noun supersenses (labels carry the 'n.' prefix and an upper-case name)
NSS = {
    'n.ACT',
    'n.ANIMAL',
    'n.ARTIFACT',
    'n.ATTRIBUTE',
    'n.BODY',
    'n.COGNITION',
    'n.COMMUNICATION',
    'n.EVENT',
    'n.FEELING',
    'n.FOOD',
    'n.GROUP',
    'n.LOCATION',
    'n.MOTIVE',
    'n.NATURALOBJECT',
    'n.OTHER',
    'n.PERSON',
    'n.PHENOMENON',
    'n.PLANT',
    'n.POSSESSION',
    'n.PROCESS',
    'n.QUANTITY',
    'n.RELATION',
    'n.SHAPE',
    'n.STATE',
    'n.SUBSTANCE',
    'n.TIME',
}
# Verb supersenses (labels carry the 'v.' prefix and a lower-case name)
VSS = {
    'v.body',
    'v.change',
    'v.cognition',
    'v.communication',
    'v.competition',
    'v.consumption',
    'v.contact',
    'v.creation',
    'v.emotion',
    'v.motion',
    'v.perception',
    'v.possession',
    'v.social',
    'v.stative',
}
# Adposition (preposition/postposition) and case supersenses
# As of SNACS v2.6 guidelines, for STREUSLE v4.5
#
# The hierarchy is stored as nested dicts: each key is a label and its value
# is the dict of that label's immediate children ({} for a leaf).
PSS_TREE = {
    'p.Circumstance': {
        'p.Temporal': {
            'p.Time': {
                'p.StartTime': {},
                'p.EndTime': {},
            },
            'p.Frequency': {},
            'p.Duration': {},
            'p.Interval': {},
        },
        'p.Locus': {
            'p.Source': {},
            'p.Goal': {},
        },
        'p.Path': {
            'p.Direction': {},
            'p.Extent': {},
        },
        'p.Means': {},
        'p.Manner': {},
        'p.Explanation': {
            'p.Purpose': {},
        },
    },
    'p.Participant': {
        'p.Causer': {},
        'p.Force': {
            'p.Agent': {},
        },
        'p.Theme': {
            'p.Topic': {},
            'p.Content': {},
        },
        'p.Ancillary': {},
        'p.Stimulus': {},
        'p.Experiencer': {},
        'p.Originator': {},
        'p.Recipient': {},
        'p.Cost': {},
        'p.Beneficiary': {},
        'p.Instrument': {},
    },
    'p.Configuration': {
        'p.Identity': {},
        'p.Species': {},
        'p.Gestalt': {
            'p.Possessor': {},
            'p.Whole': {},
            'p.Org': {},
            'p.QuantityItem': {},
        },
        'p.Characteristic': {
            'p.Possession': {},
            'p.PartPortion': {
                'p.Stuff': {},
            },
            'p.OrgMember': {},
            'p.QuantityValue': {
                'p.Approximator': {},
            },
        },
        'p.Ensemble': {},
        'p.ComparisonRef': {},
        'p.SetIteration': {},
        'p.SocialRel': {},
    },
}
# Flatten PSS_TREE into two lookup tables keyed by label:
#   PSS_PARENTS: label -> label of its parent (None for a top-level label)
#   PSS_DEPTH:   label -> depth in the tree (top-level labels have depth 1)
PSS_PARENTS = {}
PSS_DEPTH = {}

# Work list of (label, parent label, subtree of children). Items are taken
# from the end, so a label is always recorded before any of its children and
# the parent's depth is available when a child is processed.
pending = [(top, None, subtree) for top, subtree in PSS_TREE.items()]
while pending:
    label, parent, children = pending.pop()
    PSS_PARENTS[label] = parent
    if parent is None:
        PSS_DEPTH[label] = 1
    else:
        PSS_DEPTH[label] = PSS_DEPTH[parent] + 1
    pending.extend((child, label, subtree) for child, subtree in children.items())
# Drop the traversal temporaries so they are not left in the module namespace.
del pending, label, parent, children

PSS = set(PSS_PARENTS.keys())

# Sanity checks on the hierarchy defined in PSS_TREE.
assert len(PSS_DEPTH) == len(PSS) == 52
assert max(PSS_DEPTH.values()) == 4
assert min(PSS_DEPTH.values()) == 1

# Every label defined in this module: special labels plus noun, verb and
# adposition supersenses.
ALL_SS = SPECIAL_LABELS | NSS | VSS | PSS
# v1 preposition supersenses (used in STREUSLE 3.0 but removed in v2)
# Unlike the labels in PSS_TREE, these names carry no 'p.' prefix.
PSS_REMOVED = {
    '1DTrajectory', '2DArea', '3DMedium',
    'Activity',
    'Age',
    'Asset',
    'Attribute',
    'ClockTimeCxn',
    'Contour',
    'Co-Participant', 'Co-Patient',
    'Comparison/Contrast',
    'Course',
    'Creator',
    'DeicticTime',
    'Donor/Speaker',
    'Function',
    'Instance',
    'Material',
    'State', 'StartState', 'EndState',
    'Location', 'InitialLocation', 'Destination',
    'Patient',
    'ProfessionalAspect',
    'Reciprocation',
    'RelativeTime',
    'Scalar/Rank',
    'Transit',
    'Traversed',
    'Value', 'ValueComparison',
    'Via',
}
# Note also that Part/Portion was renamed to PartPortion in STREUSLE 4.1
# In SNACS 2.6/STREUSLE 4.5:
# - RateUnit was renamed to SetIteration
# - old Causer was renamed to Force, but Causer remains in the hierarchy with a new meaning
# - added: Content (but not for English)
def coarsen_pss(ss, depth):
    """Return `ss`, or its nearest ancestor whose depth does not exceed `depth`.

    Depths are read from PSS_DEPTH and parents from PSS_PARENTS. A label whose
    depth is already <= `depth` is returned unchanged.

    Raises KeyError if `ss` is not a key of PSS_DEPTH, or if the walk runs past
    a top-level label (whose parent is None), which happens when `depth` is
    below the depth of the top-level labels.
    """
    if PSS_DEPTH[ss] > depth:
        # Still too fine-grained: retry one level up the hierarchy.
        return coarsen_pss(PSS_PARENTS[ss], depth)
    return ss
def ancestors(ss):
    """List the ancestors of `ss` in PSS_PARENTS, nearest first.

    The immediate parent comes first and the top-level ancestor last; a label
    whose parent is None yields an empty list.

    Raises KeyError if `ss` is not a key of PSS_PARENTS.
    """
    chain = []
    node = PSS_PARENTS[ss]
    while node is not None:
        chain.append(node)
        node = PSS_PARENTS[node]
    return chain
def makesslabel(lexe):
    """Serialize all of a strong lexical expression's supersenses in a string for the full lextag.

    `lexe` must support item access for the keys 'ss' and 'ss2'.

    Returns:
        None if lexe['ss'] is None (lexe['ss2'] is then ignored);
        lexe['ss'] + '|' + lexe['ss2'] if lexe['ss2'] is not None and differs
        from lexe['ss']; otherwise lexe['ss'] alone.

    Raises:
        KeyError: if `lexe` lacks the 'ss' or 'ss2' key.
        AssertionError: if a label that would be serialized is empty (falsy).
    """
    ss1, ss2 = lexe['ss'], lexe['ss2']
    if ss1 is None:
        return None
    # Checked explicitly rather than with `assert` so that the validation
    # still runs when Python is started with -O; the exception type is kept
    # as AssertionError for callers that handle it.
    if not ss1:
        raise AssertionError("empty 'ss' label: {!r}".format(ss1))
    if ss2 is None or ss2 == ss1:
        # No distinct second supersense: the label is just the first one.
        return ss1
    if not ss2:
        raise AssertionError("empty 'ss2' label: {!r}".format(ss2))
    return ss1 + '|' + ss2