-
Notifications
You must be signed in to change notification settings - Fork 40
/
RelationTypeFilter.java
222 lines (184 loc) · 6.73 KB
/
RelationTypeFilter.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
// Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.cleaners;
import java.util.*;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ExternalResource;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.bson.Document;
import com.google.common.collect.ImmutableSet;
import com.mongodb.client.MongoCollection;
import uk.gov.dstl.baleen.annotators.patterns.data.RelationConstraint;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.resources.SharedMongoResource;
import uk.gov.dstl.baleen.types.semantic.Relation;
import uk.gov.dstl.baleen.uima.BaleenAnnotator;
/**
* Removes relationships that don't match UIMA type constraints.
*
* <p>Many relationships will only make sense between specific entity types. For example (Person,
* went to, Location) not (DateTime, went to, Location). This filter allows for relational type
* constraints.
*
* <p>Since relationship extractors may have different capabilities (e.g. finding the direction of
* relationships, discovering new unknown relationships) there are several configuration parameters
* which relax the strictness of filtering.
*
* <p>Mongo constraint documents are formed as:
*
* <pre>
* {
* source: 'type of source',
* target: 'type of target',
* type: 'relation type',
* }
* </pre>
*
* See {@link uk.gov.dstl.baleen.jobs.interactions.UploadInteractionsToMongo} and {@link
* uk.gov.dstl.baleen.jobs.interactions.io.MongoInteractionWriter} for information how to create
* this collection.
*
* @baleen.javadoc
*/
public class RelationTypeFilter extends BaleenAnnotator {

  /**
   * Connection to Mongo
   *
   * @baleen.resource uk.gov.dstl.baleen.resources.SharedMongoResource
   */
  public static final String KEY_MONGO = "mongo";

  @ExternalResource(key = KEY_MONGO)
  private SharedMongoResource mongo;

  /**
   * The name of the Mongo collection containing the relation types
   *
   * @baleen.config relationTypes
   */
  public static final String PARAM_COLLECTION = "collection";

  @ConfigurationParameter(name = PARAM_COLLECTION, defaultValue = "relationTypes")
  private String collection;

  /**
   * The name of the field in Mongo that contains the relation type
   *
   * @baleen.config type
   */
  public static final String PARAM_TYPE_FIELD = "typeField";

  @ConfigurationParameter(name = PARAM_TYPE_FIELD, defaultValue = "type")
  private String typeField;

  /**
   * The name of the field in Mongo that contains the relation sub type
   *
   * @baleen.config subType
   */
  public static final String PARAM_SUBTYPE_FIELD = "subTypeField";

  @ConfigurationParameter(name = PARAM_SUBTYPE_FIELD, defaultValue = "subType")
  private String subTypeField;

  /**
   * The name of the field in Mongo that contains the relation source type
   *
   * @baleen.config source
   */
  public static final String PARAM_SOURCE_FIELD = "sourceField";

  @ConfigurationParameter(name = PARAM_SOURCE_FIELD, defaultValue = "source")
  private String sourceField;

  /**
   * The name of the field in Mongo that contains the relation target type
   *
   * @baleen.config target
   */
  public static final String PARAM_TARGET_FIELD = "targetField";

  @ConfigurationParameter(name = PARAM_TARGET_FIELD, defaultValue = "target")
  private String targetField;

  /**
   * The name of the field in Mongo that contains the relation pos
   *
   * @baleen.config pos
   */
  public static final String PARAM_POS_FIELD = "posField";

  @ConfigurationParameter(name = PARAM_POS_FIELD, defaultValue = "pos")
  private String posField;

  /**
   * Determines strictness of filtering.
   *
   * <p>In strict mode the relationship type must be defined and the source and target type the same
   * in order to pass the filter. In non-strict mode, if the relationship type has no constraints
   * then the relationship will pass. If the relationship type has constraints then these must be
   * adhered to.
   *
   * @baleen.config false
   */
  public static final String PARAM_STRICT = "strict";

  @ConfigurationParameter(name = PARAM_STRICT, defaultValue = "false")
  private boolean strict;

  /**
   * Determines if relations can be considered symmetric (source and target swapped)
   *
   * @baleen.config true
   */
  public static final String PARAM_SYMMETRIC = "symmetric";

  @ConfigurationParameter(name = PARAM_SYMMETRIC, defaultValue = "true")
  private boolean symmetric;

  /** Constraints loaded from Mongo, keyed by the lower-cased relation type. */
  private final Map<String, Set<RelationConstraint>> constraints = new HashMap<>();

  /**
   * Loads every document of the configured Mongo collection, converts it to a {@link
   * RelationConstraint} and stores the valid ones grouped by relation type.
   *
   * @param aContext the UIMA context, passed on to the superclass initialisation
   * @throws ResourceInitializationException if the superclass initialisation fails
   */
  @Override
  public void doInitialize(final UimaContext aContext) throws ResourceInitializationException {
    super.doInitialize(aContext);

    final MongoCollection<Document> dbCollection = mongo.getDB().getCollection(collection);

    for (final Document o : dbCollection.find()) {
      final RelationConstraint constraint =
          new RelationConstraint(
              (String) o.get(typeField),
              (String) o.get(subTypeField),
              (String) o.get(posField),
              (String) o.get(sourceField),
              (String) o.get(targetField));

      if (constraint.isValid()) {
        // Look up and store under the SAME normalised key. Previously the lookup used the raw
        // type while the store used the lower-cased type, so for mixed-case types each new
        // constraint replaced the set holding the earlier ones.
        constraints
            .computeIfAbsent(normalizeType(constraint.getType()), k -> new HashSet<>())
            .add(constraint);
      }
    }
  }

  /**
   * Collects every {@link Relation} in the JCas that fails the filter and hands the collected list
   * to {@code removeFromJCasIndex}.
   *
   * <p>A relation whose type is unset, or whose type has no loaded constraints, is removed only in
   * strict mode. Otherwise it is removed when none of the constraints for its type match.
   *
   * @param jCas the JCas whose relations are filtered
   * @throws AnalysisEngineProcessException declared by the overridden method
   */
  @Override
  protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
    final List<Relation> toRemove = new ArrayList<>();

    for (final Relation relation : JCasUtil.select(jCas, Relation.class)) {
      final String type = relation.getRelationshipType();
      // A relation without a type cannot have constraints; avoid the NPE on toLowerCase.
      final Set<RelationConstraint> rcs =
          type == null ? null : constraints.get(normalizeType(type));

      final boolean remove;
      if (rcs == null || rcs.isEmpty()) {
        // No constraints known for this type: only strict mode removes the relation
        remove = strict;
      } else {
        remove = !checkValid(rcs, relation);
      }

      if (remove) {
        toRemove.add(relation);
      }
    }

    removeFromJCasIndex(toRemove);
  }

  /**
   * Check if the relation is valid against the constraints.
   *
   * @param rcs the constraints registered for the relation's type
   * @param relation the relation to check
   * @return true, if at least one constraint matches the relation
   */
  private boolean checkValid(final Set<RelationConstraint> rcs, final Relation relation) {
    return rcs.stream().anyMatch(p -> p.matches(relation, symmetric));
  }

  /**
   * Produces the key used in {@link #constraints} for a relation type.
   *
   * @param type the relation type, must not be null
   * @return the type lower-cased independently of the default locale
   */
  private static String normalizeType(final String type) {
    return type.toLowerCase(Locale.ROOT);
  }

  @Override
  public AnalysisEngineAction getAction() {
    return new AnalysisEngineAction(ImmutableSet.of(Relation.class), Collections.emptySet());
  }
}