Skip to content

Commit

Permalink
Merge pull request #97 from dstl/v2.7
Browse files Browse the repository at this point in the history
V2.7
  • Loading branch information
jbaker-dstl committed May 8, 2019
2 parents 38a2638 + 1a713d9 commit bb88621
Show file tree
Hide file tree
Showing 538 changed files with 5,836 additions and 11,129 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -63,3 +63,4 @@ target/

# Baleen specific
testing.log
derby.log
2 changes: 1 addition & 1 deletion BUILD.md
Expand Up @@ -10,4 +10,4 @@
2. Run `mvn package` from the Baleen project directory
3. Optionally run `mvn javadoc:aggregate-jar` to build Javadoc
4. The Baleen JAR will be built and saved in the target directory under the top level project directory
5. Run Baleen by running `java -jar baleen-2.7.0-SNAPSHOT.jar` and then navigating to <http://localhost:6413>
5. Run Baleen by running `java -jar baleen-2.7.0.jar` and then navigating to <http://localhost:6413>
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -20,7 +20,7 @@ Baleen includes an in-built server, which hosts full documentation and guides on
To get started, you will need to launch this server and read this documentation.
To launch the server, run the following command.

> java -jar baleen-2.7.0-SNAPSHOT.jar
> java -jar baleen-2.7.0.jar
Once running, the server can be accessed at [http://localhost:6413](http://localhost:6413).

Expand Down
303 changes: 170 additions & 133 deletions THIRD-PARTY.txt

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion baleen-activemq/pom.xml
Expand Up @@ -4,7 +4,7 @@
<parent>
<groupId>uk.gov.dstl.baleen</groupId>
<artifactId>baleen</artifactId>
<version>2.7.0-SNAPSHOT</version>
<version>2.7.0</version>
</parent>
<artifactId>baleen-activemq</artifactId>
<name>Baleen ActiveMQ</name>
Expand Down
Expand Up @@ -40,11 +40,11 @@ public class ActiveMQReaderTest extends AbstractReaderTest {
BROKERARGS_VALUE
};
final ExternalResourceDescription erd =
ExternalResourceFactory.createExternalResourceDescription(
ExternalResourceFactory.createNamedResourceDescription(
ACTIVEMQ, SharedActiveMQResource.class, configArr);

final ExternalResourceDescription cerd =
ExternalResourceFactory.createExternalResourceDescription(
ExternalResourceFactory.createNamedResourceDescription(
KEY_CONTENT_EXTRACTOR, PlainTextContentExtractor.class);

public ActiveMQReaderTest() {
Expand Down
Expand Up @@ -62,7 +62,7 @@ public static void setupClass() throws UIMAException, JMSException {

// Create descriptors
ExternalResourceDescription erd =
ExternalResourceFactory.createExternalResourceDescription(
ExternalResourceFactory.createNamedResourceDescription(
ACTIVEMQ, SharedActiveMQResource.class, configArr);
AnalysisEngineDescription aed =
AnalysisEngineFactory.createEngineDescription(
Expand Down
Expand Up @@ -26,7 +26,7 @@ public class ActiveMQTransportsTest {
private static String BROKERARGS_VALUE = "broker.persistent=false";

private final ExternalResourceDescription mqerd =
ExternalResourceFactory.createExternalResourceDescription(
ExternalResourceFactory.createNamedResourceDescription(
SharedActiveMQResource.RESOURCE_KEY,
SharedActiveMQResource.class,
SharedActiveMQResource.PARAM_PROTOCOL,
Expand All @@ -35,7 +35,7 @@ public class ActiveMQTransportsTest {
BROKERARGS_VALUE);

private final ExternalResourceDescription ceerd =
ExternalResourceFactory.createExternalResourceDescription(
ExternalResourceFactory.createNamedResourceDescription(
KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class);

@Test
Expand Down
13 changes: 12 additions & 1 deletion baleen-annotators/pom.xml
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>uk.gov.dstl.baleen</groupId>
<artifactId>baleen</artifactId>
<version>2.7.0-SNAPSHOT</version>
<version>2.7.0</version>
</parent>
<artifactId>baleen-annotators</artifactId>
<name>Baleen Annotators</name>
Expand Down Expand Up @@ -81,6 +81,17 @@
<artifactId>jsoup</artifactId>
<version>${jsoup.version}</version>
</dependency>
<dependency>
<groupId>org.opensextant</groupId>
<artifactId>geodesy</artifactId>
<version>${geodesy.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>uk.gov.dstl.baleen</groupId>
Expand Down
Expand Up @@ -42,10 +42,7 @@ protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException

final List<T> toDelete = new LinkedList<>();

annotations
.asMap()
.entrySet()
.stream()
annotations.asMap().entrySet().stream()
.map(Map.Entry::getValue)
.filter(e -> e.size() > 1)
// Convert to a list of all annotations, BUT skip (drop) one...
Expand Down
Expand Up @@ -46,9 +46,8 @@ public class Blacklist extends BaleenAnnotator {
public static final String PARAM_BLACKLIST = "blacklist";

@ConfigurationParameter(
name = PARAM_BLACKLIST,
defaultValue = {}
)
name = PARAM_BLACKLIST,
defaultValue = {})
String[] terms;

List<String> thingsToRemove = null;
Expand All @@ -72,9 +71,8 @@ public class Blacklist extends BaleenAnnotator {
public static final String PARAM_TYPE = "type";

@ConfigurationParameter(
name = PARAM_TYPE,
defaultValue = "uk.gov.dstl.baleen.types.semantic.Entity"
)
name = PARAM_TYPE,
defaultValue = "uk.gov.dstl.baleen.types.semantic.Entity")
String type;

Class<? extends Annotation> et = null;
Expand Down
Expand Up @@ -5,7 +5,7 @@
import java.util.Collection;
import java.util.Collections;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
Expand Down
Expand Up @@ -62,9 +62,8 @@ public class CorefBrackets extends BaleenAnnotator {
public static final String PARAM_TYPE = "excludedTypes";

@ConfigurationParameter(
name = PARAM_TYPE,
defaultValue = {}
)
name = PARAM_TYPE,
defaultValue = {})
String[] excludedTypes;

List<Class<? extends Entity>> classTypes = new ArrayList<>();
Expand Down Expand Up @@ -104,9 +103,7 @@ public void doProcess(JCas jCas) throws AnalysisEngineProcessException {
while (m.find()) {
final Integer end = offset + m.end(1);
matched.addAll(
entityMap
.get(offset + m.start(1))
.stream()
entityMap.get(offset + m.start(1)).stream()
.filter(
f ->
f.getClass().isAssignableFrom(e.getClass())
Expand Down
Expand Up @@ -63,9 +63,8 @@ public class MergeAdjacent extends BaleenAnnotator {
public static final String PARAM_TYPE = "types";

@ConfigurationParameter(
name = PARAM_TYPE,
defaultValue = {}
)
name = PARAM_TYPE,
defaultValue = {})
String[] types;

List<Class<? extends Entity>> classTypes = new ArrayList<>();
Expand Down Expand Up @@ -107,8 +106,7 @@ private void processType(JCas jCas, Class<? extends Entity> type) {
int end = mergeable.get(mergeable.size() - 1).getEnd();

Double lowestConfidence =
mergeable
.stream()
mergeable.stream()
.min((e1, e2) -> Double.compare(e1.getConfidence(), e2.getConfidence()))
.map(Entity::getConfidence)
.orElse(Double.valueOf(0.0));
Expand Down
Expand Up @@ -7,7 +7,7 @@
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.resource.ResourceInitializationException;

Expand Down
Expand Up @@ -28,9 +28,7 @@ protected void doProcessTextBlock(TextBlock block) throws AnalysisEngineProcessE
if (nationalities.isEmpty()) return;

List<Entity> entities =
block
.select(Entity.class)
.stream()
block.select(Entity.class).stream()
.filter(e -> !e.getClass().equals(Nationality.class))
.collect(Collectors.toList());
for (Nationality n : nationalities) {
Expand Down
Expand Up @@ -38,9 +38,8 @@ public class NormalizeTemporal extends AbstractNormalizeEntities {
public static final String PARAM_DATE_FORMAT = "correctFormat";

@ConfigurationParameter(
name = PARAM_DATE_FORMAT,
defaultValue = "yyyy'-'MM'-'dd'T'HH':'mm':'ss'Z'"
)
name = PARAM_DATE_FORMAT,
defaultValue = "yyyy'-'MM'-'dd'T'HH':'mm':'ss'Z'")
String correctFormat;

/**
Expand Down
@@ -1,13 +1,7 @@
// Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.cleaners;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;

import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
Expand Down Expand Up @@ -48,8 +42,9 @@
* }
* </pre>
*
* See {@link UploadInteractionsToMongo} and {@link MongoInteractionWriter} for information how to
* create this collection.
* See {@link uk.gov.dstl.baleen.jobs.interactions.UploadInteractionsToMongo} and {@link
* uk.gov.dstl.baleen.jobs.interactions.io.MongoInteractionWriter} for information on how to
* create this collection.
*
* @baleen.javadoc
*/
Expand Down
Expand Up @@ -47,9 +47,8 @@ public class RemoveNestedEntities extends AbstractNestedEntities<Entity> {
public static final String PARAM_EXCLUDED_TYPES = "excludedTypes";

@ConfigurationParameter(
name = PARAM_EXCLUDED_TYPES,
defaultValue = {}
)
name = PARAM_EXCLUDED_TYPES,
defaultValue = {})
private Set<String> excluded;

Set<Class<? extends Annotation>> classTypes = new HashSet<>();
Expand Down
Expand Up @@ -50,9 +50,8 @@ public class RemoveOverlappingEntities extends AbstractNestedEntities<Entity> {
public static final String PARAM_EXCLUDED_TYPES = "excludedTypes";

@ConfigurationParameter(
name = PARAM_EXCLUDED_TYPES,
defaultValue = {}
)
name = PARAM_EXCLUDED_TYPES,
defaultValue = {})
private Set<String> excluded;

Set<Class<? extends Annotation>> classTypes = new HashSet<>();
Expand Down
@@ -1,12 +1,7 @@
// Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.coreference;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.*;

import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
Expand All @@ -21,35 +16,13 @@
import uk.gov.dstl.baleen.annotators.coreference.impl.MentionDetector;
import uk.gov.dstl.baleen.annotators.coreference.impl.data.Cluster;
import uk.gov.dstl.baleen.annotators.coreference.impl.data.Mention;
import uk.gov.dstl.baleen.annotators.coreference.impl.enhancers.AcronymEnhancer;
import uk.gov.dstl.baleen.annotators.coreference.impl.enhancers.AnimacyEnhancer;
import uk.gov.dstl.baleen.annotators.coreference.impl.enhancers.GenderEnhancer;
import uk.gov.dstl.baleen.annotators.coreference.impl.enhancers.MentionEnhancer;
import uk.gov.dstl.baleen.annotators.coreference.impl.enhancers.MultiplicityEnhancer;
import uk.gov.dstl.baleen.annotators.coreference.impl.enhancers.PersonEnhancer;
import uk.gov.dstl.baleen.annotators.coreference.impl.sieves.CoreferenceSieve;
import uk.gov.dstl.baleen.annotators.coreference.impl.sieves.ExactStringMatchSieve;
import uk.gov.dstl.baleen.annotators.coreference.impl.sieves.ExtractReferenceTargets;
import uk.gov.dstl.baleen.annotators.coreference.impl.sieves.InSentencePronounSieve;
import uk.gov.dstl.baleen.annotators.coreference.impl.sieves.PreciseConstructsSieve;
import uk.gov.dstl.baleen.annotators.coreference.impl.sieves.PronounResolutionSieve;
import uk.gov.dstl.baleen.annotators.coreference.impl.sieves.ProperHeadMatchSieve;
import uk.gov.dstl.baleen.annotators.coreference.impl.sieves.RelaxedHeadMatchSieve;
import uk.gov.dstl.baleen.annotators.coreference.impl.sieves.RelaxedStringMatchSieve;
import uk.gov.dstl.baleen.annotators.coreference.impl.sieves.StrictHeadMatchSieve;
import uk.gov.dstl.baleen.annotators.coreference.impl.enhancers.*;
import uk.gov.dstl.baleen.annotators.coreference.impl.sieves.*;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.resources.SharedGenderMultiplicityResource;
import uk.gov.dstl.baleen.resources.SharedStopwordResource;
import uk.gov.dstl.baleen.types.Base;
import uk.gov.dstl.baleen.types.common.CommsIdentifier;
import uk.gov.dstl.baleen.types.common.DocumentReference;
import uk.gov.dstl.baleen.types.common.Frequency;
import uk.gov.dstl.baleen.types.common.Money;
import uk.gov.dstl.baleen.types.common.Nationality;
import uk.gov.dstl.baleen.types.common.Organisation;
import uk.gov.dstl.baleen.types.common.Person;
import uk.gov.dstl.baleen.types.common.Url;
import uk.gov.dstl.baleen.types.common.Vehicle;
import uk.gov.dstl.baleen.types.common.*;
import uk.gov.dstl.baleen.types.geo.Coordinate;
import uk.gov.dstl.baleen.types.language.PhraseChunk;
import uk.gov.dstl.baleen.types.language.Sentence;
Expand Down Expand Up @@ -150,7 +123,7 @@ public class SieveCoreference extends BaleenAnnotator {

/**
* The stoplist to use. If the stoplist matches one of the enums provided in {@link
* uk.gov.dstl.baleen.resources.SharedStopwordResource#StopwordList}, then that list will be
* uk.gov.dstl.baleen.resources.SharedStopwordResource.StopwordList}, then that list will be
* loaded.
*
* <p>Otherwise, the string is taken to be a file path and that file is used. The format of the
Expand Down

0 comments on commit bb88621

Please sign in to comment.