Skip to content

Commit

Permalink
Migration to LMDBjava #75
Browse files: browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Apr 18, 2018
1 parent 76e0160 commit 482a825
Show file tree
Hide file tree
Showing 37 changed files with 2,730 additions and 2,737 deletions.
7 changes: 5 additions & 2 deletions data/config/kb.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# entity-fishing configuration for the (upper) conceptual KB layer

# path to the LMDB data
dbDirectory: data/db/db-kb
#dbDirectory: /Volumes/SEAGATE1TB/nerd/0.0.3/nerd-data/db/db-kb
#dbDirectory: /Volumes/LaCie/workspace/nerd/0.0.3/nerd-data/db/db-kb
dbDirectory: /Volumes/LaCie/workspace/nerd/0.0.4/nerd-data/db/db-kb

# path to the Wikidata files (dump, list of properties, mapping)
# note that these files are only used when creating the LMDB data;
# if the LMDB data are already provided, these files will not be used
#dataDirectory: /mnt/data/wikipedia/latest/wikidata/
dataDirectory: /home/lopez/resources/wikidata/
dataDirectory: /Volumes/SEAGATE1TB/nerd/0.0.3/wikidata/
dataDirectory: /Volumes/LaCie/workspace/nerd/0.0.3/wikidata/
10 changes: 5 additions & 5 deletions data/config/wikipedia-de.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
langCode: de

# path to the LMDB data
dbDirectory: data/db/db-de

# path to the embeddings
embeddingsDirectory: data/embeddings/de
#dbDirectory: /Volumes/SEAGATE1TB/nerd/0.0.3/nerd-data/db/db-de
#dbDirectory: /Volumes/LaCie/workspace/nerd/0.0.3/nerd-data/db/db-de
dbDirectory: /Volumes/LaCie/workspace/nerd/0.0.4/nerd-data/db/db-de

# path to the compiled CSV Wikipedia, infobox, and Wikidata files
# note that these files are only used when creating the LMDB data;
# if the LMDB data are already provided, these files will not be used
dataDirectory: /mnt/data/wikipedia/latest/de/
#dataDirectory: /Volumes/SEAGATE1TB/nerd/0.0.3/wikipedia/de/
dataDirectory: /Volumes/LaCie/workspace/nerd/0.0.3/wikipedia/de/
#dataDirectory: /home/lopez/resources/wikipedia/de/

# path to the stopwords file to consider for the target language
Expand Down
10 changes: 5 additions & 5 deletions data/config/wikipedia-en.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
langCode: en

# path to the LMDB data
dbDirectory: data/db/db-en

# path to the embeddings
embeddingsDirectory: data/embeddings/en
#dbDirectory: /Volumes/SEAGATE1TB/nerd/0.0.3/nerd-data/db/db-en
#dbDirectory: /Volumes/LaCie/workspace/nerd/0.0.3/nerd-data/db/db-en
dbDirectory: /Volumes/LaCie/workspace/nerd/0.0.4/nerd-data/db/db-en

# path to the compiled CSV Wikipedia, infobox, and Wikidata files
# note that these files are only used when creating the LMDB data;
# if the LMDB data are already provided, these files will not be used
dataDirectory: /mnt/data/wikipedia/latest/en/
#dataDirectory: /Volumes/SEAGATE1TB/nerd/0.0.3/wikipedia/en/
dataDirectory: /Volumes/LaCie/workspace/nerd/0.0.3/wikipedia/en/
#dataDirectory: /home/lopez/resources/wikipedia/en/

# path to the stopwords file to consider for the target language
Expand Down
10 changes: 5 additions & 5 deletions data/config/wikipedia-es.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
langCode: es

# path to the LMDB data
dbDirectory: data/db/db-es

# path to the embeddings
embeddingsDirectory: data/embeddings/es
#dbDirectory: /Volumes/SEAGATE1TB/nerd/0.0.3/nerd-data/db/db-es
#dbDirectory: /Volumes/LaCie/workspace/nerd/0.0.3/nerd-data/db/db-es
dbDirectory: /Volumes/LaCie/workspace/nerd/0.0.4/nerd-data/db/db-es

# path to the compiled CSV Wikipedia, infobox, and Wikidata files
# note that these files are only used when creating the LMDB data;
# if the LMDB data are already provided, these files will not be used
dataDirectory: /mnt/data/wikipedia/latest/es/
#dataDirectory: /Volumes/SEAGATE1TB/nerd/0.0.3/wikipedia/es/
dataDirectory: /Volumes/LaCie/workspace/nerd/0.0.3/wikipedia/es/
#dataDirectory: /home/lopez/resources/wikipedia/es/

# path to the stopwords file to consider for the target language
Expand Down
10 changes: 5 additions & 5 deletions data/config/wikipedia-fr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
langCode: fr

# path to the LMDB data
dbDirectory: data/db/db-fr

# path to the embeddings
embeddingsDirectory: data/embeddings/fr
#dbDirectory: /Volumes/SEAGATE1TB/nerd/0.0.3/nerd-data/db/db-fr
#dbDirectory: /Volumes/LaCie/workspace/nerd/0.0.3/nerd-data/db/db-fr
dbDirectory: /Volumes/LaCie/workspace/nerd/0.0.4/nerd-data/db/db-fr

# path to the compiled CSV Wikipedia, infobox, and Wikidata files
# note that these files are only used when creating the LMDB data;
# if the LMDB data are already provided, these files will not be used
dataDirectory: /mnt/data/wikipedia/latest/fr/
#dataDirectory: /Volumes/SEAGATE1TB/nerd/0.0.3/wikipedia/fr/
dataDirectory: /Volumes/LaCie/workspace/nerd/0.0.3/wikipedia/fr/
#dataDirectory: /home/lopez/resources/wikipedia/fr/

# path to the stopwords file to consider for the target language
Expand Down
10 changes: 5 additions & 5 deletions data/config/wikipedia-it.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
langCode: it

# path to the LMDB data
dbDirectory: data/db/db-it

# path to the embeddings
embeddingsDirectory: data/embeddings/it
#dbDirectory: /Volumes/SEAGATE1TB/nerd/0.0.3/nerd-data/db/db-it
#dbDirectory: /Volumes/LaCie/workspace/nerd/0.0.3/nerd-data/db/db-it
dbDirectory: /Volumes/LaCie/workspace/nerd/0.0.4/nerd-data/db/db-it

# path to the compiled CSV Wikipedia, infobox, and Wikidata files
# note that these files are only used when creating the LMDB data;
# if the LMDB data are already provided, these files will not be used
dataDirectory: /mnt/data/wikipedia/latest/it/
#dataDirectory: /Volumes/SEAGATE1TB/nerd/0.0.3/wikipedia/it/
dataDirectory: /Volumes/LaCie/workspace/nerd/0.0.3/wikipedia/it/
#dataDirectory: /home/lopez/resources/wikipedia/it/

# path to the stopwords file to consider for the target language
Expand Down
14 changes: 13 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,12 @@
<artifactId>jersey-server</artifactId>
<version>1.8</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
Expand Down Expand Up @@ -854,7 +860,7 @@
</dependency>

<!-- LMDB -->
<dependency>
<!--<dependency>
<groupId>org.deephacks.lmdbjni</groupId>
<artifactId>lmdbjni</artifactId>
<version>${lmdbjni.version}</version>
Expand All @@ -873,6 +879,12 @@
<groupId>org.deephacks.lmdbjni</groupId>
<artifactId>lmdbjni-win64</artifactId>
<version>${lmdbjni.version}</version>
</dependency>-->

<dependency>
<groupId>org.lmdbjava</groupId>
<artifactId>lmdbjava</artifactId>
<version>0.6.0</version>
</dependency>

<!-- TBD: update to the latest version! -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@
* A machine learning model for ranking a list of ambiguous candidates for a given mention.
*/
public class NerdRanker extends NerdModel {
/**
* The class Logger
*/
private static final Logger logger = LoggerFactory.getLogger(NerdRanker.class);

// ranker model files
Expand All @@ -74,7 +71,7 @@ public class NerdRanker extends NerdModel {
private LowerKnowledgeBase wikipedia = null;

static public int EMBEDDINGS_WINDOW_SIZE = 10; // size of word window to be considered when calculating
// embeddings-based similiarity
// embeddings-based similarity

public NerdRanker(LowerKnowledgeBase wikipedia) {
this.wikipedia = wikipedia;
Expand Down

0 comments on commit 482a825

Please sign in to comment.