Merge pull request #33 from kbastani/1.1.0-RELEASE
Upgrading Neo4j and Spark
kbastani committed Mar 26, 2015
2 parents d5c535b + f71b090 commit 2b39b6b
Showing 12 changed files with 56 additions and 58 deletions.
34 changes: 17 additions & 17 deletions README.md
@@ -23,33 +23,33 @@ The Neo4j Mazerunner service in this image is an [unmanaged extension](http://neo
Installation requires three Docker image deployments, each containing a separate linked component.

* *Hadoop HDFS* (sequenceiq/hadoop-docker:2.4.1)
* *Neo4j Graph Database* (kbastani/docker-neo4j:latest)
* *Apache Spark Service* (kbastani/neo4j-graph-analytics:latest)
* *Neo4j Graph Database* (kbastani/docker-neo4j:2.2.0)
* *Apache Spark Service* (kbastani/neo4j-graph-analytics:1.1.0)

Pull the following docker images:

docker pull sequenceiq/hadoop-docker:2.4.1
docker pull kbastani/docker-neo4j:latest
docker pull kbastani/neo4j-graph-analytics:latest
docker pull kbastani/docker-neo4j:2.2.0
docker pull kbastani/neo4j-graph-analytics:1.1.0

After each image has been downloaded to your Docker server, run the following commands, in order, to create the linked containers.

# Create HDFS
docker run -i -t --name hdfs sequenceiq/hadoop-docker:2.4.1 /etc/bootstrap.sh -bash

# Create Mazerunner Apache Spark Service
docker run -i -t --name mazerunner --link hdfs:hdfs kbastani/neo4j-graph-analytics
docker run -i -t --name mazerunner --link hdfs:hdfs kbastani/neo4j-graph-analytics:1.1.0

# Create Neo4j database with links to HDFS and Mazerunner
# Replace <user> and <neo4j-path>
# with the location to your existing Neo4j database store directory
docker run -d -P -v /Users/<user>/<neo4j-path>/data:/opt/data --name graphdb --link mazerunner:mazerunner --link hdfs:hdfs kbastani/docker-neo4j
docker run -d -P -v /Users/<user>/<neo4j-path>/data:/opt/data --name graphdb --link mazerunner:mazerunner --link hdfs:hdfs kbastani/docker-neo4j:2.2.0

### Use Existing Neo4j Database

To use an existing Neo4j database, make sure that the database store directory, typically `data/graph.db`, is available on your host OS. Read the [setup guide](https://github.com/kbastani/docker-neo4j#start-neo4j-container) for *kbastani/docker-neo4j* for additional details.

> Note: The kbastani/docker-neo4j image is running Neo4j 2.1.7. If you point it to an older database store, that store may be upgraded in place and can then no longer be attached to a previous version of Neo4j. Make sure you back up your store files before proceeding.
> Note: The kbastani/docker-neo4j:2.2.0 image is running Neo4j 2.2.0. If you point it to an older database store, that store may be upgraded in place and can then no longer be attached to a previous version of Neo4j. Make sure you back up your store files before proceeding.

### Use New Neo4j Database

@@ -89,9 +89,9 @@ The result of the analysis will set the property with `{analysis}` as the key on
To begin graph analysis jobs on a particular metric, send an HTTP GET request to the following Neo4j server endpoints:

### PageRank

http://172.17.0.21:7474/service/mazerunner/analysis/pagerank/FOLLOWS

* Gets all nodes connected by the `FOLLOWS` relationship and updates each node with the property key `pagerank`.

* The value of the `pagerank` property is a float data type, ex. `pagerank: 3.14159265359`.
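
For illustration, here is a minimal Java sketch (not part of this commit) that triggers the PageRank job; the address `172.17.0.21:7474` is taken from the endpoint example above and will differ per Docker host:

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;

    public class TriggerPageRank {
        public static void main(String[] args) throws Exception {
            // Assumed address: replace 172.17.0.21 with your Neo4j container's IP.
            URL url = new URL("http://172.17.0.21:7474/service/mazerunner/analysis/pagerank/FOLLOWS");
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");
            System.out.println("HTTP " + conn.getResponseCode());
            try (BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
                String line;
                while ((line = in.readLine()) != null) {
                    System.out.println(line); // the job itself runs asynchronously on the Spark service
                }
            }
            conn.disconnect();
        }
    }
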
@@ -101,15 +101,15 @@ To begin graph analysis jobs on a particular metric, HTTP GET request on the fol
### Closeness Centrality (New)

http://172.17.0.21:7474/service/mazerunner/analysis/closeness_centrality/FOLLOWS

* Gets all nodes connected by the `FOLLOWS` relationship and updates each node with the property key `closeness_centrality`.

* The value of the `closeness_centrality` property is a float data type, ex. `closeness_centrality: 0.1337`.

* A key node centrality measure in networks is closeness centrality (Freeman, 1978; Opsahl et al., 2010; Wasserman and Faust, 1994). It is defined as the inverse of farness, which, in turn, is the sum of distances to all other nodes (see the sketch below).
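
For intuition, a self-contained Java sketch (illustrative only; Mazerunner itself computes these metrics with Spark/GraphX): on the undirected path a–b–c, the farness of `a` is 1 + 2 = 3, so its closeness is 1/3 ≈ 0.33.

    import java.util.ArrayDeque;
    import java.util.Arrays;
    import java.util.Deque;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class ClosenessExample {
        public static void main(String[] args) {
            // Hypothetical undirected path graph: a - b - c.
            Map<String, List<String>> adj = new HashMap<>();
            adj.put("a", Arrays.asList("b"));
            adj.put("b", Arrays.asList("a", "c"));
            adj.put("c", Arrays.asList("b"));

            for (String node : adj.keySet()) {
                // BFS to get shortest-path distances from this node.
                Map<String, Integer> dist = new HashMap<>();
                dist.put(node, 0);
                Deque<String> queue = new ArrayDeque<>();
                queue.add(node);
                while (!queue.isEmpty()) {
                    String current = queue.poll();
                    for (String next : adj.get(current)) {
                        if (!dist.containsKey(next)) {
                            dist.put(next, dist.get(current) + 1);
                            queue.add(next);
                        }
                    }
                }
                // Farness is the sum of distances; closeness is its inverse.
                int farness = 0;
                for (int d : dist.values()) farness += d;
                System.out.printf("%s -> closeness_centrality: %.2f%n", node, 1.0 / farness);
            }
        }
    }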

### Triangle Counting

http://172.17.0.21:7474/service/mazerunner/analysis/triangle_count/FOLLOWS

* Gets all nodes connected by the `FOLLOWS` relationship and updates each node with the property key `triangle_count`.
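
As a toy Java illustration (hypothetical data, not repository code): in a graph with edges a–b, b–c, c–a, each of the three nodes sits on exactly one triangle, so each would receive `triangle_count: 1`.

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    public class TriangleCountExample {
        public static void main(String[] args) {
            // Hypothetical undirected edges: one triangle a-b-c plus a dangling edge c-d.
            String[][] edges = {{"a", "b"}, {"b", "c"}, {"c", "a"}, {"c", "d"}};
            Map<String, Set<String>> adj = new HashMap<>();
            for (String[] e : edges) {
                if (!adj.containsKey(e[0])) adj.put(e[0], new HashSet<String>());
                if (!adj.containsKey(e[1])) adj.put(e[1], new HashSet<String>());
                adj.get(e[0]).add(e[1]);
                adj.get(e[1]).add(e[0]);
            }
            for (Map.Entry<String, Set<String>> entry : adj.entrySet()) {
                // A node's triangle count is the number of its neighbor pairs
                // that are themselves directly connected.
                List<String> neighbors = new ArrayList<>(entry.getValue());
                int triangles = 0;
                for (int i = 0; i < neighbors.size(); i++)
                    for (int j = i + 1; j < neighbors.size(); j++)
                        if (adj.get(neighbors.get(i)).contains(neighbors.get(j)))
                            triangles++;
                System.out.println(entry.getKey() + " -> triangle_count: " + triangles);
            }
        }
    }
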
@@ -123,7 +123,7 @@ To begin graph analysis jobs on a particular metric, HTTP GET request on the fol
### Connected Components

http://172.17.0.21:7474/service/mazerunner/analysis/connected_components/FOLLOWS

* Gets all nodes connected by the `FOLLOWS` relationship and updates each node with the property key `connected_components`.

* The value of the `connected_components` property is an integer data type, ex. `connected_components: 181`.
@@ -132,10 +132,10 @@ To begin graph analysis jobs on a particular metric, HTTP GET request on the fol

* Connected components are used to find isolated clusters, that is, groups of nodes in which each node can reach every other node in the group through a *bidirectional* traversal (see the sketch below).
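
To make the *bidirectional* reachability idea concrete, here is a small union-find sketch in Java (illustrative only; Mazerunner itself computes this with Spark/GraphX). Nodes that end up with the same root share a component id, analogous to the `connected_components` property:

    public class ConnectedComponentsExample {
        public static void main(String[] args) {
            // Hypothetical edge list over 5 nodes: {0,1,2} form one component, {3,4} another.
            int[][] edges = {{0, 1}, {1, 2}, {3, 4}};
            int[] parent = new int[5];
            for (int i = 0; i < parent.length; i++) parent[i] = i;

            for (int[] e : edges) {
                // Union: edges are treated as undirected, so direction is ignored.
                parent[find(parent, e[0])] = find(parent, e[1]);
            }
            for (int i = 0; i < parent.length; i++) {
                // Nodes sharing a root share a component id.
                System.out.println("node " + i + " -> component " + find(parent, i));
            }
        }

        static int find(int[] parent, int x) {
            while (parent[x] != x) x = parent[x] = parent[parent[x]]; // path halving
            return x;
        }
    }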

### Strongly Connected Components

http://172.17.0.21:7474/service/mazerunner/analysis/strongly_connected_components/FOLLOWS

* Gets all nodes connected by the `FOLLOWS` relationship and updates each node with the property key `strongly_connected_components`.

* The value of the `strongly_connected_components` property is an integer data type, ex. `strongly_connected_components: 26`.
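
Unlike plain connected components, strongly connected components respect edge direction: two nodes share an SCC only if each can reach the other. A brute-force Java sketch via transitive closure (illustrative only; fine for tiny graphs, not how Mazerunner computes it):

    public class SccExample {
        public static void main(String[] args) {
            // Hypothetical directed edges: 0->1, 1->2, 2->0 form a cycle (one SCC);
            // 2->3 leaves node 3 in its own SCC, since 3 cannot reach back.
            int n = 4;
            int[][] edges = {{0, 1}, {1, 2}, {2, 0}, {2, 3}};
            boolean[][] reach = new boolean[n][n];
            for (int i = 0; i < n; i++) reach[i][i] = true;
            for (int[] e : edges) reach[e[0]][e[1]] = true;
            // Floyd-Warshall-style transitive closure.
            for (int k = 0; k < n; k++)
                for (int i = 0; i < n; i++)
                    for (int j = 0; j < n; j++)
                        if (reach[i][k] && reach[k][j]) reach[i][j] = true;
            for (int i = 0; i < n; i++) {
                // Two nodes share an SCC iff each can reach the other.
                int component = i;
                for (int j = 0; j < i; j++)
                    if (reach[i][j] && reach[j][i]) { component = j; break; }
                System.out.println("node " + i + " -> strongly_connected_components: " + component);
            }
        }
    }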
Binary file not shown.
18 changes: 10 additions & 8 deletions src/extension/pom.xml
@@ -6,10 +6,10 @@

<groupId>org.mazerunner</groupId>
<artifactId>extension</artifactId>
<version>1.0</version>
<version>1.1.0-RELEASE</version>

<properties>
<neo4j.version>2.1.7</neo4j.version>
<neo4j.version>2.2.0</neo4j.version>
<joda.version>2.3</joda.version>
<guava.version>17.0</guava.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -30,14 +30,16 @@
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j-kernel</artifactId>
<version>2.2.0</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j-io</artifactId>
<version>${neo4j.version}</version>
<exclusions>
<exclusion>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</exclusion>
</exclusions>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.neo4j</groupId>
@@ -85,7 +85,7 @@ public void initializeTest()
{
hadoopSitePath = "/etc/hadoop/core-site.xml";
hadoopHdfsPath = "/etc/hadoop/hdfs-site.xml";
hadoopHdfsUri = "hdfs://0.0.0.0:8020";
hadoopHdfsUri = "hdfs://0.0.0.0:9000";
mazerunnerRelationshipType = "CONNECTED_TO";
rabbitmqNodename = "localhost";
}
@@ -5,7 +5,6 @@
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Transaction;
import org.neo4j.kernel.GraphDatabaseAPI;

import java.io.IOException;
import java.util.Spliterator;
@@ -87,7 +87,7 @@ protected void compute() {
*/
protected void computeDirectly() throws IOException {

Transaction tx = ((GraphDatabaseAPI)db).tx().unforced().begin();
Transaction tx = db.beginTx();

Node partitionNode = null;

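This change replaces the internal `GraphDatabaseAPI.tx().unforced().begin()` call with the public `GraphDatabaseService.beginTx()`. For context, a sketch (assuming Neo4j 2.x; not a file in this diff) of the idiomatic try-with-resources form of that API:

    import org.neo4j.graphdb.GraphDatabaseService;
    import org.neo4j.graphdb.Node;
    import org.neo4j.graphdb.Transaction;

    public class TransactionIdiom {
        // Illustrative sketch: beginTx() with try-with-resources; success()
        // marks the transaction to commit when it is closed.
        public static void setProperty(GraphDatabaseService db, long nodeId, double pagerank) {
            try (Transaction tx = db.beginTx()) {
                Node node = db.getNodeById(nodeId);
                node.setProperty("pagerank", pagerank);
                tx.success();
            }
        }
    }
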
5 changes: 2 additions & 3 deletions src/extension/src/main/java/translation/Writer.java
@@ -12,7 +12,6 @@
import org.neo4j.graphdb.*;
import org.neo4j.graphdb.traversal.Evaluators;
import org.neo4j.helpers.collection.IteratorUtil;
import org.neo4j.kernel.GraphDatabaseAPI;
import org.neo4j.tooling.GlobalGraphOperations;

import java.io.BufferedReader;
@@ -197,9 +196,9 @@ public static Path exportSubgraphToHDFSParallel(GraphDatabaseService db) throws
}

public static void writeBlockForNode(Node n, GraphDatabaseService db, BufferedWriter bufferedWriter, int reportBlockSize, String relationshipType) throws IOException {
Transaction tx = ((GraphDatabaseAPI)db).tx().unforced().begin();
Transaction tx = db.beginTx();
Iterator<Relationship> rels = n.getRelationships(withName(relationshipType), Direction.OUTGOING).iterator();
// Stream<Relationship> relStream = StreamSupport.stream(Spliterators.spliteratorUnknownSize(rels.iterator(), Spliterator.NONNULL), true);

while(rels.hasNext()) {
try {
Relationship rel = rels.next();
5 changes: 2 additions & 3 deletions src/extension/src/test/java/translation/WriterTest.java
@@ -16,7 +16,6 @@
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Transaction;
import org.neo4j.kernel.GraphDatabaseAPI;
import org.neo4j.test.TestGraphDatabaseFactory;

import java.io.*;
@@ -109,7 +108,7 @@ public void testParallelUpdate() throws Exception {

GraphDatabaseService db = setUpDb();

Transaction tx = ((GraphDatabaseAPI)db).tx().unforced().begin();
Transaction tx = db.beginTx();


// Use test configurations
@@ -173,7 +172,7 @@ public static void writeListFile(String path, String nodeList) throws IOExceptio

private static GraphDatabaseService setUpDb()
{
return new TestGraphDatabaseFactory().newImpermanentDatabaseBuilder().newGraphDatabase();
return new TestGraphDatabaseFactory().newImpermanentDatabase();
}
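
For reference, a sketch (assuming Neo4j's kernel test jar on the classpath) of how the simplified `newImpermanentDatabase()` call is typically used to spin up a throwaway in-memory store:

    import org.neo4j.graphdb.GraphDatabaseService;
    import org.neo4j.graphdb.Transaction;
    import org.neo4j.test.TestGraphDatabaseFactory;

    public class ImpermanentDbExample {
        public static void main(String[] args) {
            // One call replaces the former newImpermanentDatabaseBuilder().newGraphDatabase() chain.
            GraphDatabaseService db = new TestGraphDatabaseFactory().newImpermanentDatabase();
            try (Transaction tx = db.beginTx()) {
                db.createNode();
                tx.success();
            }
            db.shutdown();
        }
    }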

public void testSendProcessorMessage() throws Exception {
14 changes: 3 additions & 11 deletions src/spark/pom.xml
@@ -4,17 +4,9 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<repositories>
<repository>
<id>orgapachespark-1043</id>
<name>Apache Spark 1.1.1</name>
<url>https://repository.apache.org/content/repositories/orgapachespark-1043/</url>
</repository>
</repositories>

<groupId>org.mazerunner</groupId>
<artifactId>spark</artifactId>
<version>1.0</version>
<version>1.1.0-RELEASE</version>

<properties>
<jetty.version>7.6.9.v20130131</jetty.version>
@@ -83,7 +75,7 @@
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>1.1.0</version>
<version>1.3.0</version>
<exclusions>
<exclusion>
<groupId>ch.qos.logback</groupId>
@@ -94,7 +86,7 @@
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-graphx_2.10</artifactId>
<version>1.1.0</version>
<version>1.3.0</version>
<exclusions>
<exclusion>
<groupId>ch.qos.logback</groupId>
@@ -110,9 +110,9 @@ public void initializeTest()
{
hadoopSitePath = "/Users/kennybastani/hadoop-1.0.4/conf/core-site.xml";
hadoopHdfsPath = "/Users/kennybastani/hadoop-1.0.4/conf/hdfs-site.xml";
hadoopHdfsUri = "hdfs://ip-172-31-5-251.us-west-1.compute.internal:9000";
hadoopHdfsUri = "hdfs://0.0.0.0:9000";
mazerunnerRelationshipType = "CONNECTED_TO";
rabbitmqNodename = "ec2-54-183-26-46.us-west-1.compute.amazonaws.com";
rabbitmqNodename = "localhost";
sparkHost = "local";
appName = "mazerunner";
executorMemory = "13g";
@@ -46,7 +46,7 @@ public class Worker {
private String sparkMaster = "local";

@Option(name="--hadoop.hdfs",usage="The HDFS URL (e.g. hdfs://0.0.0.0:9000).", metaVar = "<url>")
private String hadoopHdfs = "hdfs://10.0.0.4:8020";
private String hadoopHdfs = "hdfs://0.0.0.0:9000";

@Option(name="--spark.driver.host",usage="The host name of the Spark driver (eg. ec2-54-67-91-4.us-west-1.compute.amazonaws.com)", metaVar = "<url>")
private String driverHost = "mazerunner";
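
Assuming the `@Option` annotations above come from args4j (an assumption; the import lines sit outside this hunk), defaults such as `--hadoop.hdfs` are typically overridden through a command-line parser, as in this hypothetical launcher sketch:

    import org.kohsuke.args4j.CmdLineException;
    import org.kohsuke.args4j.CmdLineParser;

    public class WorkerLauncher {
        public static void main(String[] args) {
            Worker worker = new Worker(); // the class shown in this hunk
            CmdLineParser parser = new CmdLineParser(worker);
            try {
                // e.g. --hadoop.hdfs hdfs://0.0.0.0:9000 --spark.master local
                parser.parseArgument(args);
            } catch (CmdLineException e) {
                parser.printUsage(System.err);
            }
        }
    }
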
@@ -2,6 +2,8 @@

import junit.framework.TestCase;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.mazerunner.core.algorithms;
import org.mazerunner.core.config.ConfigurationLoader;
@@ -14,6 +16,18 @@
import java.util.List;

public class GraphProcessorTest extends TestCase {
JavaSparkContext javaSparkContext;

@Before
public void setUp() {
ConfigurationLoader.testPropertyAccess = true;
javaSparkContext = GraphProcessor.initializeSparkContext();
}

@After
public void tearDown() {
GraphProcessor.javaSparkContext.close();
}

@Test
public void testProcessEdgeList() throws Exception {
@@ -30,7 +44,7 @@
"3 0"
)).iterator());

GraphProcessor.processEdgeList(new ProcessorMessage(path, GraphProcessor.TRIANGLE_COUNT, ProcessorMode.Partitioned));
GraphProcessor.processEdgeList(new ProcessorMessage(path, GraphProcessor.TRIANGLE_COUNT, ProcessorMode.Unpartitioned));
}

@Test
@@ -58,8 +72,8 @@ public void testShortestPaths() throws Exception {

// Test writing the PageRank result to HDFS path
FileUtil.writeListFile(path, nodeList.iterator());
JavaSparkContext javaSparkContext = GraphProcessor.initializeSparkContext();
Iterable<String> results = algorithms.closenessCentrality(javaSparkContext.sc(), path);

Iterable<String> results = algorithms.closenessCentrality(GraphProcessor.javaSparkContext.sc(), path);
results.iterator().forEachRemaining(System.out::print);
}
}
7 changes: 0 additions & 7 deletions src/spark/src/test/scala/hdfs/FileUtilTest.java

This file was deleted.
