diff --git a/README.md b/README.md
index 2210a3e..00530b1 100644
--- a/README.md
+++ b/README.md
@@ -23,33 +23,33 @@ The Neo4j Mazerunner service in this image is a [unmanaged extension](http://neo
 Installation requires 3 docker image deployments, each containing a separate linked component.
 
 * *Hadoop HDFS* (sequenceiq/hadoop-docker:2.4.1)
-* *Neo4j Graph Database* (kbastani/docker-neo4j:latest)
-* *Apache Spark Service* (kbastani/neo4j-graph-analytics:latest)
+* *Neo4j Graph Database* (kbastani/docker-neo4j:2.2.0)
+* *Apache Spark Service* (kbastani/neo4j-graph-analytics:1.1.0)
 
 Pull the following docker images:
 
     docker pull sequenceiq/hadoop-docker:2.4.1
-    docker pull kbastani/docker-neo4j:latest
-    docker pull kbastani/neo4j-graph-analytics:latest
+    docker pull kbastani/docker-neo4j:2.2.0
+    docker pull kbastani/neo4j-graph-analytics:1.1.0
 
 After each image has been downloaded to your Docker server, run the following commands in order to create the linked containers.
 
     # Create HDFS
     docker run -i -t --name hdfs sequenceiq/hadoop-docker:2.4.1 /etc/bootstrap.sh -bash
-
+
     # Create Mazerunner Apache Spark Service
-    docker run -i -t --name mazerunner --link hdfs:hdfs kbastani/neo4j-graph-analytics
-
+    docker run -i -t --name mazerunner --link hdfs:hdfs kbastani/neo4j-graph-analytics:1.1.0
+
     # Create Neo4j database with links to HDFS and Mazerunner
     # Replace <user> and <neo4j-dir>
     # with the location to your existing Neo4j database store directory
-    docker run -d -P -v /Users/<user>/<neo4j-dir>/data:/opt/data --name graphdb --link mazerunner:mazerunner --link hdfs:hdfs kbastani/docker-neo4j
+    docker run -d -P -v /Users/<user>/<neo4j-dir>/data:/opt/data --name graphdb --link mazerunner:mazerunner --link hdfs:hdfs kbastani/docker-neo4j:2.2.0
 
 ### Use Existing Neo4j Database
 
 To use an existing Neo4j database, make sure that the database store directory, typically `data/graph.db`, is available on your host OS. Read the [setup guide](https://github.com/kbastani/docker-neo4j#start-neo4j-container) for *kbastani/docker-neo4j* for additional details.
 
-> Note: The kbastani/docker-neo4j image is running Neo4j 2.1.7. If you point it to an older database store, that database may become unable to be attached to a previous version of Neo4j. Make sure you back up your store files before proceeding.
+> Note: The kbastani/docker-neo4j:2.2.0 image runs Neo4j 2.2.0. If you point it to an older database store, that store may be upgraded and can no longer be attached to a previous version of Neo4j. Make sure you back up your store files before proceeding.
 
 ### Use New Neo4j Database
 
@@ -89,9 +89,9 @@ The result of the analysis will set the property with `{analysis}` as the key on
 To begin graph analysis jobs on a particular metric, make an HTTP GET request to one of the following Neo4j server endpoints:
 
 ### PageRank
-
+
     http://172.17.0.21:7474/service/mazerunner/analysis/pagerank/FOLLOWS
-
+
 * Gets all nodes connected by the `FOLLOWS` relationship and updates each node with the property key `pagerank`.
 * The value of the `pagerank` property is a float data type, ex. `pagerank: 3.14159265359`.
 
@@ -101,7 +101,7 @@ To begin graph analysis jobs on a particular metric, HTTP GET request on the fol
 ### Closeness Centrality (New)
 
     http://172.17.0.21:7474/service/mazerunner/analysis/closeness_centrality/FOLLOWS
-
+
 * Gets all nodes connected by the `FOLLOWS` relationship and updates each node with the property key `closeness_centrality`.
 * The value of the `closeness_centrality` property is a float data type, ex. `closeness_centrality: 0.1337`.
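As a quick reference, here is a minimal sketch of triggering one of these analysis jobs from Java. The container address `172.17.0.21` and the `FOLLOWS` relationship type are the illustrative values used throughout this README; substitute the address of your own `graphdb` container:

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;

    public class MazerunnerJobTrigger {
        public static void main(String[] args) throws Exception {
            // Endpoint format: /service/mazerunner/analysis/{analysis}/{relationship_type}
            URL endpoint = new URL("http://172.17.0.21:7474/service/mazerunner/analysis/pagerank/FOLLOWS");
            HttpURLConnection conn = (HttpURLConnection) endpoint.openConnection();
            conn.setRequestMethod("GET");
            // Read back the job submission response
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
                reader.lines().forEach(System.out::println);
            }
        }
    }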
@@ -109,7 +109,7 @@ To begin graph analysis jobs on a particular metric, HTTP GET request on the fol
 * A key node centrality measure in networks is closeness centrality (Freeman, 1978; Opsahl et al., 2010; Wasserman and Faust, 1994). It is defined as the inverse of farness, which, in turn, is the sum of distances to all other nodes.
 
 ### Triangle Counting
-
+
     http://172.17.0.21:7474/service/mazerunner/analysis/triangle_count/FOLLOWS
 
 * Gets all nodes connected by the `FOLLOWS` relationship and updates each node with the property key `triangle_count`.
@@ -123,7 +123,7 @@ To begin graph analysis jobs on a particular metric, HTTP GET request on the fol
 ### Connected Components
 
     http://172.17.0.21:7474/service/mazerunner/analysis/connected_components/FOLLOWS
-
+
 * Gets all nodes connected by the `FOLLOWS` relationship and updates each node with the property key `connected_components`.
 * The value of the `connected_components` property is an integer data type, ex. `connected_components: 181`.
 
@@ -132,10 +132,10 @@ To begin graph analysis jobs on a particular metric, HTTP GET request on the fol
 * Connected components are used to find isolated clusters, that is, a group of nodes that can reach every other node in the group through a *bidirectional* traversal.
 
-### Strongly Connected Components
-
+### Strongly Connected Components
+
     http://172.17.0.21:7474/service/mazerunner/analysis/strongly_connected_components/FOLLOWS
-
+
 * Gets all nodes connected by the `FOLLOWS` relationship and updates each node with the property key `strongly_connected_components`.
 * The value of the `strongly_connected_components` property is an integer data type, ex. `strongly_connected_components: 26`.
diff --git a/docker/bin/lib/spark-1.0-driver.jar b/docker/bin/lib/spark-1.1.0-RELEASE-driver.jar
similarity index 88%
rename from docker/bin/lib/spark-1.0-driver.jar
rename to docker/bin/lib/spark-1.1.0-RELEASE-driver.jar
index e05e978..ed1c789 100644
Binary files a/docker/bin/lib/spark-1.0-driver.jar and b/docker/bin/lib/spark-1.1.0-RELEASE-driver.jar differ
diff --git a/src/extension/pom.xml b/src/extension/pom.xml
index 0b0aa9c..9a9f500 100644
--- a/src/extension/pom.xml
+++ b/src/extension/pom.xml
@@ -6,10 +6,10 @@
 
     <groupId>org.mazerunner</groupId>
     <artifactId>extension</artifactId>
-    <version>1.0</version>
+    <version>1.1.0-RELEASE</version>
 
     <properties>
-        <neo4j.version>2.1.7</neo4j.version>
+        <neo4j.version>2.2.0</neo4j.version>
         <jersey.version>2.3</jersey.version>
         <guava.version>17.0</guava.version>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -30,14 +30,16 @@
         <dependency>
             <groupId>org.neo4j</groupId>
             <artifactId>neo4j-kernel</artifactId>
+            <version>2.2.0</version>
             <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.neo4j</groupId>
+            <artifactId>neo4j-io</artifactId>
             <version>${neo4j.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>ch.qos.logback</groupId>
-                    <artifactId>logback-classic</artifactId>
-                </exclusion>
-            </exclusions>
+            <type>test-jar</type>
+            <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>org.neo4j</groupId>
diff --git a/src/extension/src/main/java/config/ConfigurationLoader.java b/src/extension/src/main/java/config/ConfigurationLoader.java
index 085f33f..c9e0157 100644
--- a/src/extension/src/main/java/config/ConfigurationLoader.java
+++ b/src/extension/src/main/java/config/ConfigurationLoader.java
@@ -85,7 +85,7 @@ public void initializeTest() {
         hadoopSitePath = "/etc/hadoop/core-site.xml";
         hadoopHdfsPath = "/etc/hadoop/hdfs-site.xml";
-        hadoopHdfsUri = "hdfs://0.0.0.0:8020";
+        hadoopHdfsUri = "hdfs://0.0.0.0:9000";
         mazerunnerRelationshipType = "CONNECTED_TO";
         rabbitmqNodename = "localhost";
     }
diff --git a/src/extension/src/main/java/translation/ParallelBatchTransaction.java b/src/extension/src/main/java/translation/ParallelBatchTransaction.java
index 91ef102..1a47077 100644
--- a/src/extension/src/main/java/translation/ParallelBatchTransaction.java
+++ b/src/extension/src/main/java/translation/ParallelBatchTransaction.java
@@ -5,7 +5,6 @@
 import org.neo4j.graphdb.GraphDatabaseService;
 import org.neo4j.graphdb.Node;
 import org.neo4j.graphdb.Transaction;
-import org.neo4j.kernel.GraphDatabaseAPI;
 
 import java.io.IOException;
 import java.util.Spliterator;
@@ -87,7 +86,7 @@ protected void compute() {
      */
     protected void computeDirectly() throws IOException {
 
-        Transaction tx = ((GraphDatabaseAPI)db).tx().unforced().begin();
+        Transaction tx = db.beginTx();
 
         Node partitionNode = null;
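The change above (repeated in Writer.java and WriterTest.java below) replaces a call into Neo4j's internal kernel API, `((GraphDatabaseAPI) db).tx().unforced().begin()`, which changed in 2.2, with the stable public `GraphDatabaseService.beginTx()`. For reference, a minimal sketch of the idiomatic 2.x transaction pattern; the helper name, node lookup, and property key are illustrative assumptions, not Mazerunner code:

    import org.neo4j.graphdb.GraphDatabaseService;
    import org.neo4j.graphdb.Node;
    import org.neo4j.graphdb.Transaction;

    public class TransactionSketch {
        // Hypothetical helper showing the public transaction API used in this patch.
        static void setPagerank(GraphDatabaseService db, long nodeId, double rank) {
            try (Transaction tx = db.beginTx()) {
                Node node = db.getNodeById(nodeId);
                node.setProperty("pagerank", rank);
                tx.success(); // mark the transaction to commit when it closes
            }
        }
    }

Try-with-resources guarantees the transaction is closed on every code path, which is why the explicit unforced-commit plumbing is no longer needed.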
diff --git a/src/extension/src/main/java/translation/Writer.java b/src/extension/src/main/java/translation/Writer.java
index b03fa44..0a3f2e0 100644
--- a/src/extension/src/main/java/translation/Writer.java
+++ b/src/extension/src/main/java/translation/Writer.java
@@ -12,7 +12,6 @@
 import org.neo4j.graphdb.*;
 import org.neo4j.graphdb.traversal.Evaluators;
 import org.neo4j.helpers.collection.IteratorUtil;
-import org.neo4j.kernel.GraphDatabaseAPI;
 import org.neo4j.tooling.GlobalGraphOperations;
 
 import java.io.BufferedReader;
@@ -197,9 +196,9 @@ public static Path exportSubgraphToHDFSParallel(GraphDatabaseService db) throws
     }
 
     public static void writeBlockForNode(Node n, GraphDatabaseService db, BufferedWriter bufferedWriter, int reportBlockSize, String relationshipType) throws IOException {
-        Transaction tx = ((GraphDatabaseAPI)db).tx().unforced().begin();
+        Transaction tx = db.beginTx();
         Iterator<Relationship> rels = n.getRelationships(withName(relationshipType), Direction.OUTGOING).iterator();
-//        Stream<Relationship> relStream = StreamSupport.stream(Spliterators.spliteratorUnknownSize(rels.iterator(), Spliterator.NONNULL), true);
+
         while(rels.hasNext()) {
             try {
                 Relationship rel = rels.next();
diff --git a/src/extension/src/test/java/translation/WriterTest.java b/src/extension/src/test/java/translation/WriterTest.java
index e038f15..28d0467 100644
--- a/src/extension/src/test/java/translation/WriterTest.java
+++ b/src/extension/src/test/java/translation/WriterTest.java
@@ -16,7 +16,6 @@
 import org.neo4j.graphdb.GraphDatabaseService;
 import org.neo4j.graphdb.Node;
 import org.neo4j.graphdb.Transaction;
-import org.neo4j.kernel.GraphDatabaseAPI;
 import org.neo4j.test.TestGraphDatabaseFactory;
 
 import java.io.*;
@@ -109,7 +108,7 @@ public void testParallelUpdate() throws Exception {
 
         GraphDatabaseService db = setUpDb();
 
-        Transaction tx = ((GraphDatabaseAPI)db).tx().unforced().begin();
+        Transaction tx = db.beginTx();
 
         // Use test configurations
 
@@ -173,7 +172,7 @@ public static void writeListFile(String path, String nodeList) throws IOExceptio
 
     private static GraphDatabaseService setUpDb() {
-        return new TestGraphDatabaseFactory().newImpermanentDatabaseBuilder().newGraphDatabase();
+        return new TestGraphDatabaseFactory().newImpermanentDatabase();
     }
 
     public void testSendProcessorMessage() throws Exception {
diff --git a/src/spark/pom.xml b/src/spark/pom.xml
index 2f7b9e9..0d2c8dd 100644
--- a/src/spark/pom.xml
+++ b/src/spark/pom.xml
@@ -4,17 +4,9 @@
          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
     <modelVersion>4.0.0</modelVersion>
 
-    <repositories>
-        <repository>
-            <id>orgapachespark-1043</id>
-            <name>Apache Spark 1.1.1</name>
-            <url>https://repository.apache.org/content/repositories/orgapachespark-1043/</url>
-        </repository>
-    </repositories>
-
     <groupId>org.mazerunner</groupId>
     <artifactId>spark</artifactId>
-    <version>1.0</version>
+    <version>1.1.0-RELEASE</version>
 
     <properties>
         <jetty.version>7.6.9.v20130131</jetty.version>
@@ -83,7 +75,7 @@
         <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-core_2.10</artifactId>
-            <version>1.1.0</version>
+            <version>1.3.0</version>
             <exclusions>
                 <exclusion>
                     <groupId>ch.qos.logback</groupId>
@@ -94,7 +86,7 @@
         <dependency>
             <groupId>org.apache.spark</groupId>
            <artifactId>spark-graphx_2.10</artifactId>
-            <version>1.1.0</version>
+            <version>1.3.0</version>
             <exclusions>
                 <exclusion>
                     <groupId>ch.qos.logback</groupId>
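With `spark-core_2.10` and `spark-graphx_2.10` bumped to 1.3.0 (and the staging repository for the 1.1.x release candidates removed), the service still drives Spark through a single `JavaSparkContext`. As a reference, a minimal sketch of what an initializer like `GraphProcessor.initializeSparkContext()`, used by the tests below, might look like; the method body is an assumption, not the project's actual implementation, and the values mirror the test configuration in this patch:

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaSparkContext;

    public class SparkContextSketch {
        // Hypothetical stand-in for GraphProcessor.initializeSparkContext().
        public static JavaSparkContext initializeSparkContext() {
            SparkConf conf = new SparkConf()
                    .setMaster("local")        // sparkHost = "local" in the test config
                    .setAppName("mazerunner"); // appName = "mazerunner" in the test config
            return new JavaSparkContext(conf);
        }
    }

Spark allows only one active context per JVM, which is why the tests below hold a single shared instance and close it during teardown.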
diff --git a/src/spark/src/main/java/org/mazerunner/core/config/ConfigurationLoader.java b/src/spark/src/main/java/org/mazerunner/core/config/ConfigurationLoader.java
index 14ec6ad..14904c2 100644
--- a/src/spark/src/main/java/org/mazerunner/core/config/ConfigurationLoader.java
+++ b/src/spark/src/main/java/org/mazerunner/core/config/ConfigurationLoader.java
@@ -110,9 +110,9 @@ public void initializeTest() {
         hadoopSitePath = "/Users/kennybastani/hadoop-1.0.4/conf/core-site.xml";
         hadoopHdfsPath = "/Users/kennybastani/hadoop-1.0.4/conf/hdfs-site.xml";
-        hadoopHdfsUri = "hdfs://ip-172-31-5-251.us-west-1.compute.internal:9000";
+        hadoopHdfsUri = "hdfs://0.0.0.0:9000";
         mazerunnerRelationshipType = "CONNECTED_TO";
-        rabbitmqNodename = "ec2-54-183-26-46.us-west-1.compute.amazonaws.com";
+        rabbitmqNodename = "localhost";
         sparkHost = "local";
         appName = "mazerunner";
         executorMemory = "13g";
diff --git a/src/spark/src/main/java/org/mazerunner/core/messaging/Worker.java b/src/spark/src/main/java/org/mazerunner/core/messaging/Worker.java
index 3d4b793..7a7aa78 100644
--- a/src/spark/src/main/java/org/mazerunner/core/messaging/Worker.java
+++ b/src/spark/src/main/java/org/mazerunner/core/messaging/Worker.java
@@ -46,7 +46,7 @@ public class Worker {
     private String sparkMaster = "local";
 
     @Option(name="--hadoop.hdfs",usage="The HDFS URL (e.g. hdfs://0.0.0.0:9000).", metaVar = "")
-    private String hadoopHdfs = "hdfs://10.0.0.4:8020";
+    private String hadoopHdfs = "hdfs://0.0.0.0:9000";
 
     @Option(name="--spark.driver.host",usage="The host name of the Spark driver (eg. ec2-54-67-91-4.us-west-1.compute.amazonaws.com)", metaVar = "")
     private String driverHost = "mazerunner";
diff --git a/src/spark/src/test/java/org/mazerunner/core/processor/GraphProcessorTest.java b/src/spark/src/test/java/org/mazerunner/core/processor/GraphProcessorTest.java
index 952b0c0..1ccb2b8 100644
--- a/src/spark/src/test/java/org/mazerunner/core/processor/GraphProcessorTest.java
+++ b/src/spark/src/test/java/org/mazerunner/core/processor/GraphProcessorTest.java
@@ -2,6 +2,8 @@
 import junit.framework.TestCase;
 import org.apache.spark.api.java.JavaSparkContext;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
 import org.mazerunner.core.algorithms;
 import org.mazerunner.core.config.ConfigurationLoader;
@@ -14,6 +16,18 @@
 import java.util.List;
 
 public class GraphProcessorTest extends TestCase {
+    JavaSparkContext javaSparkContext;
+
+    @Before
+    public void setUp() {
+        ConfigurationLoader.testPropertyAccess = true;
+        javaSparkContext = GraphProcessor.initializeSparkContext();
+    }
+
+    @After
+    public void tearDown() {
+        GraphProcessor.javaSparkContext.close();
+    }
 
     @Test
     public void testProcessEdgeList() throws Exception {
@@ -30,7 +44,7 @@ public void testProcessEdgeList() throws Exception {
             "3 0"
         )).iterator());
 
-        GraphProcessor.processEdgeList(new ProcessorMessage(path, GraphProcessor.TRIANGLE_COUNT, ProcessorMode.Partitioned));
+        GraphProcessor.processEdgeList(new ProcessorMessage(path, GraphProcessor.TRIANGLE_COUNT, ProcessorMode.Unpartitioned));
     }
 
     @Test
@@ -58,8 +72,8 @@ public void testShortestPaths() throws Exception {
         // Test writing the PageRank result to HDFS path
         FileUtil.writeListFile(path, nodeList.iterator());
 
-        JavaSparkContext javaSparkContext = GraphProcessor.initializeSparkContext();
-        Iterable<String> results = algorithms.closenessCentrality(javaSparkContext.sc(), path);
+
+        Iterable<String> results = algorithms.closenessCentrality(GraphProcessor.javaSparkContext.sc(), path);
         results.iterator().forEachRemaining(System.out::print);
     }
 }
\ No newline at end of file
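One caveat on the test lifecycle above: `GraphProcessorTest` extends `junit.framework.TestCase`, so it runs under JUnit's 3.x-style runner, which invokes `setUp()`/`tearDown()` by method name and ignores `@Before`/`@After` annotations. The new methods still run only because they use the legacy names; the annotations are harmless but redundant. A minimal sketch of the annotation-free equivalent:

    import junit.framework.TestCase;

    public class LifecycleSketch extends TestCase {
        @Override
        protected void setUp() throws Exception {
            super.setUp();
            // Runs before each test method under the JUnit 3 runner; no @Before needed.
        }

        @Override
        protected void tearDown() throws Exception {
            // Runs after each test method under the JUnit 3 runner; no @After needed.
            super.tearDown();
        }

        public void testLifecycle() {
            assertTrue(true);
        }
    }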
diff --git a/src/spark/src/test/scala/hdfs/FileUtilTest.java b/src/spark/src/test/scala/hdfs/FileUtilTest.java
deleted file mode 100644
index d5f5484..0000000
--- a/src/spark/src/test/scala/hdfs/FileUtilTest.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package hdfs;
-
-import junit.framework.TestCase;
-
-public class FileUtilTest extends TestCase {
-
-}
\ No newline at end of file