Skip to content

Commit

Permalink
[#1090] PageRank algorithm bugfix (#1095)
Browse files Browse the repository at this point in the history
fixes #1090
  • Loading branch information
foerster-finsternis authored and Kevin Gómez committed Nov 26, 2018
1 parent 95f7c90 commit 21281bd
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,32 +49,55 @@ public class PageRank extends GradoopGellyAlgorithm<NullValue, NullValue> {
private final int iterations;

/**
* Constructor for Page Rank with fixed number of iterations.
* Whether to include "zero-degree" vertices in the PageRank computation and result. These
* vertices only affect the scores of other vertices indirectly through influencing the initial
* proportional score of {@code (1 - damping factor) / number of vertices}.
* If set to {@code false}, these vertices will NOT be part of the computation and the returned
* result graph.
*/
private final boolean includeZeroDegrees;

/**
* Constructor for Page Rank with fixed number of iterations and {@link #includeZeroDegrees}
* set to {@code false}.
*
* @param propertyKey Property key to store the rank in.
* @param propertyKey Property key to store the page rank in.
* @param dampingFactor Damping factor.
* @param iterations Number of iterations.
*/
public PageRank(String propertyKey, double dampingFactor, int iterations) {
// Delegate to the four-argument constructor, passing false for includeZeroDegrees so that
// "zero-degree" vertices are left out of the computation and the result graph.
this(propertyKey, dampingFactor, iterations, false);
}

/**
 * Creates a Page Rank operator that runs for a fixed number of iterations and lets the caller
 * decide how "zero-degree" vertices are treated.
 *
 * @param propertyKey        Key of the vertex property that receives the page rank.
 * @param dampingFactor      Damping factor handed to the PageRank computation.
 * @param iterations         Number of iterations to run.
 * @param includeZeroDegrees {@code true} to keep "zero-degree" vertices in the PageRank
 *                           computation and result, {@code false} to leave them out.
 */
public PageRank(String propertyKey, double dampingFactor, int iterations,
  boolean includeZeroDegrees) {
  super(new VertexToGellyVertexWithNullValue(), new EdgeToGellyEdgeWithNullValue());
  // Configuration of the PageRank run itself.
  this.iterations = iterations;
  this.dampingFactor = dampingFactor;
  this.includeZeroDegrees = includeZeroDegrees;
  // Where the resulting score is stored on each vertex.
  this.propertyKey = propertyKey;
}

/**
 * Runs {@code org.apache.flink.graph.library.linkanalysis.PageRank} on the given Gelly graph
 * with the configured damping factor and number of iterations, joins the result with the
 * vertices of the current graph (keyed by {@code PageRankResultKey} and {@code Id}) and maps
 * each joined pair through {@code PageRankToAttribute}, which presumably stores the score under
 * {@code propertyKey} (verify against that class).
 *
 * NOTE(review): this span comes from a flattened diff view, so superseded and replacement
 * lines of the method appear next to each other below.
 *
 * @param graph Gelly graph the PageRank computation is run on.
 * @return Logical graph created by the logical graph factory from the joined vertices and the
 *         edges of the current graph.
 * @throws Exception Declared by the method signature.
 */
@Override
public LogicalGraph executeInGelly(Graph<GradoopId, NullValue, NullValue> graph)
throws Exception {
DataSet<Vertex> newVertices =
new org.apache.flink.graph.library.linkanalysis.PageRank<GradoopId, NullValue, NullValue>(
dampingFactor, iterations)
.run(graph)
dampingFactor, iterations).setIncludeZeroDegreeVertices(includeZeroDegrees).run(graph)
.join(currentGraph.getVertices())
.where(new PageRankResultKey())
.equalTo(new Id<>())
.where(new PageRankResultKey()).equalTo(new Id<>())
.with(new PageRankToAttribute(propertyKey));
return currentGraph.getConfig().getLogicalGraphFactory().fromDataSets(newVertices,
currentGraph.getEdges());
return currentGraph.getConfig().getLogicalGraphFactory().fromDataSets(
currentGraph.getGraphHead(), newVertices, currentGraph.getEdges());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
* and a number of maximum iterations. It computes a per-vertex score which is the sum of the
* PageRank-scores transmitted over all in-edges. The score of each vertex is divided evenly
* among its out-edges.
* The PageRank-algorithm is called with {@code setIncludeZeroDegreeVertices(true)}.
*
* If vertices got different PageRank-scores, all scores are scaled in a range between 0 and 1.
* Then it retains all vertices with a PageRank-score greater or equal/smaller than a given
Expand Down Expand Up @@ -112,7 +113,7 @@ public PageRankSampling(double dampeningFactor, int maxIteration, double thresho
public LogicalGraph sample(LogicalGraph graph) {

LogicalGraph pageRankGraph = new PageRank(
PAGE_RANK_SCORE_PROPERTY_KEY, dampeningFactor, maxIteration).execute(graph);
PAGE_RANK_SCORE_PROPERTY_KEY, dampeningFactor, maxIteration, true).execute(graph);

graph = graph.getConfig().getLogicalGraphFactory().fromDataSets(
graph.getGraphHead(), pageRankGraph.getVertices(), pageRankGraph.getEdges());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import org.gradoop.common.model.impl.pojo.Vertex;
import org.gradoop.flink.model.GradoopFlinkTestBase;
import org.gradoop.flink.model.impl.epgm.LogicalGraph;
import org.gradoop.flink.util.FlinkAsciiGraphLoader;
import org.junit.Before;
import org.junit.Test;

import java.util.List;
Expand All @@ -30,24 +32,72 @@
public class PageRankTest extends GradoopFlinkTestBase {

/**
* The property key used to store the page rank result.
* Property key to store the page rank in.
*/
public static final String PROPERTY_KEY = "pageRank";
private final String propertyKey = "pageRankScore";

/**
* Execute the {@link PageRank} operator and check if the property was set for all vertices.
* The graph used for testing.
*/
private LogicalGraph testGraph;

/**
 * Builds the test graph before each test: vertices v0, v1 and v2 are connected to each other
 * in both directions, while v3 has no edges at all.
 */
@Before
public void prepareTestGraph() {
  String asciiGraph = String.join("",
    "graph[",
    "(v0 {id:0, value:\"A\"})",
    "(v1 {id:1, value:\"B\"})",
    "(v2 {id:2, value:\"C\"})",
    "(v3 {id:3, value:\"D\"})",
    "(v0)-[e0]->(v1)",
    "(v1)-[e1]->(v0)",
    "(v0)-[e2]->(v2)",
    "(v2)-[e3]->(v0)",
    "(v1)-[e4]->(v2)",
    "(v2)-[e5]->(v1)",
    "]");
  FlinkAsciiGraphLoader graphLoader = getLoaderFromString(asciiGraph);
  testGraph = graphLoader.getLogicalGraphByVariable("graph");
}

/**
* Check PageRank for excluded "zero-degree" vertices
*
* @throws Exception If the execution fails.
*/
@Test
public void testPageRankExecution() throws Exception {
LogicalGraph input = getSocialNetworkLoader().getLogicalGraphByVariable("g0");
long inputVertexCount = input.getVertices().count();
LogicalGraph result = new PageRank(PROPERTY_KEY, 0.3, 20).execute(input);
List<Vertex> resultVertices = result.getVertices().collect();
assertEquals(inputVertexCount, resultVertices.size());
for (Vertex vertex : resultVertices) {
assertTrue(vertex.hasProperty(PROPERTY_KEY));
public void testPageRankWithoutZeroDegrees() throws Exception {
  // The three-argument constructor leaves includeZeroDegrees at false.
  LogicalGraph resultGraph = new PageRank(propertyKey, 0.3, 20)
    .execute(testGraph);
  checkPageRankProperty(resultGraph);
  // Only v0, v1 and v2 have edges in the test graph, so three vertices are expected.
  // JUnit's assertEquals takes the expected value first, then the actual one.
  assertEquals(3L, resultGraph.getVertices().count());
}

/**
 * Checks PageRank with "zero-degree" vertices included: every vertex of the input graph must
 * be part of the result and carry a page rank property.
 *
 * @throws Exception If the execution fails.
 */
@Test
public void testPageRankWithZeroDegrees() throws Exception {
  LogicalGraph resultGraph = new PageRank(propertyKey, 0.3, 20, true)
    .execute(testGraph);
  checkPageRankProperty(resultGraph);
  // JUnit's assertEquals takes the expected value first, then the actual one: the input
  // vertex count is the expectation, the result vertex count is what is being verified.
  assertEquals(testGraph.getVertices().count(), resultGraph.getVertices().count());
}

/**
 * Asserts that every vertex of the given graph has the PageRank property and that the stored
 * value is greater than zero.
 *
 * @param graph The result graph.
 * @throws Exception If collecting the vertices fails.
 */
private void checkPageRankProperty(LogicalGraph graph) throws Exception {
  List<Vertex> rankedVertices = graph.getVertices().collect();
  rankedVertices.forEach(rankedVertex -> {
    assertTrue(rankedVertex.hasProperty(propertyKey));
    assertTrue(rankedVertex.getPropertyValue(propertyKey).getDouble() > 0d);
  });
}
}

0 comments on commit 21281bd

Please sign in to comment.