Skip to content

Commit

Permalink
[8cS8e3IT] Optimise apoc.meta.* procedures (#589)
Browse files Browse the repository at this point in the history
  • Loading branch information
loveleif authored Feb 21, 2024
1 parent d2f871b commit fe271e2
Show file tree
Hide file tree
Showing 13 changed files with 509 additions and 153 deletions.
36 changes: 36 additions & 0 deletions benchmark/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
plugins {
id 'java'
id "me.champeau.jmh" version "0.7.2"
}

repositories {
mavenCentral()
}

dependencies {
jmh project(':common')
jmh project(':core')
jmh "com.neo4j:neo4j-enterprise:$neo4jVersionEffective"
jmh "org.neo4j:neo4j:$neo4jVersionEffective"
}

jmh {
iterations = 5 // Number of measurement iterations to do.
benchmarkMode = ['thrpt'] // Benchmark mode. Available modes are: [Throughput/thrpt, AverageTime/avgt, SampleTime/sample, SingleShotTime/ss, All/all]
fork = 1 // How many times to forks a single benchmark. Use 0 to disable forking altogether
failOnError = false // Should JMH fail immediately if any benchmark had experienced the unrecoverable error?
forceGC = false // Should JMH force GC between iterations?
humanOutputFile = project.file("${project.buildDir}/reports/jmh/human.txt") // human-readable output file
resultsFile = project.file("${project.buildDir}/reports/jmh/results.txt") // results file
timeOnIteration = '5s' // Time to spend at each measurement iteration.
resultFormat = 'CSV' // Result format type (one of CSV, JSON, NONE, SCSV, TEXT)
synchronizeIterations = false // Synchronize iterations?
threads = 1 // Number of worker threads to run with.
jmhTimeout = '30s' // Timeout for benchmark iteration.
verbosity = 'NORMAL' // Verbosity mode. Available modes are: [SILENT, NORMAL, EXTRA]
warmup = '5s' // Time to spend at each warmup iteration.
warmupBatchSize = 1 // Warmup batch size: number of benchmark method calls per operation.
warmupForks = 1 // How many warmup forks to make for a single benchmark. 0 to disable warmup forks.
warmupIterations = 5 // Number of warmup iterations to do.
zip64 = true // Use ZIP64 format for bigger archives
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package org.neo4j.apoc.benchmark;

import apoc.meta.Meta;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openjdk.jmh.runner.options.TimeValue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import java.util.Random;
import java.util.stream.IntStream;

import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.RelationshipType;

/**
* Benchmarks apoc.meta.stats.
* Note, this was thrown together very quickly to have some form of ground for optimisations done for a support card.
* Probably all kinds of problems here.
*/
@State(Scope.Thread)
public class ApocMetaStatsBenchmark {
private EmbeddedNeo4j embeddedNeo4j;
private GraphDatabaseService db;

@Benchmark
@BenchmarkMode(Mode.Throughput)
public Object benchmarkMetaStats() {
return db.executeTransactionally("CALL apoc.meta.stats()", Map.of(), r -> r.stream().toList());
}

@Setup(Level.Trial)
public void setup() throws IOException {
System.out.println("Starting...");
final var embeddedNeo4j = EmbeddedNeo4j.start();
System.out.println("Started in " + embeddedNeo4j.directory);
embeddedNeo4j.registerProcedure(Meta.class);
this.db = embeddedNeo4j.db;
this.embeddedNeo4j = embeddedNeo4j;
System.out.println("Creating data...");
createData();
}

private void createData() {
final int labelCount = 30;
final int totNodeCount = 10000;
final int relTypeCount = 30;
final int totRelCount = 10000;
final var rand = new Random(23);
final var labels = IntStream.range(0, labelCount).mapToObj(i -> Label.label("Label" + i)).toList();
final var types = IntStream.range(0, relTypeCount).mapToObj(i -> RelationshipType.withName("Type" + i)).toList();

try (final var tx = db.beginTx()) {
final var nodes = new ArrayList<Node>();
for (int i = 0; i < totNodeCount; ++i) {
final var ls = IntStream.range(0, rand.nextInt(10)).mapToObj(x -> labels.get(rand.nextInt(labels.size()))).toArray(Label[]::new);
nodes.add(tx.createNode(ls));
}
System.out.println("Created nodes " + totNodeCount);
int relCount = 0;
while (relCount < totRelCount) {
final var a = nodes.get(rand.nextInt(nodes.size()));
final var b = nodes.get(rand.nextInt(nodes.size()));
final var t = types.get(rand.nextInt(types.size()));
if (a.getRelationships().stream().noneMatch(r -> r.isType(t) && r.getEndNode().equals(b))) {
a.createRelationshipTo(b, t);
++relCount;
}
}
System.out.println("Created relationships " + totRelCount);
tx.commit();
}
System.out.println("Created data");
}

@TearDown(Level.Trial)
public void tearDown() {
embeddedNeo4j.managementService.shutdown();
}
}
58 changes: 58 additions & 0 deletions benchmark/src/jmh/java/org/neo4j/apoc/benchmark/EmbeddedNeo4j.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package org.neo4j.apoc.benchmark;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import org.neo4j.dbms.api.DatabaseManagementService;
import org.neo4j.dbms.api.DatabaseManagementServiceBuilder;
import org.neo4j.exceptions.KernelException;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.kernel.api.procedure.GlobalProcedures;
import org.neo4j.kernel.internal.GraphDatabaseAPI;

import static org.neo4j.configuration.GraphDatabaseSettings.DEFAULT_DATABASE_NAME;

public class EmbeddedNeo4j {

public final GraphDatabaseService db;
public final DatabaseManagementService managementService;
public final Path directory;

private EmbeddedNeo4j(GraphDatabaseService db, DatabaseManagementService managementService, Path directory) {
this.db = db;
this.managementService = managementService;
this.directory = directory;
}

public static EmbeddedNeo4j start() throws IOException {
final var path = Files.createTempDirectory("neo4j-bench");
final var managementService = new DatabaseManagementServiceBuilder(path).build();

final var db = managementService.database(DEFAULT_DATABASE_NAME);
registerShutdownHook(managementService);
return new EmbeddedNeo4j(db, managementService, path);
}

public void registerProcedure(Class<?>... procedures) {
final var globalProcedures = ((GraphDatabaseAPI) db).getDependencyResolver().resolveDependency(GlobalProcedures.class);
for (Class<?> procedure : procedures) {
try {
globalProcedures.registerProcedure(procedure);
globalProcedures.registerFunction(procedure);
globalProcedures.registerAggregationFunction(procedure);
} catch (KernelException e) {
throw new RuntimeException("Failed to register " + procedure, e);
}
}
}

private static void registerShutdownHook(final DatabaseManagementService managementService) {
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
public void run() {
managementService.shutdown();
}
});
}
}
45 changes: 45 additions & 0 deletions common/src/main/java/org/neo4j/cypher/export/DatabaseSubGraph.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.schema.ConstraintDefinition;
import org.neo4j.graphdb.schema.IndexDefinition;
import org.neo4j.internal.kernel.api.Read;
import org.neo4j.internal.kernel.api.TokenRead;
import org.neo4j.kernel.api.KernelTransaction;

public class DatabaseSubGraph implements SubGraph {
private final Transaction transaction;
Expand All @@ -40,6 +43,10 @@ public DatabaseSubGraph(Transaction transaction) {
this.transaction = transaction;
}

public static SubGraph optimizedForCount(Transaction transaction, KernelTransaction kernelTransaction) {
return new CountOptimisedDatabaseSubGraph(transaction, kernelTransaction);
}

@Override
public Iterable<Node> getNodes() {
return transaction.getAllNodes();
Expand Down Expand Up @@ -117,3 +124,41 @@ public Iterator<Node> findNodes(Label label) {
return transaction.findNodes(label);
}
}

/**
* Implementation of DatabaseSubGraph that uses internal kernel APIs directly for better performance when retrieving counts.
* The default implementation can cause a lot of subqueries that requires time for planning.
*/
class CountOptimisedDatabaseSubGraph extends DatabaseSubGraph {
private final TokenRead tokenRead;
private final Read read;

public CountOptimisedDatabaseSubGraph(Transaction transaction, KernelTransaction kernelTx) {
super(transaction);
this.tokenRead = kernelTx.tokenRead();
this.read = kernelTx.dataRead();
}

@Override
public long countsForNode(Label label) {
return read.countsForNode(tokenRead.nodeLabel(label.name()));
}

@Override
public long countsForRelationship(RelationshipType type, Label end) {
return read.countsForRelationship(
TokenRead.ANY_LABEL, tokenRead.relationshipType(type.name()), tokenRead.nodeLabel(end.name()));
}

@Override
public long countsForRelationship(Label start, RelationshipType type) {
return read.countsForRelationship(
tokenRead.nodeLabel(start.name()), tokenRead.relationshipType(type.name()), TokenRead.ANY_LABEL);
}

@Override
public long countsForRelationship(RelationshipType type) {
return read.countsForRelationship(
TokenRead.ANY_LABEL, tokenRead.relationshipType(type.name()), TokenRead.ANY_LABEL);
}
}
11 changes: 5 additions & 6 deletions core/src/main/java/apoc/convert/Json.java
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ public List<Object> fromJsonList(
}

@Procedure("apoc.convert.toTree")
@Description("Returns a stream of `MAP` values, representing the given `PATH` values as a tree with at least one root.")
@Description(
"Returns a stream of `MAP` values, representing the given `PATH` values as a tree with at least one root.")
public Stream<MapResult> toTree(
@Name("paths") List<Path> paths,
@Name(value = "lowerCaseRels", defaultValue = "true") boolean lowerCaseRels,
Expand Down Expand Up @@ -203,10 +204,8 @@ public Stream<MapResult> toTree(
List<Map<String, Object>> list = (List) nMap.get(typeName);
// Check that this combination of rel and node doesn't already exist
Optional<Map<String, Object>> optMap = list.stream()
.filter(elem ->
elem.get("_elementId").equals(m.getElementId())
&& elem.get(typeName + "._elementId").equals(r.getElementId())
)
.filter(elem -> elem.get("_elementId").equals(m.getElementId())
&& elem.get(typeName + "._elementId").equals(r.getElementId()))
.findFirst();
if (!optMap.isPresent()) {
Map<String, Object> mMap = toMap(m, nodes);
Expand Down Expand Up @@ -273,7 +272,7 @@ private Map<String, Object> toMap(Node n, Map<String, List<String>> nodeFilters)
Map<String, Object> props = n.getAllProperties();
Map<String, Object> result = new LinkedHashMap<>(props.size() + 2);
String type = Util.labelString(n);
result.put("_id", n.getId());;
result.put("_id", n.getId());
result.put("_elementId", n.getElementId());
result.put("_type", type);
if (nodeFilters.containsKey(type)) { // Check if list contains LABEL
Expand Down
Loading

0 comments on commit fe271e2

Please sign in to comment.