From fe271e27b7f37d4f12db9a9650c60e39dbc370cd Mon Sep 17 00:00:00 2001 From: Love Kristofer Leifland Date: Wed, 21 Feb 2024 09:41:56 +0100 Subject: [PATCH] [8cS8e3IT] Optimise apoc.meta.* procedures (#589) --- benchmark/build.gradle | 36 +++ .../benchmark/ApocMetaStatsBenchmark.java | 92 ++++++++ .../neo4j/apoc/benchmark/EmbeddedNeo4j.java | 58 +++++ .../neo4j/cypher/export/DatabaseSubGraph.java | 45 ++++ core/src/main/java/apoc/convert/Json.java | 11 +- core/src/main/java/apoc/meta/Meta.java | 59 +++-- .../java/apoc/convert/ConvertJsonTest.java | 207 +++++++++++------- core/src/test/java/apoc/meta/MetaTest.java | 109 +++++++-- core/src/test/java/apoc/util/UtilsTest.java | 5 +- .../test/java/apoc/it/core/ExportCsvIT.java | 5 +- .../core/GraphRefactoringEnterpriseTest.java | 12 +- .../core/TriggerEnterpriseFeaturesTest.java | 20 +- settings.gradle | 3 +- 13 files changed, 509 insertions(+), 153 deletions(-) create mode 100644 benchmark/build.gradle create mode 100644 benchmark/src/jmh/java/org/neo4j/apoc/benchmark/ApocMetaStatsBenchmark.java create mode 100644 benchmark/src/jmh/java/org/neo4j/apoc/benchmark/EmbeddedNeo4j.java diff --git a/benchmark/build.gradle b/benchmark/build.gradle new file mode 100644 index 000000000..e54d99cf6 --- /dev/null +++ b/benchmark/build.gradle @@ -0,0 +1,36 @@ +plugins { + id 'java' + id "me.champeau.jmh" version "0.7.2" +} + +repositories { + mavenCentral() +} + +dependencies { + jmh project(':common') + jmh project(':core') + jmh "com.neo4j:neo4j-enterprise:$neo4jVersionEffective" + jmh "org.neo4j:neo4j:$neo4jVersionEffective" +} + +jmh { + iterations = 5 // Number of measurement iterations to do. + benchmarkMode = ['thrpt'] // Benchmark mode. Available modes are: [Throughput/thrpt, AverageTime/avgt, SampleTime/sample, SingleShotTime/ss, All/all] + fork = 1 // How many times to forks a single benchmark. Use 0 to disable forking altogether + failOnError = false // Should JMH fail immediately if any benchmark had experienced the unrecoverable error? + forceGC = false // Should JMH force GC between iterations? + humanOutputFile = project.file("${project.buildDir}/reports/jmh/human.txt") // human-readable output file + resultsFile = project.file("${project.buildDir}/reports/jmh/results.txt") // results file + timeOnIteration = '5s' // Time to spend at each measurement iteration. + resultFormat = 'CSV' // Result format type (one of CSV, JSON, NONE, SCSV, TEXT) + synchronizeIterations = false // Synchronize iterations? + threads = 1 // Number of worker threads to run with. + jmhTimeout = '30s' // Timeout for benchmark iteration. + verbosity = 'NORMAL' // Verbosity mode. Available modes are: [SILENT, NORMAL, EXTRA] + warmup = '5s' // Time to spend at each warmup iteration. + warmupBatchSize = 1 // Warmup batch size: number of benchmark method calls per operation. + warmupForks = 1 // How many warmup forks to make for a single benchmark. 0 to disable warmup forks. + warmupIterations = 5 // Number of warmup iterations to do. + zip64 = true // Use ZIP64 format for bigger archives +} \ No newline at end of file diff --git a/benchmark/src/jmh/java/org/neo4j/apoc/benchmark/ApocMetaStatsBenchmark.java b/benchmark/src/jmh/java/org/neo4j/apoc/benchmark/ApocMetaStatsBenchmark.java new file mode 100644 index 000000000..67064a410 --- /dev/null +++ b/benchmark/src/jmh/java/org/neo4j/apoc/benchmark/ApocMetaStatsBenchmark.java @@ -0,0 +1,92 @@ +package org.neo4j.apoc.benchmark; + +import apoc.meta.Meta; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.OptionsBuilder; +import org.openjdk.jmh.runner.options.TimeValue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Map; +import java.util.Random; +import java.util.stream.IntStream; + +import org.neo4j.graphdb.GraphDatabaseService; +import org.neo4j.graphdb.Label; +import org.neo4j.graphdb.Node; +import org.neo4j.graphdb.RelationshipType; + +/** + * Benchmarks apoc.meta.stats. + * Note, this was thrown together very quickly to have some form of ground for optimisations done for a support card. + * Probably all kinds of problems here. + */ +@State(Scope.Thread) +public class ApocMetaStatsBenchmark { + private EmbeddedNeo4j embeddedNeo4j; + private GraphDatabaseService db; + + @Benchmark + @BenchmarkMode(Mode.Throughput) + public Object benchmarkMetaStats() { + return db.executeTransactionally("CALL apoc.meta.stats()", Map.of(), r -> r.stream().toList()); + } + + @Setup(Level.Trial) + public void setup() throws IOException { + System.out.println("Starting..."); + final var embeddedNeo4j = EmbeddedNeo4j.start(); + System.out.println("Started in " + embeddedNeo4j.directory); + embeddedNeo4j.registerProcedure(Meta.class); + this.db = embeddedNeo4j.db; + this.embeddedNeo4j = embeddedNeo4j; + System.out.println("Creating data..."); + createData(); + } + + private void createData() { + final int labelCount = 30; + final int totNodeCount = 10000; + final int relTypeCount = 30; + final int totRelCount = 10000; + final var rand = new Random(23); + final var labels = IntStream.range(0, labelCount).mapToObj(i -> Label.label("Label" + i)).toList(); + final var types = IntStream.range(0, relTypeCount).mapToObj(i -> RelationshipType.withName("Type" + i)).toList(); + + try (final var tx = db.beginTx()) { + final var nodes = new ArrayList(); + for (int i = 0; i < totNodeCount; ++i) { + final var ls = IntStream.range(0, rand.nextInt(10)).mapToObj(x -> labels.get(rand.nextInt(labels.size()))).toArray(Label[]::new); + nodes.add(tx.createNode(ls)); + } + System.out.println("Created nodes " + totNodeCount); + int relCount = 0; + while (relCount < totRelCount) { + final var a = nodes.get(rand.nextInt(nodes.size())); + final var b = nodes.get(rand.nextInt(nodes.size())); + final var t = types.get(rand.nextInt(types.size())); + if (a.getRelationships().stream().noneMatch(r -> r.isType(t) && r.getEndNode().equals(b))) { + a.createRelationshipTo(b, t); + ++relCount; + } + } + System.out.println("Created relationships " + totRelCount); + tx.commit(); + } + System.out.println("Created data"); + } + + @TearDown(Level.Trial) + public void tearDown() { + embeddedNeo4j.managementService.shutdown(); + } +} \ No newline at end of file diff --git a/benchmark/src/jmh/java/org/neo4j/apoc/benchmark/EmbeddedNeo4j.java b/benchmark/src/jmh/java/org/neo4j/apoc/benchmark/EmbeddedNeo4j.java new file mode 100644 index 000000000..64df1316e --- /dev/null +++ b/benchmark/src/jmh/java/org/neo4j/apoc/benchmark/EmbeddedNeo4j.java @@ -0,0 +1,58 @@ +package org.neo4j.apoc.benchmark; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.neo4j.dbms.api.DatabaseManagementService; +import org.neo4j.dbms.api.DatabaseManagementServiceBuilder; +import org.neo4j.exceptions.KernelException; +import org.neo4j.graphdb.GraphDatabaseService; +import org.neo4j.kernel.api.procedure.GlobalProcedures; +import org.neo4j.kernel.internal.GraphDatabaseAPI; + +import static org.neo4j.configuration.GraphDatabaseSettings.DEFAULT_DATABASE_NAME; + +public class EmbeddedNeo4j { + + public final GraphDatabaseService db; + public final DatabaseManagementService managementService; + public final Path directory; + + private EmbeddedNeo4j(GraphDatabaseService db, DatabaseManagementService managementService, Path directory) { + this.db = db; + this.managementService = managementService; + this.directory = directory; + } + + public static EmbeddedNeo4j start() throws IOException { + final var path = Files.createTempDirectory("neo4j-bench"); + final var managementService = new DatabaseManagementServiceBuilder(path).build(); + + final var db = managementService.database(DEFAULT_DATABASE_NAME); + registerShutdownHook(managementService); + return new EmbeddedNeo4j(db, managementService, path); + } + + public void registerProcedure(Class... procedures) { + final var globalProcedures = ((GraphDatabaseAPI) db).getDependencyResolver().resolveDependency(GlobalProcedures.class); + for (Class procedure : procedures) { + try { + globalProcedures.registerProcedure(procedure); + globalProcedures.registerFunction(procedure); + globalProcedures.registerAggregationFunction(procedure); + } catch (KernelException e) { + throw new RuntimeException("Failed to register " + procedure, e); + } + } + } + + private static void registerShutdownHook(final DatabaseManagementService managementService) { + Runtime.getRuntime().addShutdownHook(new Thread() { + @Override + public void run() { + managementService.shutdown(); + } + }); + } +} diff --git a/common/src/main/java/org/neo4j/cypher/export/DatabaseSubGraph.java b/common/src/main/java/org/neo4j/cypher/export/DatabaseSubGraph.java index 3d09a38fe..9b8502a86 100644 --- a/common/src/main/java/org/neo4j/cypher/export/DatabaseSubGraph.java +++ b/common/src/main/java/org/neo4j/cypher/export/DatabaseSubGraph.java @@ -32,6 +32,9 @@ import org.neo4j.graphdb.Transaction; import org.neo4j.graphdb.schema.ConstraintDefinition; import org.neo4j.graphdb.schema.IndexDefinition; +import org.neo4j.internal.kernel.api.Read; +import org.neo4j.internal.kernel.api.TokenRead; +import org.neo4j.kernel.api.KernelTransaction; public class DatabaseSubGraph implements SubGraph { private final Transaction transaction; @@ -40,6 +43,10 @@ public DatabaseSubGraph(Transaction transaction) { this.transaction = transaction; } + public static SubGraph optimizedForCount(Transaction transaction, KernelTransaction kernelTransaction) { + return new CountOptimisedDatabaseSubGraph(transaction, kernelTransaction); + } + @Override public Iterable getNodes() { return transaction.getAllNodes(); @@ -117,3 +124,41 @@ public Iterator findNodes(Label label) { return transaction.findNodes(label); } } + +/** + * Implementation of DatabaseSubGraph that uses internal kernel APIs directly for better performance when retrieving counts. + * The default implementation can cause a lot of subqueries that requires time for planning. + */ +class CountOptimisedDatabaseSubGraph extends DatabaseSubGraph { + private final TokenRead tokenRead; + private final Read read; + + public CountOptimisedDatabaseSubGraph(Transaction transaction, KernelTransaction kernelTx) { + super(transaction); + this.tokenRead = kernelTx.tokenRead(); + this.read = kernelTx.dataRead(); + } + + @Override + public long countsForNode(Label label) { + return read.countsForNode(tokenRead.nodeLabel(label.name())); + } + + @Override + public long countsForRelationship(RelationshipType type, Label end) { + return read.countsForRelationship( + TokenRead.ANY_LABEL, tokenRead.relationshipType(type.name()), tokenRead.nodeLabel(end.name())); + } + + @Override + public long countsForRelationship(Label start, RelationshipType type) { + return read.countsForRelationship( + tokenRead.nodeLabel(start.name()), tokenRead.relationshipType(type.name()), TokenRead.ANY_LABEL); + } + + @Override + public long countsForRelationship(RelationshipType type) { + return read.countsForRelationship( + TokenRead.ANY_LABEL, tokenRead.relationshipType(type.name()), TokenRead.ANY_LABEL); + } +} diff --git a/core/src/main/java/apoc/convert/Json.java b/core/src/main/java/apoc/convert/Json.java index c05442107..fed10cd70 100644 --- a/core/src/main/java/apoc/convert/Json.java +++ b/core/src/main/java/apoc/convert/Json.java @@ -170,7 +170,8 @@ public List fromJsonList( } @Procedure("apoc.convert.toTree") - @Description("Returns a stream of `MAP` values, representing the given `PATH` values as a tree with at least one root.") + @Description( + "Returns a stream of `MAP` values, representing the given `PATH` values as a tree with at least one root.") public Stream toTree( @Name("paths") List paths, @Name(value = "lowerCaseRels", defaultValue = "true") boolean lowerCaseRels, @@ -203,10 +204,8 @@ public Stream toTree( List> list = (List) nMap.get(typeName); // Check that this combination of rel and node doesn't already exist Optional> optMap = list.stream() - .filter(elem -> - elem.get("_elementId").equals(m.getElementId()) - && elem.get(typeName + "._elementId").equals(r.getElementId()) - ) + .filter(elem -> elem.get("_elementId").equals(m.getElementId()) + && elem.get(typeName + "._elementId").equals(r.getElementId())) .findFirst(); if (!optMap.isPresent()) { Map mMap = toMap(m, nodes); @@ -273,7 +272,7 @@ private Map toMap(Node n, Map> nodeFilters) Map props = n.getAllProperties(); Map result = new LinkedHashMap<>(props.size() + 2); String type = Util.labelString(n); - result.put("_id", n.getId());; + result.put("_id", n.getId()); result.put("_elementId", n.getElementId()); result.put("_type", type); if (nodeFilters.containsKey(type)) { // Check if list contains LABEL diff --git a/core/src/main/java/apoc/meta/Meta.java b/core/src/main/java/apoc/meta/Meta.java index 45ab6e714..a32089184 100644 --- a/core/src/main/java/apoc/meta/Meta.java +++ b/core/src/main/java/apoc/meta/Meta.java @@ -284,11 +284,11 @@ public MetaStats( * The StatsCallback interface defines callback methods for collecting label and relationship statistics. */ interface StatsCallback { - void label(int labelId, String labelName, long count); + void label(String labelName, long count); - void rel(int typeId, String typeName, long count); + void rel(String typeName, long count); - void rel(int typeId, String typeName, int labelId, String labelName, long out, long in); + void rel(String typeName, String labelName, long out, long in); } @NotThreadSafe @@ -305,7 +305,7 @@ public long count( @Name(value = "nodes", defaultValue = "[]") List nodes, @Name(value = "config", defaultValue = "{}") Map config) { MetaConfig conf = new MetaConfig(config); - final DatabaseSubGraph subGraph = new DatabaseSubGraph(transaction); + final var subGraph = DatabaseSubGraph.optimizedForCount(transaction, kernelTx); Stream