Optimize group-by and join for single key scenario (apache#11630)

Jackie-Jiang · Sep 20, 2023 · f6cb672 · f6cb672
1 parent 47eff88
commit f6cb672
Show file tree

Hide file tree

Showing 23 changed files with 439 additions and 314 deletions.
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/util/DataBlockExtractUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/util/DataBlockExtractUtils.java
@@ -105,7 +105,7 @@ private static Object extractValue(DataBlock dataBlock, ColumnDataType storedTyp
     }
   }
 
-  public static List<Key> extractKeys(DataBlock dataBlock, int[] keyIds) {
+  public static Key[] extractKeys(DataBlock dataBlock, int[] keyIds) {
     DataSchema dataSchema = dataBlock.getDataSchema();
     int numKeys = keyIds.length;
     ColumnDataType[] storedTypes = new ColumnDataType[numKeys];
@@ -115,7 +115,7 @@ public static List<Key> extractKeys(DataBlock dataBlock, int[] keyIds) {
       nullBitmaps[colId] = dataBlock.getNullRowIds(keyIds[colId]);
     }
     int numRows = dataBlock.getNumberOfRows();
-    List<Key> keys = new ArrayList<>(numRows);
+    Key[] keys = new Key[numRows];
     for (int rowId = 0; rowId < numRows; rowId++) {
       Object[] values = new Object[numKeys];
       for (int colId = 0; colId < numKeys; colId++) {
@@ -124,13 +124,12 @@ public static List<Key> extractKeys(DataBlock dataBlock, int[] keyIds) {
           values[colId] = extractValue(dataBlock, storedTypes[colId], rowId, keyIds[colId]);
         }
       }
-      keys.add(new Key(values));
+      keys[rowId] = new Key(values);
     }
     return keys;
   }
 
-  public static List<Key> extractKeys(DataBlock dataBlock, int[] keyIds, int numMatchedRows,
-      RoaringBitmap matchedBitmap) {
+  public static Key[] extractKeys(DataBlock dataBlock, int[] keyIds, int numMatchedRows, RoaringBitmap matchedBitmap) {
     DataSchema dataSchema = dataBlock.getDataSchema();
     int numKeys = keyIds.length;
     ColumnDataType[] storedTypes = new ColumnDataType[numKeys];
@@ -139,9 +138,9 @@ public static List<Key> extractKeys(DataBlock dataBlock, int[] keyIds, int numMa
       storedTypes[colId] = dataSchema.getColumnDataType(keyIds[colId]).getStoredType();
       nullBitmaps[colId] = dataBlock.getNullRowIds(keyIds[colId]);
     }
-    List<Key> keys = new ArrayList<>(numMatchedRows);
+    Key[] keys = new Key[numMatchedRows];
     PeekableIntIterator iterator = matchedBitmap.getIntIterator();
-    for (int i = 0; i < numMatchedRows; i++) {
+    for (int matchedRowId = 0; matchedRowId < numMatchedRows; matchedRowId++) {
       int rowId = iterator.next();
       Object[] values = new Object[numKeys];
       for (int colId = 0; colId < numKeys; colId++) {
@@ -150,7 +149,7 @@ public static List<Key> extractKeys(DataBlock dataBlock, int[] keyIds, int numMa
           values[colId] = extractValue(dataBlock, storedTypes[colId], rowId, keyIds[colId]);
         }
       }
-      keys.add(new Key(values));
+      keys[matchedRowId] = new Key(values);
     }
     return keys;
   }
@@ -175,6 +174,29 @@ public static Object[] extractColumn(DataBlock dataBlock, int colId) {
     return values;
   }
 
+  public static Object[] extractColumn(DataBlock dataBlock, int colId, int numMatchedRows,
+      RoaringBitmap matchedBitmap) {
+    DataSchema dataSchema = dataBlock.getDataSchema();
+    ColumnDataType storedType = dataSchema.getColumnDataType(colId).getStoredType();
+    RoaringBitmap nullBitmap = dataBlock.getNullRowIds(colId);
+    Object[] values = new Object[numMatchedRows];
+    PeekableIntIterator iterator = matchedBitmap.getIntIterator();
+    if (nullBitmap == null) {
+      for (int matchedRowId = 0; matchedRowId < numMatchedRows; matchedRowId++) {
+        int rowId = iterator.next();
+        values[matchedRowId] = extractValue(dataBlock, storedType, rowId, colId);
+      }
+    } else {
+      for (int matchedRowId = 0; matchedRowId < numMatchedRows; matchedRowId++) {
+        int rowId = iterator.next();
+        if (!nullBitmap.contains(rowId)) {
+          values[matchedRowId] = extractValue(dataBlock, storedType, rowId, colId);
+        }
+      }
+    }
+    return values;
+  }
+
   public static int[] extractIntColumn(DataType storedType, DataBlock dataBlock, int colId,
       @Nullable RoaringBitmap nullBitmap) {
     int numRows = dataBlock.getNumberOfRows();

diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/PlanFragmenter.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/PlanFragmenter.java
@@ -26,8 +26,6 @@
 import org.apache.calcite.rel.logical.PinotRelExchangeType;
 import org.apache.pinot.query.planner.PlanFragment;
 import org.apache.pinot.query.planner.PlanFragmentMetadata;
-import org.apache.pinot.query.planner.partitioning.FieldSelectionKeySelector;
-import org.apache.pinot.query.planner.partitioning.KeySelector;
 import org.apache.pinot.query.planner.plannode.AggregateNode;
 import org.apache.pinot.query.planner.plannode.ExchangeNode;
 import org.apache.pinot.query.planner.plannode.FilterNode;
@@ -132,23 +130,20 @@ public PlanNode visitExchange(ExchangeNode node, Context context) {
     int nextPlanFragmentId = ++context._currentPlanFragmentId;
     PlanNode nextPlanFragmentRoot = node.getInputs().get(0).visit(this, context);
 
-    List<Integer> distributionKeys = node.getDistributionKeys();
-    RelDistribution.Type distributionType = node.getDistributionType();
     PinotRelExchangeType exchangeType = node.getExchangeType();
-
-    // make an exchange sender and receiver node pair
-    // only HASH_DISTRIBUTED requires a partition key selector; so all other types (SINGLETON and BROADCAST)
-    // of exchange will not carry a partition key selector.
-    KeySelector<Object[], Object[]> keySelector = distributionType == RelDistribution.Type.HASH_DISTRIBUTED
-        ? new FieldSelectionKeySelector(distributionKeys) : null;
+    RelDistribution.Type distributionType = node.getDistributionType();
+    // NOTE: Only HASH_DISTRIBUTED requires distribution keys
+    // TODO: Revisit ExchangeNode creation logic to avoid using HASH_DISTRIBUTED with empty distribution keys
+    List<Integer> distributionKeys =
+        distributionType == RelDistribution.Type.HASH_DISTRIBUTED ? node.getDistributionKeys() : null;
 
     PlanNode mailboxSender =
-        new MailboxSendNode(nextPlanFragmentId, nextPlanFragmentRoot.getDataSchema(),
-            currentPlanFragmentId, distributionType, exchangeType, keySelector, node.getCollations(),
-            node.isSortOnSender());
-    PlanNode mailboxReceiver = new MailboxReceiveNode(currentPlanFragmentId, nextPlanFragmentRoot.getDataSchema(),
-        nextPlanFragmentId, distributionType, exchangeType, keySelector,
-        node.getCollations(), node.isSortOnSender(), node.isSortOnReceiver(), mailboxSender);
+        new MailboxSendNode(nextPlanFragmentId, nextPlanFragmentRoot.getDataSchema(), currentPlanFragmentId,
+            distributionType, exchangeType, distributionKeys, node.getCollations(), node.isSortOnSender());
+    PlanNode mailboxReceiver =
+        new MailboxReceiveNode(currentPlanFragmentId, nextPlanFragmentRoot.getDataSchema(), nextPlanFragmentId,
+            distributionType, exchangeType, distributionKeys, node.getCollations(), node.isSortOnSender(),
+            node.isSortOnReceiver(), mailboxSender);
     mailboxSender.addInput(nextPlanFragmentRoot);
 
     context._planFragmentIdToRootNodeMap.put(nextPlanFragmentId,

diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RelToPlanNodeConverter.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RelToPlanNodeConverter.java
@@ -48,7 +48,6 @@
 import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.pinot.common.utils.DataSchema;
 import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
-import org.apache.pinot.query.planner.partitioning.FieldSelectionKeySelector;
 import org.apache.pinot.query.planner.plannode.AggregateNode;
 import org.apache.pinot.query.planner.plannode.ExchangeNode;
 import org.apache.pinot.query.planner.plannode.FilterNode;
@@ -180,8 +179,7 @@ private static PlanNode convertLogicalJoin(LogicalJoin node, int currentStageId)
 
     // Parse out all equality JOIN conditions
     JoinInfo joinInfo = node.analyzeCondition();
-    JoinNode.JoinKeys joinKeys = new JoinNode.JoinKeys(new FieldSelectionKeySelector(joinInfo.leftKeys),
-        new FieldSelectionKeySelector(joinInfo.rightKeys));
+    JoinNode.JoinKeys joinKeys = new JoinNode.JoinKeys(joinInfo.leftKeys, joinInfo.rightKeys);
     List<RexExpression> joinClause =
         joinInfo.nonEquiConditions.stream().map(RexExpressionUtils::fromRexNode).collect(Collectors.toList());
     return new JoinNode(currentStageId, toDataSchema(node.getRowType()), toDataSchema(node.getLeft().getRowType()),

diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/ShuffleRewriteVisitor.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/ShuffleRewriteVisitor.java
@@ -23,9 +23,8 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import javax.annotation.Nullable;
 import org.apache.calcite.rel.RelDistribution;
-import org.apache.pinot.query.planner.partitioning.FieldSelectionKeySelector;
-import org.apache.pinot.query.planner.partitioning.KeySelector;
 import org.apache.pinot.query.planner.plannode.AggregateNode;
 import org.apache.pinot.query.planner.plannode.ExchangeNode;
 import org.apache.pinot.query.planner.plannode.FilterNode;
@@ -105,14 +104,14 @@ public Set<Integer> visitJoin(JoinNode node, Void context) {
     Set<Integer> rightPks = node.getInputs().get(1).visit(this, context);
 
     // Currently, JOIN criteria is guaranteed to only have one FieldSelectionKeySelector
-    FieldSelectionKeySelector leftJoinKey = (FieldSelectionKeySelector) node.getJoinKeys().getLeftJoinKeySelector();
-    FieldSelectionKeySelector rightJoinKey = (FieldSelectionKeySelector) node.getJoinKeys().getRightJoinKeySelector();
+    List<Integer> leftJoinKeys = node.getJoinKeys().getLeftKeys();
+    List<Integer> rightJoinKeys = node.getJoinKeys().getRightKeys();
 
     int leftDataSchemaSize = node.getInputs().get(0).getDataSchema().size();
     Set<Integer> partitionKeys = new HashSet<>();
-    for (int i = 0; i < leftJoinKey.getColumnIndices().size(); i++) {
-      int leftIdx = leftJoinKey.getColumnIndices().get(i);
-      int rightIdx = rightJoinKey.getColumnIndices().get(i);
+    for (int i = 0; i < leftJoinKeys.size(); i++) {
+      int leftIdx = leftJoinKeys.get(i);
+      int rightIdx = rightJoinKeys.get(i);
       if (leftPKs.contains(leftIdx)) {
         partitionKeys.add(leftIdx);
       }
@@ -133,24 +132,24 @@ public Set<Integer> visitJoin(JoinNode node, Void context) {
   @Override
   public Set<Integer> visitMailboxReceive(MailboxReceiveNode node, Void context) {
     Set<Integer> oldPartitionKeys = node.getSender().visit(this, context);
-    KeySelector<Object[], Object[]> selector = node.getPartitionKeySelector();
+    List<Integer> distributionKeys = node.getDistributionKeys();
 
-    if (canSkipShuffle(oldPartitionKeys, selector)) {
+    if (canSkipShuffle(oldPartitionKeys, distributionKeys)) {
       node.setDistributionType(RelDistribution.Type.SINGLETON);
       return oldPartitionKeys;
-    } else if (selector == null) {
+    } else if (distributionKeys == null) {
       return new HashSet<>();
     } else {
-      return new HashSet<>(((FieldSelectionKeySelector) selector).getColumnIndices());
+      return new HashSet<>(distributionKeys);
     }
   }
 
   @Override
   public Set<Integer> visitMailboxSend(MailboxSendNode node, Void context) {
     Set<Integer> oldPartitionKeys = node.getInputs().get(0).visit(this, context);
-    KeySelector<Object[], Object[]> selector = node.getPartitionKeySelector();
+    List<Integer> distributionKeys = node.getDistributionKeys();
 
-    if (canSkipShuffle(oldPartitionKeys, selector)) {
+    if (canSkipShuffle(oldPartitionKeys, distributionKeys)) {
       node.setDistributionType(RelDistribution.Type.SINGLETON);
       return oldPartitionKeys;
     } else {
@@ -185,10 +184,9 @@ public Set<Integer> visitValue(ValueNode node, Void context) {
     return new HashSet<>();
   }
 
-  private static boolean canSkipShuffle(Set<Integer> partitionKeys, KeySelector<Object[], Object[]> keySelector) {
-    if (!partitionKeys.isEmpty() && keySelector != null) {
-      Set<Integer> targetSet = new HashSet<>(((FieldSelectionKeySelector) keySelector).getColumnIndices());
-      return targetSet.containsAll(partitionKeys);
+  private static boolean canSkipShuffle(Set<Integer> partitionKeys, @Nullable List<Integer> distributionKeys) {
+    if (!partitionKeys.isEmpty() && distributionKeys != null) {
+      return distributionKeys.containsAll(partitionKeys);
     }
     return false;
   }

diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/EmptyKeySelector.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/EmptyKeySelector.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.query.planner.partitioning;
+
+public class EmptyKeySelector implements KeySelector<Integer> {
+  private EmptyKeySelector() {
+  }
+
+  public static final EmptyKeySelector INSTANCE = new EmptyKeySelector();
+  private static final Integer PLACE_HOLDER = 0;
+
+  @Override
+  public Integer getKey(Object[] row) {
+    return PLACE_HOLDER;
+  }
+
+  @Override
+  public int computeHash(Object[] input) {
+    return 0;
+  }
+}
diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/KeySelector.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/KeySelector.java
@@ -18,28 +18,30 @@
  */
 package org.apache.pinot.query.planner.partitioning;
 
-
 /**
  * The {@code KeySelector} provides a partitioning function to encode a specific input data type into a key.
  *
  * <p>This key selector is used for computation such as GROUP BY or equality JOINs.
  *
  * <p>Key selector should always produce the same selection hash key when the same input is provided.
  */
-public interface KeySelector<IN, OUT> {
+public interface KeySelector<T> {
+  String DEFAULT_HASH_ALGORITHM = "absHashCode";
 
   /**
-   * Extract the key out of an input data construct.
-   *
-   * @param input input data.
-   * @return the key of the input data.
+   * Extracts the key out of the given row.
    */
-  OUT getKey(IN input);
+  T getKey(Object[] row);
 
-  int computeHash(IN input);
+  /**
+   * Computes the hash of the given row.
+   */
+  int computeHash(Object[] input);
 
   /**
-   * @return the hash-algorithm used for distributing rows
+   * Returns the hash algorithm used to compute the hash.
    */
-  String hashAlgorithm();
+  default String hashAlgorithm() {
+    return DEFAULT_HASH_ALGORITHM;
+  }
 }
diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/KeySelectorFactory.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/KeySelectorFactory.java
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.query.planner.partitioning;
+
+import java.util.List;
+
+
+public class KeySelectorFactory {
+  private KeySelectorFactory() {
+  }
+
+  public static KeySelector<?> getKeySelector(List<Integer> keyIds) {
+    int numKeys = keyIds.size();
+    if (numKeys == 0) {
+      return EmptyKeySelector.INSTANCE;
+    } else if (numKeys == 1) {
+      return new SingleColumnKeySelector(keyIds.get(0));
+    } else {
+      int[] ids = new int[numKeys];
+      for (int i = 0; i < numKeys; i++) {
+        ids[i] = keyIds.get(i);
+      }
+      return new MultiColumnKeySelector(ids);
+    }
+  }
+}