Skip to content

Commit

Permalink
[Enhancement] table level stats meta support incremental column stats…
Browse files Browse the repository at this point in the history
… for external table (#52038)
  • Loading branch information
Youngwb authored Oct 18, 2024
1 parent fe00c0b commit 642e8f7
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public static List<String> showBasicStatsMeta(ConnectContext context,

public static List<String> showExternalBasicStatsMeta(ConnectContext context,
ExternalBasicStatsMeta basicStatsMeta) throws MetaNotFoundException {
List<String> row = Lists.newArrayList("", "", "ALL", "", "", "", "");
List<String> row = Lists.newArrayList("", "", "ALL", "", "", "", "", "", "");
String catalogName = basicStatsMeta.getCatalogName();
String dbName = basicStatsMeta.getDbName();
String tableName = basicStatsMeta.getTableName();
Expand Down Expand Up @@ -135,6 +135,7 @@ public static List<String> showExternalBasicStatsMeta(ConnectContext context,
row.set(3, basicStatsMeta.getType().name());
row.set(4, basicStatsMeta.getUpdateTime().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")));
row.set(5, basicStatsMeta.getProperties() == null ? "{}" : basicStatsMeta.getProperties().toString());
row.set(7, basicStatsMeta.getColumnStatsString());

return row;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,10 @@ public Map<StatsMetaKey, ExternalBasicStatsMeta> getExternalBasicStatsMetaMap()
return externalBasicStatsMetaMap;
}

/**
 * Looks up the basic statistics meta recorded for one external table.
 *
 * @param catalogName catalog component of the lookup key
 * @param dbName database component of the lookup key
 * @param tableName table component of the lookup key
 * @return the value stored in {@code externalBasicStatsMetaMap} under the
 *         (catalog, db, table) key, or whatever the map returns for a missing key
 */
public ExternalBasicStatsMeta getExternalTableBasicStatsMeta(String catalogName, String dbName, String tableName) {
    StatsMetaKey lookupKey = new StatsMetaKey(catalogName, dbName, tableName);
    return externalBasicStatsMetaMap.get(lookupKey);
}

/**
 * Looks up the histogram statistics meta for a single column of a table.
 *
 * @param tableId id of the table
 * @param column name of the column
 * @return the value stored in {@code histogramStatsMetaMap} under the
 *         {@code Pair.create(tableId, column)} key
 */
public HistogramStatsMeta getHistogramMeta(long tableId, String column) {
return histogramStatsMetaMap.get(Pair.create(tableId, column));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,20 @@

package com.starrocks.statistic;

import com.google.common.collect.Maps;
import com.google.gson.annotations.SerializedName;
import com.starrocks.common.io.Text;
import com.starrocks.common.io.Writable;
import com.starrocks.persist.gson.GsonUtils;
import org.apache.commons.collections4.MapUtils;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.time.LocalDateTime;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class ExternalBasicStatsMeta implements Writable {
@SerializedName("catalogName")
Expand All @@ -35,6 +38,8 @@ public class ExternalBasicStatsMeta implements Writable {
@SerializedName("tableName")
private String tableName;

// Deprecated: superseded by columnStatsMetaMap, which holds a ColumnStatsMeta per column
// instead of a flat list of column names.
// NOTE(review): presumably still declared so JSON written with the "columns" key keeps
// deserializing - confirm before removing.
@Deprecated
@SerializedName("columns")
private List<String> columns;

Expand All @@ -47,6 +52,9 @@ public class ExternalBasicStatsMeta implements Writable {
@SerializedName("properties")
private Map<String, String> properties;

// Per-column stats metadata, keyed by column name (addColumnStatsMeta uses
// ColumnStatsMeta.getColumnName() as the key). Serialized under "columnStats".
// Starts out as an empty concurrent map via the field initializer.
@SerializedName("columnStats")
private Map<String, ColumnStatsMeta> columnStatsMetaMap = Maps.newConcurrentMap();

// No-arg constructor: does nothing beyond running the field initializers.
// NOTE(review): presumably needed for Gson deserialization in read()/clone() - confirm.
public ExternalBasicStatsMeta() {}

public ExternalBasicStatsMeta(String catalogName, String dbName, String tableName, List<String> columns,
Expand Down Expand Up @@ -100,4 +108,37 @@ public static ExternalBasicStatsMeta read(DataInput in) throws IOException {
String s = Text.readString(in);
return GsonUtils.GSON.fromJson(s, ExternalBasicStatsMeta.class);
}

/**
 * Replaces the properties map of this meta.
 *
 * @param properties the new properties; stored by reference, not copied
 */
public void setProperties(Map<String, String> properties) {
this.properties = properties;
}

/**
 * Replaces the update time of this meta.
 *
 * @param updateTime the new update time
 */
public void setUpdateTime(LocalDateTime updateTime) {
this.updateTime = updateTime;
}

/**
 * Replaces the analyze type of this meta.
 *
 * @param analyzeType the new analyze type, stored in the {@code type} field
 */
public void setAnalyzeType(StatsConstants.AnalyzeType analyzeType) {
    type = analyzeType;
}

/**
 * Registers the stats meta of one column, replacing any entry already stored
 * under the same column name.
 *
 * @param columnStatsMeta the column meta to store; its column name is used as the map key
 */
public void addColumnStatsMeta(ColumnStatsMeta columnStatsMeta) {
    String columnName = columnStatsMeta.getColumnName();
    columnStatsMetaMap.put(columnName, columnStatsMeta);
}

/**
 * Exposes the per-column stats metadata.
 *
 * @return the internal map itself (not a copy), keyed by column name
 */
public Map<String, ColumnStatsMeta> getColumnStatsMetaMap() {
    return this.columnStatsMetaMap;
}

/**
 * Renders the per-column stats metadata as a single comma-separated string.
 *
 * @return the {@code simpleString()} of every value in {@code columnStatsMetaMap},
 *         joined with ",", in the map's iteration order; "" when the map is null or empty
 */
public String getColumnStatsString() {
    if (columnStatsMetaMap == null || columnStatsMetaMap.isEmpty()) {
        return "";
    }
    StringBuilder joined = new StringBuilder();
    // Track the first element explicitly so that an empty simpleString() still
    // gets its separator, exactly as a joining collector would emit it.
    boolean firstEntry = true;
    for (ColumnStatsMeta columnMeta : columnStatsMetaMap.values()) {
        if (!firstEntry) {
            joined.append(",");
        }
        joined.append(columnMeta.simpleString());
        firstEntry = false;
    }
    return joined.toString();
}

/**
 * Creates a copy of this meta by serializing it to JSON with {@code GsonUtils.GSON}
 * and deserializing that JSON into a new instance.
 *
 * <p>Only the state that the Gson serializer emits is carried over to the copy.
 * This method overrides {@link Object#clone()} with a public, covariant signature
 * and does not call {@code super.clone()}.
 *
 * <p>NOTE(review): after the JSON round trip the copy's {@code columnStatsMetaMap}
 * may be whatever Map implementation Gson instantiates rather than the concurrent
 * map created by the field initializer - confirm if callers rely on that.
 *
 * @return a new {@code ExternalBasicStatsMeta} built from this instance's JSON form
 */
@Override
public ExternalBasicStatsMeta clone() {
    String json = GsonUtils.GSON.toJson(this);
    return GsonUtils.GSON.fromJson(json, ExternalBasicStatsMeta.class);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -425,9 +425,23 @@ public AnalyzeStatus collectStatistics(ConnectContext statsConnectCtx,
refreshAsync);
} else {
// for external table
ExternalBasicStatsMeta externalBasicStatsMeta = new ExternalBasicStatsMeta(statsJob.getCatalogName(),
db.getFullName(), table.getName(), statsJob.getColumnNames(), statsJob.getType(),
analyzeStatus.getStartTime(), statsJob.getProperties());
ExternalBasicStatsMeta externalBasicStatsMeta = analyzeMgr.getExternalTableBasicStatsMeta(
statsJob.getCatalogName(), db.getFullName(), table.getName());
if (externalBasicStatsMeta == null) {
externalBasicStatsMeta = new ExternalBasicStatsMeta(statsJob.getCatalogName(), db.getFullName(),
table.getName(), Lists.newArrayList(statsJob.getColumnNames()), statsJob.getType(),
analyzeStatus.getEndTime(), statsJob.getProperties());
} else {
externalBasicStatsMeta = externalBasicStatsMeta.clone();
externalBasicStatsMeta.setUpdateTime(analyzeStatus.getEndTime());
externalBasicStatsMeta.setProperties(statsJob.getProperties());
externalBasicStatsMeta.setAnalyzeType(statsJob.getType());
}
for (String column : ListUtils.emptyIfNull(statsJob.getColumnNames())) {
ColumnStatsMeta meta =
new ColumnStatsMeta(column, statsJob.getType(), analyzeStatus.getEndTime());
externalBasicStatsMeta.addColumnStatsMeta(meta);
}
GlobalStateMgr.getCurrentState().getAnalyzeMgr().addExternalBasicStatsMeta(externalBasicStatsMeta);
GlobalStateMgr.getCurrentState().getAnalyzeMgr()
.refreshConnectorTableBasicStatisticsCache(statsJob.getCatalogName(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.starrocks.catalog.Database;
import com.starrocks.catalog.HiveTable;
import com.starrocks.catalog.OlapTable;
import com.starrocks.catalog.Partition;
import com.starrocks.catalog.Table;
import com.starrocks.catalog.Type;
import com.starrocks.common.AnalysisException;
import com.starrocks.common.DdlException;
import com.starrocks.common.jmockit.Deencapsulation;
Expand Down Expand Up @@ -218,6 +220,49 @@ public void testExecuteAnalyze(@Mocked StatisticExecutor statisticExecutor) {
Assert.assertFalse(stmt.getAnalyzeTypeDesc().isHistogram());
}

/**
 * Verifies that collecting statistics for an external table accumulates column
 * stats meta across runs: a first run over (col1, col2) followed by a second run
 * over (col1, col3) must leave entries for col1, col2 and col3 in the table-level meta.
 */
@Test
public void testCollectStatistics() {
    ExternalAnalyzeStatus status = new ExternalAnalyzeStatus(1, "test_catalog",
            "test_db", "test_table",
            "test123", Lists.newArrayList("col1", "col2"), StatsConstants.AnalyzeType.FULL,
            StatsConstants.ScheduleType.ONCE, Maps.newHashMap(), LocalDateTime.MIN);

    Database database = new Database(1, "test_db");
    Table table = HiveTable.builder().setTableName("test_table").build();
    StatisticsCollectJob statisticsCollectJob = new ExternalFullStatisticsCollectJob("test_catalog",
            database, table, List.of(), Lists.newArrayList("col1", "col2"),
            Lists.newArrayList(Type.INT, Type.INT),
            StatsConstants.AnalyzeType.FULL, StatsConstants.ScheduleType.ONCE, Maps.newHashMap());

    // Stub out the actual collection so only the meta bookkeeping is exercised.
    new MockUp<ExternalFullStatisticsCollectJob>() {
        @Mock
        public void collect(ConnectContext context, AnalyzeStatus analyzeStatus) throws Exception {
        }
    };

    // First run: meta is created for col1 and col2.
    StatisticExecutor statisticExecutor = new StatisticExecutor();
    statisticExecutor.collectStatistics(connectContext, statisticsCollectJob, status, false);

    ExternalBasicStatsMeta externalBasicStatsMeta = GlobalStateMgr.getCurrentState().getAnalyzeMgr()
            .getExternalTableBasicStatsMeta("test_catalog", "test_db", "test_table");
    Assert.assertNotNull(externalBasicStatsMeta);
    Assert.assertEquals(2, externalBasicStatsMeta.getColumnStatsMetaMap().size());
    Assert.assertTrue(externalBasicStatsMeta.getColumnStatsMetaMap().containsKey("col1"));
    Assert.assertTrue(externalBasicStatsMeta.getColumnStatsMetaMap().containsKey("col2"));

    // Second run over an overlapping column set: col1 is refreshed, col3 is added,
    // and col2 from the first run must be retained.
    status = new ExternalAnalyzeStatus(1, "test_catalog",
            "test_db", "test_table",
            "test123", Lists.newArrayList("col1", "col3"), StatsConstants.AnalyzeType.FULL,
            StatsConstants.ScheduleType.ONCE, Maps.newHashMap(), LocalDateTime.MIN);
    statisticsCollectJob = new ExternalFullStatisticsCollectJob("test_catalog",
            database, table, List.of(), Lists.newArrayList("col1", "col3"),
            Lists.newArrayList(Type.INT, Type.STRING),
            StatsConstants.AnalyzeType.FULL, StatsConstants.ScheduleType.ONCE, Maps.newHashMap());
    statisticExecutor.collectStatistics(connectContext, statisticsCollectJob, status, false);

    externalBasicStatsMeta = GlobalStateMgr.getCurrentState().getAnalyzeMgr()
            .getExternalTableBasicStatsMeta("test_catalog", "test_db", "test_table");
    Assert.assertNotNull(externalBasicStatsMeta);
    Assert.assertEquals(3, externalBasicStatsMeta.getColumnStatsMetaMap().size());
    Assert.assertTrue(externalBasicStatsMeta.getColumnStatsMetaMap().containsKey("col1"));
    Assert.assertTrue(externalBasicStatsMeta.getColumnStatsMetaMap().containsKey("col2"));
    Assert.assertTrue(externalBasicStatsMeta.getColumnStatsMetaMap().containsKey("col3"));
}

@Test
public void testDropHistogramStmt() {
new MockUp<StatisticExecutor>() {
Expand Down

0 comments on commit 642e8f7

Please sign in to comment.