[Enhancement] merge full/sample statistics collect #52693
Existing file, inside `executeAnalyze` (one line added):

```java
@@ -1479,6 +1479,7 @@ private void executeAnalyze(AnalyzeStmt analyzeStmt, AnalyzeStatus analyzeStatus
        statsConnectCtx.getSessionVariable().setStatisticCollectParallelism(
                context.getSessionVariable().getStatisticCollectParallelism());
        statsConnectCtx.setThreadLocalInfo();
        statsConnectCtx.setStatisticsConnection(true); // added in this PR
        try {
            executeAnalyze(statsConnectCtx, analyzeStmt, analyzeStatus, db, table);
        } finally {
```

Review comment: it would be set in the
Reply: not all statistics SQL needs to set this variable; only the collect job does.
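As a hedged illustration of the thread's conclusion, the flag belongs on the collect-job path rather than in any shared session setup. `buildStatsContext` and `isCollectJob` below are hypothetical names introduced for illustration; only `setStatisticsConnection` comes from this diff:

```java
// Hypothetical sketch: mark only collect-job sessions as statistics connections.
ConnectContext statsConnectCtx = buildStatsContext(); // assumed helper, not StarRocks API
if (isCollectJob) {
    // Collect jobs run heavyweight internal queries and inserts,
    // so only they set the flag; ordinary statistics SQL does not.
    statsConnectCtx.setStatisticsConnection(true);
}
```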
New file (186 lines added): HyperStatisticsCollectJob.java

```java
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.starrocks.statistic;

import com.google.common.collect.Lists;
import com.starrocks.analysis.Expr;
import com.starrocks.analysis.TableName;
import com.starrocks.catalog.Database;
import com.starrocks.catalog.OlapTable;
import com.starrocks.catalog.Table;
import com.starrocks.catalog.Type;
import com.starrocks.common.Config;
import com.starrocks.common.DdlException;
import com.starrocks.common.util.DebugUtil;
import com.starrocks.common.util.UUIDUtil;
import com.starrocks.qe.ConnectContext;
import com.starrocks.qe.OriginStatement;
import com.starrocks.qe.QueryState;
import com.starrocks.qe.StmtExecutor;
import com.starrocks.server.GlobalStateMgr;
import com.starrocks.sql.ast.InsertStmt;
import com.starrocks.sql.ast.QueryStatement;
import com.starrocks.sql.ast.StatementBase;
import com.starrocks.sql.ast.ValuesRelation;
import com.starrocks.statistic.base.PartitionSampler;
import com.starrocks.statistic.hyper.HyperQueryJob;
import org.apache.commons.lang.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.List;
import java.util.Map;

public class HyperStatisticsCollectJob extends StatisticsCollectJob {
```

Review reply: I think it is hyper, because Full/Sample always use it.

```java
    private static final Logger LOG = LogManager.getLogger(HyperStatisticsCollectJob.class);

    private final List<Long> partitionIdList;

    private final int batchRowsLimit;
    private final List<String> sqlBuffer = Lists.newArrayList();
    private final List<List<Expr>> rowsBuffer = Lists.newArrayList();

    public HyperStatisticsCollectJob(Database db, Table table, List<Long> partitionIdList, List<String> columnNames,
                                     List<Type> columnTypes, StatsConstants.AnalyzeType type,
                                     StatsConstants.ScheduleType scheduleType, Map<String, String> properties) {
        super(db, table, columnNames, columnTypes, type, scheduleType, properties);
        this.partitionIdList = partitionIdList;
        // Rows per flushed batch, derived from the collect buffer size
        // (roughly 33 KB per statistics row assumed).
        this.batchRowsLimit = (int) Math.max(1, Config.statistic_full_collect_buffer / 33 / 1024);
    }
```
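For intuition about the constructor's magic numbers: if `statistic_full_collect_buffer` is a byte count (an assumption), the limit amounts to the buffer divided by roughly 33 KB per row. A standalone check with a hypothetical 64 MB buffer:

```java
// Standalone arithmetic check of batchRowsLimit; the 64 MB value is
// illustrative, not the actual default of statistic_full_collect_buffer.
public class BatchLimitSketch {
    public static void main(String[] args) {
        long bufferBytes = 64L * 1024 * 1024; // hypothetical config value
        int batchRowsLimit = (int) Math.max(1, bufferBytes / 33 / 1024);
        System.out.println(batchRowsLimit);   // prints 1985
    }
}
```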
```java
    @Override
    public void collect(ConnectContext context, AnalyzeStatus analyzeStatus) throws Exception {
        if (table.isTemporaryTable()) {
            context.setSessionId(((OlapTable) table).getSessionId());
        }
        context.getSessionVariable().setEnableAnalyzePhasePruneColumns(true);
```

Review comment: is it necessary to restore these variables? What if the connection is reused by other jobs?
Reply: I think we don't need to, because the statistics ConnectContext is created by the job itself.
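If the context were shared across jobs, the usual fix for the reviewer's concern would be a save/restore guard like the sketch below; the getter name is an assumption, and per the reply it is unnecessary here because each job creates its own context:

```java
// Hypothetical save/restore guard for a shared session variable; not needed
// here per the reply above, since the statistics ConnectContext is job-local.
boolean saved = sessionVariable.isEnableAnalyzePhasePruneColumns(); // assumed getter name
try {
    sessionVariable.setEnableAnalyzePhasePruneColumns(true);
    // ... run the collection queries ...
} finally {
    sessionVariable.setEnableAnalyzePhasePruneColumns(saved); // restore on exit
}
```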
```java
        context.getSessionVariable().setPipelineDop(context.getSessionVariable().getStatisticCollectParallelism());

        int splitSize = Math.max(1, batchRowsLimit / columnNames.size());
        List<HyperQueryJob> queryJobs;
        if (type == StatsConstants.AnalyzeType.FULL) {
            queryJobs = HyperQueryJob.createFullQueryJobs(context, db, table, columnNames, columnTypes,
                    partitionIdList, splitSize);
        } else {
            PartitionSampler sampler = PartitionSampler.create(table, partitionIdList, properties);
            queryJobs = HyperQueryJob.createSampleQueryJobs(context, db, table, columnNames, columnTypes,
                    partitionIdList, splitSize, sampler);
        }
```
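Apparently each (partition, column) pair produces one statistics row, so the partition list is split such that a single query job's output stays near `batchRowsLimit` rows; wider tables get fewer partitions per job. A standalone sketch of the arithmetic, with illustrative numbers:

```java
// Why splitSize shrinks with column count: rows per job ~= partitions * columns.
public class SplitSizeSketch {
    public static void main(String[] args) {
        int batchRowsLimit = 1985;                   // from the constructor sketch above
        int columnCount = 20;                        // hypothetical table width
        int splitSize = Math.max(1, batchRowsLimit / columnCount);
        System.out.println(splitSize);               // 99 partitions per query job
        System.out.println(splitSize * columnCount); // ~1980 rows per flush
    }
}
```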
```java
        long queryTotals = 0;
        long queryFailures = 0;
        long insertFailures = 0;

        for (int i = 0; i < queryJobs.size(); i++) {
            HyperQueryJob queryJob = queryJobs.get(i);
            try {
                queryJob.queryStatistics();
                rowsBuffer.addAll(queryJob.getStatisticsData());
                sqlBuffer.addAll(queryJob.getStatisticsValueSQL());

                queryTotals += queryJob.getTotals();
                queryFailures += queryJob.getFailures();
            } catch (Exception e) {
                LOG.warn("query statistics task failed in job: {}, {}", this, queryJob, e);
                throw e;
            }

            if (queryFailures > Config.statistic_full_statistics_failure_tolerance_ratio * queryTotals) {
                String message = String.format("query statistic job failed due to " +
                                "too many failed tasks: %d/%d, the last failure is %s",
                        queryFailures, queryTotals, queryJob.getLastFailure());
                LOG.warn(message, queryJob.getLastFailure());
                throw new RuntimeException(message, queryJob.getLastFailure());
            }

            try {
                flushInsertStatisticsData(context);
            } catch (Exception e) {
                insertFailures++;
                if (insertFailures > Config.statistic_full_statistics_failure_tolerance_ratio * queryJobs.size()) {
                    String message = String.format("insert statistic job failed due to " +
                                    "too many failed tasks: %d/%d, the last failure is %s",
                            insertFailures, queryJobs.size(), e);
                    LOG.warn(message, queryJob.getLastFailure());
                    throw new RuntimeException(message, queryJob.getLastFailure());
                } else {
                    LOG.warn("insert statistics task failed in job: {}, {}", this, queryJob, e);
                }
            } finally {
                // The batch is dropped whether or not the flush succeeded;
                // a tolerated insert failure loses this batch's rows.
                rowsBuffer.clear();
                sqlBuffer.clear();
            }
            analyzeStatus.setProgress((i + 1) * 100L / queryJobs.size());
            GlobalStateMgr.getCurrentState().getAnalyzeMgr().addAnalyzeStatus(analyzeStatus);
        }
    }
```
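Both tolerance checks in `collect()` use the same ratio: query failures are compared against the tasks attempted so far, insert failures against the total number of query jobs. A standalone sketch of the arithmetic, assuming a hypothetical ratio of 0.05 (the real default of `statistic_full_statistics_failure_tolerance_ratio` may differ):

```java
// Hedged illustration of the failure-tolerance check in collect().
public class ToleranceSketch {
    public static void main(String[] args) {
        double ratio = 0.05;  // assumed value, not the verified default
        long queryTotals = 200;
        long queryFailures = 11;
        // 11 > 0.05 * 200 == 10, so the job aborts with a RuntimeException
        System.out.println(queryFailures > ratio * queryTotals); // true
    }
}
```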
```java
    private void flushInsertStatisticsData(ConnectContext context) throws Exception {
        if (rowsBuffer.isEmpty()) {
            return;
        }

        int count = 0;
        int maxRetryTimes = 5;
        StatementBase insertStmt = createInsertStmt();
        do {
            LOG.debug("statistics insert sql size:" + rowsBuffer.size());
            StmtExecutor executor = new StmtExecutor(context, insertStmt);

            context.setExecutor(executor);
            context.setQueryId(UUIDUtil.genUUID());
            context.setStartTime();
            executor.execute();

            if (context.getState().getStateType() == QueryState.MysqlStateType.ERR) {
                LOG.warn("Statistics insert fail | {} | Error Message [{}]", DebugUtil.printId(context.getQueryId()),
                        context.getState().getErrorMessage());
                if (StringUtils.contains(context.getState().getErrorMessage(), "Too many versions")) {
                    Thread.sleep(Config.statistic_collect_too_many_version_sleep);
                    count++;
                } else {
                    throw new DdlException(context.getState().getErrorMessage());
                }
            } else {
                return;
            }
        } while (count < maxRetryTimes);

        throw new DdlException(context.getState().getErrorMessage());
    }
```
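"Too many versions" indicates compaction back-pressure on the statistics table, so it is the only error that is retried after a sleep; anything else fails fast. A minimal self-contained sketch of the same policy, where `tryInsert` is a hypothetical stand-in for the StmtExecutor round trip:

```java
// Retry only on version pressure, as flushInsertStatisticsData does above.
public class RetrySketch {
    static String tryInsert() { return null; } // hypothetical: null means success

    static void insertWithRetry(int maxRetryTimes, long sleepMs) throws Exception {
        String err = null;
        for (int i = 0; i < maxRetryTimes; i++) {
            err = tryInsert();
            if (err == null) {
                return;                        // flushed successfully
            }
            if (!err.contains("Too many versions")) {
                throw new Exception(err);      // non-retryable: fail fast
            }
            Thread.sleep(sleepMs);             // let compaction catch up
        }
        throw new Exception(err);              // retries exhausted
    }

    public static void main(String[] args) throws Exception {
        insertWithRetry(5, 100L);
    }
}
```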
```java
    private StatementBase createInsertStmt() {
        String sql = "INSERT INTO _statistics_.column_statistics values " + String.join(", ", sqlBuffer) + ";";
        List<String> names = Lists.newArrayList("column_0", "column_1", "column_2", "column_3",
                "column_4", "column_5", "column_6", "column_7", "column_8", "column_9",
                "column_10", "column_11", "column_12");
        QueryStatement qs = new QueryStatement(new ValuesRelation(rowsBuffer, names));
        InsertStmt insert = new InsertStmt(new TableName("_statistics_", "column_statistics"), qs);
        insert.setOrigStmt(new OriginStatement(sql, 0));
        return insert;
    }

    @Override
    public String toString() {
        return "HyperStatisticsCollectJob{" + "type=" + type +
                ", scheduleType=" + scheduleType +
                ", db=" + db +
                ", table=" + table +
                ", partitionIdList=" + partitionIdList +
                ", columnNames=" + columnNames +
                ", properties=" + properties +
                '}';
    }
}
```
Review comment: can you put a comment on it?
Reply: it's a temp config on main; I will remove it in the next PR.