From e2983e0a9ac4d13872b49fb77db64bd341e19101 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Mon, 14 Oct 2024 15:52:36 +0800 Subject: [PATCH] [BugFix] unify the behavior of default AutoCollector (backport #51723) (#51871) Co-authored-by: Murphy <96611012+murphyatwork@users.noreply.github.com> --- .../statistic/StatisticAutoCollector.java | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/com/starrocks/statistic/StatisticAutoCollector.java b/fe/fe-core/src/main/java/com/starrocks/statistic/StatisticAutoCollector.java index 6f65ceab1915d..22cfecacad32a 100644 --- a/fe/fe-core/src/main/java/com/starrocks/statistic/StatisticAutoCollector.java +++ b/fe/fe-core/src/main/java/com/starrocks/statistic/StatisticAutoCollector.java @@ -77,6 +77,7 @@ protected void runAfterCatalogReady() { public List runJobs() { List result = Lists.newArrayList(); + // TODO: define the priority in the job instead List allNativeAnalyzeJobs = GlobalStateMgr.getCurrentState().getAnalyzeMgr().getAllNativeAnalyzeJobList(); allNativeAnalyzeJobs.sort((o1, o2) -> Long.compare(o2.getId(), o1.getId())); @@ -102,12 +103,8 @@ public List runJobs() { if (Config.enable_collect_full_statistic) { LOG.info("auto collect full statistic on all databases start"); - List allJobs = StatisticsCollectJobFactory.buildStatisticsCollectJob( - new NativeAnalyzeJob(StatsConstants.DEFAULT_ALL_ID, StatsConstants.DEFAULT_ALL_ID, null, null, - AnalyzeType.FULL, ScheduleType.SCHEDULE, - Maps.newHashMap(), - ScheduleStatus.PENDING, - LocalDateTime.MIN)); + List allJobs = + StatisticsCollectJobFactory.buildStatisticsCollectJob(createDefaultJobAnalyzeAll()); for (StatisticsCollectJob statsJob : allJobs) { // user-created analyze job has a higher priority if (statsJob.isAnalyzeTable() && analyzeTableSet.contains(statsJob.getTable().getId())) { @@ -149,18 +146,28 @@ public List runJobs() { return result; } + /** + * 
Choose user-created jobs first; fall back to the default job if none exists + */ private void initDefaultJob() { - // Add a default sample job if wasn't collect List allNativeAnalyzeJobs = GlobalStateMgr.getCurrentState().getAnalyzeMgr().getAllNativeAnalyzeJobList(); if (allNativeAnalyzeJobs.stream().anyMatch(j -> j.getScheduleType() == ScheduleType.SCHEDULE)) { return; } - NativeAnalyzeJob nativeAnalyzeJob = new NativeAnalyzeJob(StatsConstants.DEFAULT_ALL_ID, StatsConstants.DEFAULT_ALL_ID, - Collections.emptyList(), Collections.emptyList(), AnalyzeType.SAMPLE, ScheduleType.SCHEDULE, + NativeAnalyzeJob job = createDefaultJobAnalyzeAll(); + GlobalStateMgr.getCurrentState().getAnalyzeMgr().addAnalyzeJob(job); + } + + /** + * Create a default job to analyze all tables in the system + */ + private NativeAnalyzeJob createDefaultJobAnalyzeAll() { + AnalyzeType analyzeType = Config.enable_collect_full_statistic ? AnalyzeType.FULL : AnalyzeType.SAMPLE; + return new NativeAnalyzeJob(StatsConstants.DEFAULT_ALL_ID, StatsConstants.DEFAULT_ALL_ID, + Collections.emptyList(), Collections.emptyList(), analyzeType, ScheduleType.SCHEDULE, Maps.newHashMap(), ScheduleStatus.PENDING, LocalDateTime.MIN); - GlobalStateMgr.getCurrentState().getAnalyzeMgr().addAnalyzeJob(nativeAnalyzeJob); } private boolean checkoutAnalyzeTime(LocalTime now) {