diff --git a/config/config.go b/config/config.go index 611c1453..62598b4b 100644 --- a/config/config.go +++ b/config/config.go @@ -221,6 +221,7 @@ type ClickHouse struct { TagsAdaptiveQueries int `toml:"tags-adaptive-queries" json:"tags-adaptive-queries" comment:"Tags adaptive queries (based on load average) for increase/decrease concurrent queries"` TagsLimiter limiter.ServerLimiter `toml:"-" json:"-"` + WildcardMinDistance int `toml:"wildcard-min-distance" json:"wildcard-min-distance" comment:"If a wildcard appears both at the start and the end of a plain query at a distance (in terms of nodes) less than wildcard-min-distance, then it will be discarded. This parameter can be used to discard expensive queries."` TagsMinInQuery int `toml:"tags-min-in-query" json:"tags-min-in-query" comment:"Minimum tags in seriesByTag query"` TagsMinInAutocomplete int `toml:"tags-min-in-autocomplete" json:"tags-min-in-autocomplete" comment:"Minimum tags in autocomplete query"` diff --git a/doc/config.md b/doc/config.md index e55560c7..396960e3 100644 --- a/doc/config.md +++ b/doc/config.md @@ -311,6 +311,8 @@ Only one tag used as filter for index field Tag1, see graphite_tagged table [str tags-concurrent-queries = 0 # Tags adaptive queries (based on load average) for increase/decrease concurrent queries tags-adaptive-queries = 0 + # If a wildcard appears both at the start and the end of a plain query at a distance (in terms of nodes) less than wildcard-min-distance, then it will be discarded. This parameter can be used to discard expensive queries. + wildcard-min-distance = 0 # Minimum tags in seriesByTag query tags-min-in-query = 0 # Minimum tags in autocomplete query diff --git a/finder/index.go b/finder/index.go index 6ec4a0f3..d187e9ac 100644 --- a/finder/index.go +++ b/finder/index.go @@ -155,15 +155,22 @@ func (idx *IndexFinder) whereFilter(query string, from int64, until int64) *wher return w } -func (idx *IndexFinder) validatePlainQuery(query string) error { +func (idx *IndexFinder) validatePlainQuery(query string, wildcardMinDistance int) error { if where.HasUnmatchedBrackets(query) { return errs.NewErrorWithCode("query has unmatched brackets", http.StatusBadRequest) } + + var maxDist = where.MaxWildcardDistance(query) + + if maxDist != -1 && maxDist < wildcardMinDistance { + return errs.NewErrorWithCode("query has wildcards way too early at the start and at the end of it", http.StatusBadRequest) + } + return nil } func (idx *IndexFinder) Execute(ctx context.Context, config *config.Config, query string, from int64, until int64, stat *FinderStat) (err error) { - err = idx.validatePlainQuery(query) + err = idx.validatePlainQuery(query, config.ClickHouse.WildcardMinDistance) if err != nil { return err } diff --git a/pkg/where/where.go b/pkg/where/where.go index e526ef45..23e5d76a 100644 --- a/pkg/where/where.go +++ b/pkg/where/where.go @@ -77,6 +77,19 @@ func IndexWildcard(target string) int { return strings.IndexAny(target, "[]{}*?") } +func MaxWildcardDistance(query string) int { + if !HasWildcard(query) { + return -1 + } + + w := IndexWildcard(query) + firstWildcardNode := strings.Count(query[:w], ".") + w = IndexLastWildcard(query) + lastWildcardNode := strings.Count(query[w:], ".") + + return max(firstWildcardNode, lastWildcardNode) +} + func NonRegexpPrefix(expr string) string { s := regexp.QuoteMeta(expr) for i := 0; i < len(expr); i++ { diff --git a/pkg/where/where_test.go b/pkg/where/where_test.go index 545f805f..ca09a004 100644 --- a/pkg/where/where_test.go +++ b/pkg/where/where_test.go @@ -72,3 +72,23 @@ func TestNonRegexpPrefix(t *testing.T) { assert.Equal(t, test.prefix, prefix, testName) } } + +func TestMaxWildcardDistance(t *testing.T) { + table := []struct { + glob string + dist int + }{ + {`a.b.c.d.e`, -1}, + {`test.*.foo.bar`, 2}, + {`test.foo.*.*.bar.count`, 2}, + {`test.foo.bar.*.bar.foo.test`, 3}, + {`test.foo.bar.foobar.*.middle.*.foobar.bar.foo.test`, 4}, + {`*.test.foo.bar.*`, 0}, + } + + for _, test := range table { + testName := fmt.Sprintf("glob: %#v", test.glob) + dist := MaxWildcardDistance(test.glob) + assert.Equal(t, test.dist, dist, testName) + } +} diff --git a/tests/wildcard_min_distance/carbon-clickhouse.conf.tpl b/tests/wildcard_min_distance/carbon-clickhouse.conf.tpl new file mode 100644 index 00000000..41d7ce56 --- /dev/null +++ b/tests/wildcard_min_distance/carbon-clickhouse.conf.tpl @@ -0,0 +1,45 @@ +[common] + +[data] +path = "/etc/carbon-clickhouse/data" +chunk-interval = "1s" +chunk-auto-interval = "" + +[upload.graphite_index] +type = "index" +table = "graphite_index" +url = "{{ .CLICKHOUSE_URL }}/" +timeout = "2m30s" +cache-ttl = "1h" + +[upload.graphite_tags] +type = "tagged" +table = "graphite_tags" +threads = 3 +url = "{{ .CLICKHOUSE_URL }}/" +timeout = "2m30s" +cache-ttl = "1h" + +[upload.graphite_reverse] +type = "points-reverse" +table = "graphite_reverse" +url = "{{ .CLICKHOUSE_URL }}/" +timeout = "2m30s" +zero-timestamp = false + +[upload.graphite] +type = "points" +table = "graphite" +url = "{{ .CLICKHOUSE_URL }}/" +timeout = "2m30s" +zero-timestamp = false + +[tcp] +listen = ":2003" +enabled = true +drop-future = "0s" +drop-past = "0s" + +[logging] +file = "/etc/carbon-clickhouse/carbon-clickhouse.log" +level = "debug" diff --git a/tests/wildcard_min_distance/graphite-clickhouse.conf.tpl b/tests/wildcard_min_distance/graphite-clickhouse.conf.tpl new file mode 100644 index 00000000..5c189bba --- /dev/null +++ b/tests/wildcard_min_distance/graphite-clickhouse.conf.tpl @@ -0,0 +1,35 @@ +[common] +listen = "{{ .GCH_ADDR }}" +max-cpu = 0 +max-metrics-in-render-answer = 10000 +max-metrics-per-target = 10000 +headers-to-log = [ "X-Ctx-Carbonapi-Uuid" ] + +[clickhouse] +url = "{{ .CLICKHOUSE_URL }}/?max_rows_to_read=500000000&max_result_bytes=1073741824&readonly=2&log_queries=1" +data-timeout = "30s" + +wildcard-min-distance = 1 + +index-table = "graphite_index" +index-use-daily = true +index-timeout = "1m" +internal-aggregation = true + +tagged-table = "graphite_tags" +tagged-autocomplete-days = 1 + +[[data-table]] +# # clickhouse table name +table = "graphite" +# # points in table are stored with reverse path +reverse = false +rollup-conf = "auto" + +[[logging]] +logger = "" +file = "{{ .GCH_DIR }}/graphite-clickhouse.log" +level = "info" +encoding = "json" +encoding-time = "iso8601" +encoding-duration = "seconds" diff --git a/tests/wildcard_min_distance/test.toml b/tests/wildcard_min_distance/test.toml new file mode 100644 index 00000000..a5ebd003 --- /dev/null +++ b/tests/wildcard_min_distance/test.toml @@ -0,0 +1,192 @@ +[test] +precision = "10s" + +[[test.clickhouse]] +version = "21.3" +dir = "tests/clickhouse/rollup" + +[[test.clickhouse]] +version = "22.8" +dir = "tests/clickhouse/rollup" + +[[test.clickhouse]] +version = "24.2" +dir = "tests/clickhouse/rollup" + +[test.carbon_clickhouse] +template = "carbon-clickhouse.conf.tpl" + +[[test.graphite_clickhouse]] +template = "graphite-clickhouse.conf.tpl" + +[[test.input]] +name = "team_one.prod.test.metric_one" +points = [{value = 1.0, time = "rnow-10"}] + +[[test.input]] +name = "team_two.stage.test.metric_one" +points = [{value = 1.0, time = "rnow-10"}] + +[[test.input]] +name = "team_one.dev.test.metric_two" +points = [{value = 1.0, time = "rnow-10"}] + +[[test.input]] +name = "team_one.dev.nontest.metric_one" +points = [{value = 1.0, time = "rnow-10"}] + +[[test.render_checks]] +from = "rnow-10" +until = "rnow+1" +timeout = "1h" +targets = [ + "team_one.prod.test.metric_one", +] + +[[test.render_checks.result]] +name = "team_one.prod.test.metric_one" +path = "team_one.prod.test.metric_one" +consolidation = "avg" +start = "rnow-10" +stop = "rnow+10" +step = 10 +req_start = "rnow-10" +req_stop = "rnow+10" +values = [1.0, nan] + + +[[test.render_checks]] +from = "rnow-10" +until = "rnow+1" +timeout = "1h" +targets = [ + "*.dev.test.metric_two", +] + +[[test.render_checks.result]] +name = "team_one.dev.test.metric_two" +path = "*.dev.test.metric_two" +consolidation = "avg" +start = "rnow-10" +stop = "rnow+10" +step = 10 +req_start = "rnow-10" +req_stop = "rnow+10" +values = [1.0, nan] + +[[test.render_checks]] +from = "rnow-10" +until = "rnow+1" +timeout = "1h" +targets = [ + "*.*.test.metric_one", +] + +[[test.render_checks.result]] +name = "team_one.prod.test.metric_one" +path = "*.*.test.metric_one" +consolidation = "avg" +start = "rnow-10" +stop = "rnow+10" +step = 10 +req_start = "rnow-10" +req_stop = "rnow+10" +values = [1.0, nan] + +[[test.render_checks.result]] +name = "team_two.stage.test.metric_one" +path = "*.*.test.metric_one" +consolidation = "avg" +start = "rnow-10" +stop = "rnow+10" +step = 10 +req_start = "rnow-10" +req_stop = "rnow+10" +values = [1.0, nan] + + +[[test.render_checks]] +from = "rnow-10" +until = "rnow+1" +timeout = "1h" +targets = [ + "team_two.stage.test.*", +] + +[[test.render_checks.result]] +name = "team_two.stage.test.metric_one" +path = "team_two.stage.test.*" +consolidation = "avg" +start = "rnow-10" +stop = "rnow+10" +step = 10 +req_start = "rnow-10" +req_stop = "rnow+10" +values = [1.0, nan] + +[[test.render_checks]] +from = "rnow-10" +until = "rnow+1" +timeout = "1h" +targets = [ + "team_one.*.test.*", +] + +[[test.render_checks.result]] +name = "team_one.prod.test.metric_one" +path = "team_one.*.test.*" +consolidation = "avg" +start = "rnow-10" +stop = "rnow+10" +step = 10 +req_start = "rnow-10" +req_stop = "rnow+10" +values = [1.0, nan] + +[[test.render_checks.result]] +name = "team_one.dev.test.metric_two" +path = "team_one.*.test.*" +consolidation = "avg" +start = "rnow-10" +stop = "rnow+10" +step = 10 +req_start = "rnow-10" +req_stop = "rnow+10" +values = [1.0, nan] + +[[test.render_checks]] +from = "rnow-10" +until = "rnow+1" +timeout = "1h" +targets = [ + "*.prod.test.*", +] +error_regexp = "^400: query has wildcards way too early at the start and at the end of it" + +[[test.render_checks]] +from = "rnow-10" +until = "rnow+1" +timeout = "1h" +targets = [ + "*.*.test.*", +] +error_regexp = "^400: query has wildcards way too early at the start and at the end of it" + +[[test.render_checks]] +from = "rnow-10" +until = "rnow+1" +timeout = "1h" +targets = [ + "*.*.*.*", +] +error_regexp = "^400: query has wildcards way too early at the start and at the end of it" + + +[[test.render_checks]] +from = "rnow-10" +until = "rnow+1" +timeout = "1h" +targets = [ + "*.*", +] +error_regexp = "^400: query has wildcards way too early at the start and at the end of it" \ No newline at end of file