From 63d37149040308a50154f76123dbc77ee01dba53 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Tue, 26 Nov 2024 11:35:57 -0600 Subject: [PATCH] More advanced refactoring of the Run method Signed-off-by: Florent Poinsard --- go/summarize/markdown.go | 216 +++++++++++++ go/summarize/reading.go | 72 +++-- go/summarize/reading_test.go | 59 ---- go/summarize/summarize-keys.go | 283 +----------------- go/summarize/summarize-keys_test.go | 36 ++- go/summarize/summarize-trace.go | 20 +- go/summarize/summarize.go | 96 ++++-- go/summarize/summarize_test.go | 14 +- .../bigger_slow_log_avg-rows-examined.md | 2 +- go/testdata/bigger_slow_log_avg-time.md | 2 +- .../bigger_slow_log_total-rows-examined.md | 2 +- go/testdata/bigger_slow_log_total-time.md | 2 +- go/testdata/bigger_slow_log_usage-count.md | 2 +- go/testdata/keys-summary.md | 2 +- 14 files changed, 393 insertions(+), 415 deletions(-) create mode 100644 go/summarize/markdown.go delete mode 100644 go/summarize/reading_test.go diff --git a/go/summarize/markdown.go b/go/summarize/markdown.go new file mode 100644 index 0000000..93bbfa8 --- /dev/null +++ b/go/summarize/markdown.go @@ -0,0 +1,216 @@ +/* +Copyright 2024 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package summarize + +import ( + "fmt" + "maps" + "slices" + "sort" + "strconv" + + "vitess.io/vitess/go/vt/vtgate/planbuilder/operators" + + "github.com/vitessio/vt/go/keys" + "github.com/vitessio/vt/go/markdown" +) + +func renderHotQueries(md *markdown.MarkDown, queries []keys.QueryAnalysisResult, metricReader getMetric) { + if len(queries) == 0 { + return + } + + hasTime := false + // Sort the queries in descending order of hotness + sort.Slice(queries, func(i, j int) bool { + if queries[i].QueryTime != 0 { + hasTime = true + } + return metricReader(queries[i]) > metricReader(queries[j]) + }) + + if !hasTime { + return + } + + md.PrintHeader("Top Queries", 2) + + // Prepare table headers and rows + headers := []string{"Query ID", "Usage Count", "Total Query Time (ms)", "Avg Query Time (ms)", "Total Rows Examined"} + var rows [][]string + + for i, query := range queries { + queryID := fmt.Sprintf("Q%d", i+1) + avgQueryTime := query.QueryTime / float64(query.UsageCount) + rows = append(rows, []string{ + queryID, + strconv.Itoa(query.UsageCount), + fmt.Sprintf("%.2f", query.QueryTime), + fmt.Sprintf("%.2f", avgQueryTime), + strconv.Itoa(query.RowsExamined), + }) + } + + // Print the table + md.PrintTable(headers, rows) + + // After the table, list the full queries with their IDs + md.PrintHeader("Query Details", 3) + for i, query := range queries { + queryID := fmt.Sprintf("Q%d", i+1) + md.PrintHeader(queryID, 4) + md.Println("```sql") + md.Println(query.QueryStructure) + md.Println("```") + md.NewLine() + } +} + +func renderTableUsage(md *markdown.MarkDown, tableSummaries []*TableSummary, includeRowCount bool) { + if len(tableSummaries) == 0 { + return + } + + sort.Slice(tableSummaries, func(i, j int) bool { + if tableSummaries[i].UseCount() == tableSummaries[j].UseCount() { + return tableSummaries[i].Table < tableSummaries[j].Table + } + return tableSummaries[i].UseCount() > tableSummaries[j].UseCount() + }) + + md.PrintHeader("Tables", 2) + renderTableOverview(md, tableSummaries, includeRowCount) + + md.PrintHeader("Column Usage", 3) + for _, summary := range tableSummaries { + renderColumnUsageTable(md, summary) + } +} + +func renderTableOverview(md *markdown.MarkDown, tableSummaries []*TableSummary, includeRowCount bool) { + headers := []string{"Table Name", "Reads", "Writes"} + if includeRowCount { + headers = append(headers, "Number of Rows") + } + var rows [][]string + for _, summary := range tableSummaries { + thisRow := []string{ + summary.Table, + strconv.Itoa(summary.ReadQueryCount), + strconv.Itoa(summary.WriteQueryCount), + } + if includeRowCount { + thisRow = append(thisRow, strconv.Itoa(summary.RowCount)) + } + + rows = append(rows, thisRow) + } + md.PrintTable(headers, rows) +} + +func renderColumnUsageTable(md *markdown.MarkDown, summary *TableSummary) { + md.PrintHeader(fmt.Sprintf("Table: `%s` (%d reads and %d writes)", summary.Table, summary.ReadQueryCount, summary.WriteQueryCount), 4) + + headers := []string{"Column", "Position", "Used %"} + var rows [][]string + var lastName string + for colInfo, usage := range summary.GetColumns() { + name := colInfo.Name + if lastName == name { + name = "" + } else { + lastName = name + } + rows = append(rows, []string{ + name, + colInfo.Pos.String(), + fmt.Sprintf("%.0f%%", usage.Percentage), + }) + } + + md.PrintTable(headers, rows) +} + +func renderTablesJoined(md *markdown.MarkDown, summary *Summary) { + if len(summary.queryGraph) > 0 { + md.PrintHeader("Tables Joined", 2) + } + + type joinDetails struct { + Tbl1, Tbl2 string + Occurrences int + predicates []operators.JoinPredicate + } + + var joins []joinDetails + for tables, predicates := range summary.queryGraph { + occurrences := 0 + for _, count := range predicates { + occurrences += count + } + joinPredicates := slices.Collect(maps.Keys(predicates)) + sort.Slice(joinPredicates, func(i, j int) bool { + return joinPredicates[i].String() < joinPredicates[j].String() + }) + joins = append(joins, joinDetails{ + Tbl1: tables.Tbl1, + Tbl2: tables.Tbl2, + Occurrences: occurrences, + predicates: joinPredicates, + }) + } + + sort.Slice(joins, func(i, j int) bool { + if joins[i].Occurrences != joins[j].Occurrences { + return joins[i].Occurrences > joins[j].Occurrences + } + if joins[i].Tbl1 != joins[j].Tbl1 { + return joins[i].Tbl1 < joins[j].Tbl1 + } + return joins[i].Tbl2 < joins[j].Tbl2 + }) + + md.Println("```") + for _, join := range joins { + md.Printf("%s ↔ %s (Occurrences: %d)\n", join.Tbl1, join.Tbl2, join.Occurrences) + for i, pred := range join.predicates { + var s string + if i == len(join.predicates)-1 { + s = "└─" + } else { + s = "├─" + } + md.Printf("%s %s\n", s, pred.String()) + } + md.NewLine() + } + md.Println("```") +} + +func renderFailures(md *markdown.MarkDown, failures []FailuresSummary) { + if len(failures) == 0 { + return + } + md.PrintHeader("Failures", 2) + + headers := []string{"Error", "Count"} + var rows [][]string + for _, failure := range failures { + rows = append(rows, []string{failure.Error, strconv.Itoa(failure.Count)}) + } + md.PrintTable(headers, rows) +} diff --git a/go/summarize/reading.go b/go/summarize/reading.go index 4020059..a7918d2 100644 --- a/go/summarize/reading.go +++ b/go/summarize/reading.go @@ -19,49 +19,35 @@ package summarize import ( "encoding/json" "errors" - "io" "os" "sort" "strconv" "github.com/vitessio/vt/go/keys" + "github.com/vitessio/vt/go/schema" ) -func readTraceFile(fi fileInfo) (readingSummary, error) { +func readTraceFile(fi fileInfo) traceSummary { switch fi.fileType { case traceFile: - return readTracedQueryFile(fi.filename), nil - case keysFile: - return readAnalysedQueryFile(fi.filename), nil + return readTracedQueryFile(fi.filename) default: - return readingSummary{}, errors.New("unknown file format") + panic("Unsupported file type") } } -func getDecoderAndDelim(file *os.File) (*json.Decoder, json.Delim) { - // Create a decoder - decoder := json.NewDecoder(file) - - // Read the opening bracket - val, err := decoder.Token() - if err != nil { - exit("Error reading json: " + err.Error()) - } - delim, ok := val.(json.Delim) - if !ok { - exit("Error reading json: expected delimiter") - } - - // Reset the file pointer to the beginning - _, err = file.Seek(0, io.SeekStart) - if err != nil { - exit("Error rewinding file: " + err.Error()) +func readFile(fi fileInfo) (func(s *Summary) error, error) { + switch fi.fileType { + case keysFile: + return readAnalysedQueryFile(fi.filename), nil + case dbInfoFile: + return readDBInfoFile(fi.filename), nil + default: + return nil, errors.New("unknown file format") } - decoder = json.NewDecoder(file) - return decoder, delim } -func readTracedQueryFile(fileName string) readingSummary { +func readTracedQueryFile(fileName string) traceSummary { c, err := os.ReadFile(fileName) if err != nil { exit("Error opening file: " + err.Error()) @@ -89,13 +75,13 @@ func readTracedQueryFile(fileName string) readingSummary { return a < b }) - return readingSummary{ + return traceSummary{ Name: fileName, TracedQueries: to.Queries, } } -func readAnalysedQueryFile(fileName string) readingSummary { +func readAnalysedQueryFile(fileName string) func(s *Summary) error { c, err := os.ReadFile(fileName) if err != nil { exit("Error opening file: " + err.Error()) @@ -107,8 +93,30 @@ func readAnalysedQueryFile(fileName string) readingSummary { exit("Error parsing json: " + err.Error()) } - return readingSummary{ - Name: fileName, - AnalysedQueries: &ko, + return func(s *Summary) error { + s.analyzedFiles = append(s.analyzedFiles, fileName) + summarizeKeysQueries(s, &ko) + return nil + } +} + +func readDBInfoFile(fileName string) func(s *Summary) error { + schemaInfo, err := schema.Load(fileName) + if err != nil { + panic(err) + } + + return func(s *Summary) error { + s.analyzedFiles = append(s.analyzedFiles, fileName) + s.hasRowCount = true + for _, ti := range schemaInfo.Tables { + table := s.GetTable(ti.Name) + if table == nil { + table = &TableSummary{Table: ti.Name} + s.AddTable(table) + } + table.RowCount = ti.Rows + } + return nil } } diff --git a/go/summarize/reading_test.go b/go/summarize/reading_test.go deleted file mode 100644 index fb116a1..0000000 --- a/go/summarize/reading_test.go +++ /dev/null @@ -1,59 +0,0 @@ -/* -Copyright 2024 The Vitess Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package summarize - -import ( - "encoding/json" - "fmt" - "os" - "testing" - - "github.com/stretchr/testify/require" -) - -func initFile(t *testing.T, firstToken rune) *os.File { - f, err := os.CreateTemp("", "") - require.NoError(t, err) - - require.NoError(t, os.WriteFile(f.Name(), []byte(fmt.Sprintf("%c{\"value\": 1}", firstToken)), 0o600)) - return f -} - -func TestGetDecoderAndDelim(t *testing.T) { - tests := []struct { - name string - wantDelim rune - }{ - { - name: "[ delim", - wantDelim: '[', - }, - { - name: "{ delim", - wantDelim: '{', - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - f := initFile(t, tt.wantDelim) - defer f.Close() - - _, delim := getDecoderAndDelim(f) - require.Equal(t, json.Delim(tt.wantDelim), delim) - }) - } -} diff --git a/go/summarize/summarize-keys.go b/go/summarize/summarize-keys.go index cbf1b8a..519e8b8 100644 --- a/go/summarize/summarize-keys.go +++ b/go/summarize/summarize-keys.go @@ -18,21 +18,15 @@ package summarize import ( "fmt" - "io" "iter" - "maps" "slices" "sort" - "strconv" - "time" "vitess.io/vitess/go/slice" "vitess.io/vitess/go/vt/sqlparser" "vitess.io/vitess/go/vt/vtgate/planbuilder/operators" "github.com/vitessio/vt/go/keys" - "github.com/vitessio/vt/go/markdown" - "github.com/vitessio/vt/go/schema" ) const HotQueryCount = 10 @@ -50,12 +44,6 @@ type ( Pos Position } - Summary struct { - tables []*TableSummary - failures []FailuresSummary - hotQueries []keys.QueryAnalysisResult - } - TableSummary struct { Table string ReadQueryCount int @@ -78,19 +66,6 @@ type ( queryGraph map[graphKey]map[operators.JoinPredicate]int ) -func (s *Summary) GetTable(name string) *TableSummary { - for _, table := range s.tables { - if table.Table == name { - return table - } - } - return nil -} - -func (s *Summary) AddTable(table *TableSummary) { - s.tables = append(s.tables, table) -} - const ( Join Position = iota JoinRange @@ -165,59 +140,6 @@ func (ts TableSummary) UseCount() int { return ts.ReadQueryCount + ts.WriteQueryCount } -// printKeysSummary goes over all the analysed queries, gathers information about column usage per table, -// and prints this summary information to the output. -func printKeysSummary(out io.Writer, fileName string, analysedQueries *keys.Output, now time.Time, hotMetric, schemaInfoPath string) { - var err error - summary := &Summary{} - - metricReader := getMetricForHotness(hotMetric) - - err = summarizeSchemaInfo(summary, schemaInfoPath) - if err != nil { - panic(err) - } - - summarizeKeysQueries(summary, analysedQueries, metricReader) - md := &markdown.MarkDown{} - msg := `# Query Analysis Report - -**Date of Analysis**: %s -**Analyzed File**: ` + "`%s`" + ` - -` - md.Printf(msg, now.Format(time.DateTime), fileName) - renderHotQueries(md, summary.hotQueries, metricReader) - renderTableUsage(summary.tables, md, schemaInfoPath != "") - renderTablesJoined(md, analysedQueries) - renderFailures(md, summary.failures) - - _, err = md.WriteTo(out) - if err != nil { - panic(err) - } -} - -func summarizeSchemaInfo(summary *Summary, schemaInfoPath string) error { - if schemaInfoPath == "" { - return nil - } - schemaInfo, err := schema.Load(schemaInfoPath) - if err != nil { - return err - } - for _, ti := range schemaInfo.Tables { - table := summary.GetTable(ti.Name) - if table == nil { - table = &TableSummary{Table: ti.Name} - summary.AddTable(table) - } - table.RowCount = ti.Rows - } - - return nil -} - type getMetric = func(q keys.QueryAnalysisResult) float64 func getMetricForHotness(metric string) getMetric { @@ -248,122 +170,6 @@ func getMetricForHotness(metric string) getMetric { } } -func renderHotQueries(md *markdown.MarkDown, queries []keys.QueryAnalysisResult, metricReader getMetric) { - if len(queries) == 0 { - return - } - - hasTime := false - // Sort the queries in descending order of hotness - sort.Slice(queries, func(i, j int) bool { - if queries[i].QueryTime != 0 { - hasTime = true - } - return metricReader(queries[i]) > metricReader(queries[j]) - }) - - if !hasTime { - return - } - - md.PrintHeader("Top Queries", 2) - - // Prepare table headers and rows - headers := []string{"Query ID", "Usage Count", "Total Query Time (ms)", "Avg Query Time (ms)", "Total Rows Examined"} - var rows [][]string - - for i, query := range queries { - queryID := fmt.Sprintf("Q%d", i+1) - avgQueryTime := query.QueryTime / float64(query.UsageCount) - rows = append(rows, []string{ - queryID, - strconv.Itoa(query.UsageCount), - fmt.Sprintf("%.2f", query.QueryTime), - fmt.Sprintf("%.2f", avgQueryTime), - strconv.Itoa(query.RowsExamined), - }) - } - - // Print the table - md.PrintTable(headers, rows) - - // After the table, list the full queries with their IDs - md.PrintHeader("Query Details", 3) - for i, query := range queries { - queryID := fmt.Sprintf("Q%d", i+1) - md.PrintHeader(queryID, 4) - md.Println("```sql") - md.Println(query.QueryStructure) - md.Println("```") - md.NewLine() - } -} - -func renderTableUsage(tableSummaries []*TableSummary, md *markdown.MarkDown, includeRowCount bool) { - if len(tableSummaries) == 0 { - return - } - - sort.Slice(tableSummaries, func(i, j int) bool { - if tableSummaries[i].UseCount() == tableSummaries[j].UseCount() { - return tableSummaries[i].Table < tableSummaries[j].Table - } - return tableSummaries[i].UseCount() > tableSummaries[j].UseCount() - }) - - md.PrintHeader("Tables", 2) - renderTableOverview(md, tableSummaries, includeRowCount) - - md.PrintHeader("Column Usage", 3) - for _, summary := range tableSummaries { - renderColumnUsageTable(md, summary) - } -} - -func renderTableOverview(md *markdown.MarkDown, tableSummaries []*TableSummary, includeRowCount bool) { - headers := []string{"Table Name", "Reads", "Writes"} - if includeRowCount { - headers = append(headers, "Number of Rows") - } - var rows [][]string - for _, summary := range tableSummaries { - thisRow := []string{ - summary.Table, - strconv.Itoa(summary.ReadQueryCount), - strconv.Itoa(summary.WriteQueryCount), - } - if includeRowCount { - thisRow = append(thisRow, strconv.Itoa(summary.RowCount)) - } - - rows = append(rows, thisRow) - } - md.PrintTable(headers, rows) -} - -func renderColumnUsageTable(md *markdown.MarkDown, summary *TableSummary) { - md.PrintHeader(fmt.Sprintf("Table: `%s` (%d reads and %d writes)", summary.Table, summary.ReadQueryCount, summary.WriteQueryCount), 4) - - headers := []string{"Column", "Position", "Used %"} - var rows [][]string - var lastName string - for colInfo, usage := range summary.GetColumns() { - name := colInfo.Name - if lastName == name { - name = "" - } else { - lastName = name - } - rows = append(rows, []string{ - name, - colInfo.Pos.String(), - fmt.Sprintf("%.0f%%", usage.Percentage), - }) - } - - md.PrintTable(headers, rows) -} - func (g queryGraph) AddJoinPredicate(key graphKey, pred operators.JoinPredicate) { if in, exists := g[key]; exists { in[pred]++ @@ -373,84 +179,6 @@ func (g queryGraph) AddJoinPredicate(key graphKey, pred operators.JoinPredicate) g[key] = map[operators.JoinPredicate]int{pred: 1} } -func renderTablesJoined(md *markdown.MarkDown, summary *keys.Output) { - g := make(queryGraph) - for _, query := range summary.Queries { - for _, pred := range query.JoinPredicates { - key := makeKey(pred.LHS, pred.RHS) - g.AddJoinPredicate(key, pred) - } - } - - if len(g) > 0 { - md.PrintHeader("Tables Joined", 2) - } - - type joinDetails struct { - Tbl1, Tbl2 string - Occurrences int - predicates []operators.JoinPredicate - } - - var joins []joinDetails - for tables, predicates := range g { - occurrences := 0 - for _, count := range predicates { - occurrences += count - } - joinPredicates := slices.Collect(maps.Keys(predicates)) - sort.Slice(joinPredicates, func(i, j int) bool { - return joinPredicates[i].String() < joinPredicates[j].String() - }) - joins = append(joins, joinDetails{ - Tbl1: tables.Tbl1, - Tbl2: tables.Tbl2, - Occurrences: occurrences, - predicates: joinPredicates, - }) - } - - sort.Slice(joins, func(i, j int) bool { - if joins[i].Occurrences != joins[j].Occurrences { - return joins[i].Occurrences > joins[j].Occurrences - } - if joins[i].Tbl1 != joins[j].Tbl1 { - return joins[i].Tbl1 < joins[j].Tbl1 - } - return joins[i].Tbl2 < joins[j].Tbl2 - }) - - md.Println("```") - for _, join := range joins { - md.Printf("%s ↔ %s (Occurrences: %d)\n", join.Tbl1, join.Tbl2, join.Occurrences) - for i, pred := range join.predicates { - var s string - if i == len(join.predicates)-1 { - s = "└─" - } else { - s = "├─" - } - md.Printf("%s %s\n", s, pred.String()) - } - md.NewLine() - } - md.Println("```") -} - -func renderFailures(md *markdown.MarkDown, failures []FailuresSummary) { - if len(failures) == 0 { - return - } - md.PrintHeader("Failures", 2) - - headers := []string{"Error", "Count"} - var rows [][]string - for _, failure := range failures { - rows = append(rows, []string{failure.Error, strconv.Itoa(failure.Count)}) - } - md.PrintTable(headers, rows) -} - // makeKey creates a graph key from two columns. The key is always sorted in ascending order. func makeKey(lhs, rhs operators.Column) graphKey { if lhs.Table < rhs.Table { @@ -460,7 +188,7 @@ func makeKey(lhs, rhs operators.Column) graphKey { return graphKey{rhs.Table, lhs.Table} } -func summarizeKeysQueries(summary *Summary, queries *keys.Output, metricReader getMetric) { +func summarizeKeysQueries(summary *Summary, queries *keys.Output) { tableSummaries := make(map[string]*TableSummary) tableUsageWriteCounts := make(map[string]int) tableUsageReadCounts := make(map[string]int) @@ -468,7 +196,7 @@ func summarizeKeysQueries(summary *Summary, queries *keys.Output, metricReader g // First pass: collect all data and count occurrences for _, query := range queries.Queries { gatherTableInfo(query, tableSummaries, tableUsageWriteCounts, tableUsageReadCounts) - checkQueryForHotness(&summary.hotQueries, query, metricReader) + checkQueryForHotness(&summary.hotQueries, query, summary.hotQueryFn) } // Second pass: calculate percentages @@ -511,6 +239,13 @@ func summarizeKeysQueries(summary *Summary, queries *keys.Output, metricReader g }) } summary.failures = failures + + for _, query := range queries.Queries { + for _, pred := range query.JoinPredicates { + key := makeKey(pred.LHS, pred.RHS) + summary.queryGraph.AddJoinPredicate(key, pred) + } + } } func checkQueryForHotness(hotQueries *[]keys.QueryAnalysisResult, query keys.QueryAnalysisResult, metricReader getMetric) { diff --git a/go/summarize/summarize-keys_test.go b/go/summarize/summarize-keys_test.go index 7c01bed..fc5fdc8 100644 --- a/go/summarize/summarize-keys_test.go +++ b/go/summarize/summarize-keys_test.go @@ -67,12 +67,31 @@ func TestTableSummary(t *testing.T) { } func TestSummarizeKeysFile(t *testing.T) { - file, err := readTraceFile(fileInfo{filename: "../testdata/keys-log.json", fileType: keysFile}) - require.NoError(t, err) sb := &strings.Builder{} now := time.Date(2024, time.January, 1, 1, 2, 3, 0, time.UTC) - printKeysSummary(sb, file.Name, file.AnalysedQueries, now, "", "../testdata/keys-schema-info.json") + fnKeys, err := readFile(fileInfo{ + filename: "../testdata/keys-log.json", + fileType: keysFile, + }) + require.NoError(t, err) + + fnSchemaInfo, err := readFile(fileInfo{ + filename: "../testdata/keys-schema-info.json", + fileType: dbInfoFile, + }) + require.NoError(t, err) + + s := NewSummary("") + + err = fnKeys(s) + require.NoError(t, err) + + err = fnSchemaInfo(s) + require.NoError(t, err) + + s.PrintMarkdown(sb, now) + expected, err := os.ReadFile("../testdata/keys-summary.md") require.NoError(t, err) assert.Equal(t, string(expected), sb.String()) @@ -92,11 +111,18 @@ func TestSummarizeKeysWithHotnessFile(t *testing.T) { for _, metric := range tests { t.Run(metric, func(t *testing.T) { - file, err := readTraceFile(fileInfo{filename: "../testdata/bigger_slow_query_log.json", fileType: keysFile}) + fn, err := readFile(fileInfo{filename: "../testdata/bigger_slow_query_log.json", fileType: keysFile}) require.NoError(t, err) sb := &strings.Builder{} now := time.Date(2024, time.January, 1, 1, 2, 3, 0, time.UTC) - printKeysSummary(sb, file.Name, file.AnalysedQueries, now, metric, "") + + s := NewSummary(metric) + + err = fn(s) + require.NoError(t, err) + + s.PrintMarkdown(sb, now) + expected, err := os.ReadFile(fmt.Sprintf("../testdata/bigger_slow_log_%s.md", metric)) require.NoError(t, err) assert.Equal(t, string(expected), sb.String()) diff --git a/go/summarize/summarize-trace.go b/go/summarize/summarize-trace.go index c650ca1..744ae58 100644 --- a/go/summarize/summarize-trace.go +++ b/go/summarize/summarize-trace.go @@ -62,9 +62,9 @@ func visit(trace Trace, f func(Trace)) { } } -func summarizeTraces(file readingSummary) map[string]QuerySummary { +func summarizeTraces(tq []TracedQuery) map[string]QuerySummary { summary := make(map[string]QuerySummary) - for _, traceElement := range file.TracedQueries { + for _, traceElement := range tq { summary[traceElement.Query] = summarizeTrace(traceElement) } return summary @@ -100,15 +100,15 @@ func summarizeTrace(t TracedQuery) QuerySummary { return summary } -func compareTraces(out io.Writer, termWidth int, highLighter Highlighter, file1, file2 readingSummary) { - summary1 := summarizeTraces(file1) - summary2 := summarizeTraces(file2) +func compareTraces(out io.Writer, termWidth int, highLighter Highlighter, tq1, tq2 traceSummary) { + summary1 := summarizeTraces(tq1.TracedQueries) + summary2 := summarizeTraces(tq2.TracedQueries) var significantChanges, totalQueries int var s1RouteCalls, s1DataSent, s1MemoryRows, s1ShardsQueried int var s2RouteCalls, s2DataSent, s2MemoryRows, s2ShardsQueried int - for _, q := range file1.TracedQueries { + for _, q := range tq1.TracedQueries { s1, ok1 := summary1[q.Query] s2, ok2 := summary2[q.Query] if !ok1 || !ok2 { @@ -117,7 +117,7 @@ func compareTraces(out io.Writer, termWidth int, highLighter Highlighter, file1, totalQueries++ table := tablewriter.NewWriter(out) - table.SetHeader([]string{"Metric", file1.Name, file2.Name, "Diff", "% Change"}) + table.SetHeader([]string{"Metric", tq1.Name, tq2.Name, "Diff", "% Change"}) table.SetAutoFormatHeaders(false) m1 := compareMetric(table, "Route Calls", s1.RouteCalls, s2.RouteCalls) @@ -180,9 +180,9 @@ func compareMetric(table *tablewriter.Table, metricName string, val1, val2 int) return percentChange < -significantChangeThreshold } -func printTraceSummary(out io.Writer, termWidth int, highLighter Highlighter, file readingSummary) { - summary := summarizeTraces(file) - for i, query := range file.TracedQueries { +func printTraceSummary(out io.Writer, termWidth int, highLighter Highlighter, tq traceSummary) { + summary := summarizeTraces(tq.TracedQueries) + for i, query := range tq.TracedQueries { if i > 0 { fmt.Fprintln(out) } diff --git a/go/summarize/summarize.go b/go/summarize/summarize.go index a3b4410..bc6c241 100644 --- a/go/summarize/summarize.go +++ b/go/summarize/summarize.go @@ -27,15 +27,23 @@ import ( "golang.org/x/term" "github.com/vitessio/vt/go/keys" + "github.com/vitessio/vt/go/markdown" ) type ( - readingSummary struct { - Name string + Summary struct { + tables []*TableSummary + failures []FailuresSummary + hotQueries []keys.QueryAnalysisResult + hotQueryFn getMetric + analyzedFiles []string + queryGraph queryGraph + hasRowCount bool + } - // Only one of these fields will be populated - TracedQueries []TracedQuery // Set when analyzing a 'vt tester --trace' output - AnalysedQueries *keys.Output // Set when analyzing a 'vt keys' output + traceSummary struct { + Name string + TracedQueries []TracedQuery } fileInfo struct { @@ -44,22 +52,23 @@ type ( } ) +func NewSummary(hotMetric string) *Summary { + return &Summary{ + queryGraph: make(queryGraph), + hotQueryFn: getMetricForHotness(hotMetric), + } +} + func Run(files []string, hotMetric string) { - var dbInfoPath string var filesToRead []fileInfo var hasTrace bool - // if we have tracefiles, handle them - // otherwise, create a summary and feed it to all json inputs - // move rendering to this spot - // todo: add file types for other json types. Right now just checks for dbinfo files, else defaults - for _, file := range files { typ, _ := getFileType(file) switch typ { case dbInfoFile: fmt.Printf("dbinfo file: %s\n", file) - dbInfoPath = file + filesToRead = append(filesToRead, fileInfo{filename: file, fileType: dbInfoFile}) case transactionFile: fmt.Printf("transaction file: %s\n", file) case traceFile: @@ -73,25 +82,70 @@ func Run(files []string, hotMetric string) { } checkTraceConditions(hasTrace, filesToRead, hotMetric) - rs := make([]readingSummary, len(filesToRead)) + if hasTrace { + if len(filesToRead) == 2 { + compareTraces(os.Stdout, terminalWidth(), highlightQuery, readTraceFile(filesToRead[0]), readTraceFile(filesToRead[1])) + } else { + printTraceSummary(os.Stdout, terminalWidth(), highlightQuery, readTraceFile(filesToRead[0])) + } + return + } + + s := NewSummary(hotMetric) + + rFuncs := make([]func(s *Summary) error, len(filesToRead)) var err error for i, f := range filesToRead { - rs[i], err = readTraceFile(f) + rFuncs[i], err = readFile(f) if err != nil { exit(err.Error()) } } - if hasTrace { - if len(rs) == 2 { - compareTraces(os.Stdout, terminalWidth(), highlightQuery, rs[0], rs[1]) - } else { - printTraceSummary(os.Stdout, terminalWidth(), highlightQuery, rs[0]) + for _, f := range rFuncs { + err = f(s) + if err != nil { + panic(err) } - return } + s.PrintMarkdown(os.Stdout, time.Now()) +} + +func (s *Summary) PrintMarkdown(out io.Writer, now time.Time) { + md := &markdown.MarkDown{} + msg := `# Query Analysis Report + +**Date of Analysis**: %s +**Analyzed Files**: ` + "%s" + ` + +` + + for i, file := range s.analyzedFiles { + s.analyzedFiles[i] = "`" + file + "`" + } + md.Printf(msg, now.Format(time.DateTime), strings.Join(s.analyzedFiles, ", ")) + renderHotQueries(md, s.hotQueries, s.hotQueryFn) + renderTableUsage(md, s.tables, s.hasRowCount) + renderTablesJoined(md, s) + renderFailures(md, s.failures) + + _, err := md.WriteTo(out) + if err != nil { + panic(err) + } +} + +func (s *Summary) GetTable(name string) *TableSummary { + for _, table := range s.tables { + if table.Table == name { + return table + } + } + return nil +} - printKeysSummary(os.Stdout, rs[0].Name, rs[0].AnalysedQueries, time.Now(), hotMetric, dbInfoPath) +func (s *Summary) AddTable(table *TableSummary) { + s.tables = append(s.tables, table) } func checkTraceConditions(hasTrace bool, filesToRead []fileInfo, hotMetric string) { diff --git a/go/summarize/summarize_test.go b/go/summarize/summarize_test.go index ed24032..1eb0089 100644 --- a/go/summarize/summarize_test.go +++ b/go/summarize/summarize_test.go @@ -21,11 +21,10 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) -func tf1() readingSummary { - return readingSummary{ +func tf1() traceSummary { + return traceSummary{ Name: "test", TracedQueries: []TracedQuery{{ Query: "select * from music", @@ -74,8 +73,8 @@ func tf1() readingSummary { } } -func tf2() readingSummary { - return readingSummary{ +func tf2() traceSummary { + return traceSummary{ Name: "test", TracedQueries: []TracedQuery{{ Query: "select * from music", @@ -184,10 +183,9 @@ Summary: } func TestSummarizeTraceFile(t *testing.T) { - file, err := readTraceFile(fileInfo{filename: "../testdata/trace-log.json", fileType: traceFile}) - require.NoError(t, err) + tq := readTraceFile(fileInfo{filename: "../testdata/trace-log.json", fileType: traceFile}) sb := &strings.Builder{} - printTraceSummary(sb, 80, noHighlight, file) + printTraceSummary(sb, 80, noHighlight, tq) expected := `Query: INSERT INTO region (R_REGIONKEY, R_NAME, R_COMMENT) VALUES (1, 'ASIA',... Line # 80 +-------------+-----------+----------------+----------------+ diff --git a/go/testdata/bigger_slow_log_avg-rows-examined.md b/go/testdata/bigger_slow_log_avg-rows-examined.md index 968e154..6e4dbdf 100644 --- a/go/testdata/bigger_slow_log_avg-rows-examined.md +++ b/go/testdata/bigger_slow_log_avg-rows-examined.md @@ -1,7 +1,7 @@ # Query Analysis Report **Date of Analysis**: 2024-01-01 01:02:03 -**Analyzed File**: `../testdata/bigger_slow_query_log.json` +**Analyzed Files**: `../testdata/bigger_slow_query_log.json` ## Top Queries |Query ID|Usage Count|Total Query Time (ms)|Avg Query Time (ms)|Total Rows Examined| diff --git a/go/testdata/bigger_slow_log_avg-time.md b/go/testdata/bigger_slow_log_avg-time.md index 4d70414..ee2e478 100644 --- a/go/testdata/bigger_slow_log_avg-time.md +++ b/go/testdata/bigger_slow_log_avg-time.md @@ -1,7 +1,7 @@ # Query Analysis Report **Date of Analysis**: 2024-01-01 01:02:03 -**Analyzed File**: `../testdata/bigger_slow_query_log.json` +**Analyzed Files**: `../testdata/bigger_slow_query_log.json` ## Top Queries |Query ID|Usage Count|Total Query Time (ms)|Avg Query Time (ms)|Total Rows Examined| diff --git a/go/testdata/bigger_slow_log_total-rows-examined.md b/go/testdata/bigger_slow_log_total-rows-examined.md index ce5ddd1..e17cf17 100644 --- a/go/testdata/bigger_slow_log_total-rows-examined.md +++ b/go/testdata/bigger_slow_log_total-rows-examined.md @@ -1,7 +1,7 @@ # Query Analysis Report **Date of Analysis**: 2024-01-01 01:02:03 -**Analyzed File**: `../testdata/bigger_slow_query_log.json` +**Analyzed Files**: `../testdata/bigger_slow_query_log.json` ## Top Queries |Query ID|Usage Count|Total Query Time (ms)|Avg Query Time (ms)|Total Rows Examined| diff --git a/go/testdata/bigger_slow_log_total-time.md b/go/testdata/bigger_slow_log_total-time.md index ca8b4c4..03fa02d 100644 --- a/go/testdata/bigger_slow_log_total-time.md +++ b/go/testdata/bigger_slow_log_total-time.md @@ -1,7 +1,7 @@ # Query Analysis Report **Date of Analysis**: 2024-01-01 01:02:03 -**Analyzed File**: `../testdata/bigger_slow_query_log.json` +**Analyzed Files**: `../testdata/bigger_slow_query_log.json` ## Top Queries |Query ID|Usage Count|Total Query Time (ms)|Avg Query Time (ms)|Total Rows Examined| diff --git a/go/testdata/bigger_slow_log_usage-count.md b/go/testdata/bigger_slow_log_usage-count.md index 015a38a..a19fcf8 100644 --- a/go/testdata/bigger_slow_log_usage-count.md +++ b/go/testdata/bigger_slow_log_usage-count.md @@ -1,7 +1,7 @@ # Query Analysis Report **Date of Analysis**: 2024-01-01 01:02:03 -**Analyzed File**: `../testdata/bigger_slow_query_log.json` +**Analyzed Files**: `../testdata/bigger_slow_query_log.json` ## Top Queries |Query ID|Usage Count|Total Query Time (ms)|Avg Query Time (ms)|Total Rows Examined| diff --git a/go/testdata/keys-summary.md b/go/testdata/keys-summary.md index b95aaca..39ba9f6 100644 --- a/go/testdata/keys-summary.md +++ b/go/testdata/keys-summary.md @@ -1,7 +1,7 @@ # Query Analysis Report **Date of Analysis**: 2024-01-01 01:02:03 -**Analyzed File**: `../testdata/keys-log.json` +**Analyzed Files**: `../testdata/keys-log.json`, `../testdata/keys-schema-info.json` ## Tables |Table Name|Reads|Writes|Number of Rows|