QuesmaOrg · trzysiek · Sep 24, 2024 · Oct 7, 2024 · Oct 7, 2024 · Nov 13, 2024
@@ -9,6 +9,7 @@ import (
 )
 
 const PancakeTotalCountMetricName = "__quesma_total_count"
+const PancakeTotalCountColumnName = "metric____quesma_total_count_col_0"
 
 // Not a real aggregation, but it is a pancake that has alternative JSON rendering
 type PancakeQueryType struct {

@@ -6,6 +6,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"github.com/k0kubun/pp"
 	"quesma/model"
 	"quesma/model/bucket_aggregations"
 	"quesma/model/metrics_aggregations"
@@ -437,6 +438,7 @@ func (p *pancakeSqlQueryGenerator) generateQuery(aggregation *pancakeModel) (*mo
 	}
 
 	resultSelectCommand, optimizerName, err := p.generateSelectCommand(aggregation)
+	pp.Println(resultSelectCommand)
 	if err != nil {
 		return nil, err
 	}

@@ -162,7 +162,7 @@ func TestPancakeQueryGeneration(t *testing.T) {
 			if len(expectedMinusActual) != 0 {
 				pp.Println("EXPECTED diff", expectedMinusActual)
 			}
-			//pp.Println("ACTUAL", pancakeJson)
+			pp.Println("ACTUAL", pancakeJson)
 			//pp.Println("EXPECTED", expectedAggregationsPart)
 			assert.True(t, util.AlmostEmpty(actualMinusExpected, acceptableDifference))
 			assert.True(t, util.AlmostEmpty(expectedMinusActual, acceptableDifference))

@@ -4,6 +4,8 @@ package queryparser
 
 import (
 	"context"
+	"fmt"
+	"github.com/k0kubun/pp"
 	"quesma/clickhouse"
 	"quesma/logger"
 	"quesma/model"
@@ -137,11 +139,14 @@ func (cw *ClickhouseQueryTranslator) makeTotalCount(queries []*model.Query, resu
 	// a) we have count query -> we're done
 	// b) we have hits or facets -> we're done
 	// c) we don't have above: we return len(biggest resultset(all aggregations))
+	fmt.Printf("queries:\n%+v\n", queries)
 	totalCount := -1
 	relationCount := "eq"
 	for i, query := range queries {
+		fmt.Printf("%+v %+v", query, *query)
 		if query.Type != nil {
 			if _, isCount := query.Type.(typical_queries.Count); isCount {
+				fmt.Println(results[i])
 				if len(results[i]) > 0 && len(results[i][0].Cols) > 0 {
 					switch v := results[i][0].Cols[0].Value.(type) {
 					case uint64:
@@ -177,38 +182,38 @@ func (cw *ClickhouseQueryTranslator) makeTotalCount(queries []*model.Query, resu
 	for queryIdx, query := range queries {
 		if pancake, isPancake := query.Type.(PancakeQueryType); isPancake {
 			totalCountAgg := pancake.ReturnTotalCount()
-			if totalCountAgg != nil {
-				if len(results[queryIdx]) == 0 {
-					continue
-				}
-				firstRow := results[queryIdx][0]
-				for _, cell := range firstRow.Cols {
-					// FIXME THIS is hardcoded for now, as we don't have a way to get the name of the column
-					if cell.ColName == "metric____quesma_total_count_col_0" {
-						switch v := cell.Value.(type) {
-						case uint64:
-							totalCount = int(v)
-						case int:
-							totalCount = v
-						case int64:
-							totalCount = int(v)
-						default:
-							logger.ErrorWithCtx(cw.Ctx).Msgf("Unknown type of count %v %t", v, v)
-						}
+			if totalCountAgg == nil || len(results[queryIdx]) == 0 {
+				continue
+			}
+
+			firstRow := results[queryIdx][0]
+			for _, cell := range firstRow.Cols {
+				// FIXME THIS is hardcoded for now, as we don't have a way to get the name of the column
+				if cell.ColName == PancakeTotalCountColumnName {
+					switch v := cell.Value.(type) {
+					case uint64:
+						totalCount = int(v)
+					case int:
+						totalCount = v
+					case int64:
+						totalCount = int(v)
+					default:
+						logger.ErrorWithCtx(cw.Ctx).Msgf("Unknown type of count %v %t", v, v)
 					}
 				}
-				total = &model.Total{
-					Value:    totalCount,
-					Relation: "eq",
-				}
-				return
 			}
+			total = &model.Total{
+				Value:    totalCount,
+				Relation: "eq",
+			}
+			return
 		}
 	}
 
 	for i, query := range queries {
 		if _, hasHits := query.Type.(*typical_queries.Hits); hasHits {
 			totalCount = len(results[i])
+			fmt.Println("dupa", totalCount)
 			relation := "eq"
 			if query.SelectCommand.Limit != 0 && totalCount == query.SelectCommand.Limit {
 				relation = "gte"
@@ -230,6 +235,8 @@ func (cw *ClickhouseQueryTranslator) MakeSearchResponse(queries []*model.Query,
 	queries, ResultSets, total = cw.makeTotalCount(queries, ResultSets) // get hits and remove it from queries
 	queries, ResultSets, hits = cw.makeHits(queries, ResultSets)        // get hits and remove it from queries
 
+	pp.Println(hits)
+
 	aggregations, err := cw.MakeAggregationPartOfResponse(queries, ResultSets)
 
 	response := &model.SearchResp{

@@ -452,3 +452,33 @@ func TestMakeResponseSearchQueryIsProperJson(t *testing.T) {
 		_ = cw.MakeSearchResponse([]*model.Query{{Highlighter: NewEmptyHighlighter()}}, [][]model.QueryResultRow{{resultRow}})
 	}
 }
+
+// Test_makeTotalCount runs makeTotalCount for a single pancake-typed query
+// against the result sets of every table entry.
+// NOTE(review): nothing is asserted yet - wantTotal is never compared with the
+// value returned by makeTotalCount, so a case can only fail on a panic.
+func Test_makeTotalCount(t *testing.T) {
+	type testCase struct {
+		name          string
+		resultsFromDB [][]model.QueryResultRow
+		wantTotal     *model.Total
+	}
+
+	// One query is shared by all cases: it selects a count function and a
+	// reference to column "a", and is typed as a pancake query.
+	selectCmd := model.SelectCommand{
+		Columns: []model.Expr{model.NewCountFunc(), model.NewColumnRef("a")},
+	}
+	pancakeQuery := &model.Query{
+		SelectCommand: selectCmd,
+		Type:          PancakeQueryType{},
+		OptimizeHints: model.NewQueryExecutionHints(),
+	}
+	translator := ClickhouseQueryTranslator{}
+
+	cases := []testCase{
+		{name: "a", resultsFromDB: [][]model.QueryResultRow{}},
+	}
+	for i := range cases {
+		tc := cases[i]
+		t.Run(tc.name, func(t *testing.T) {
+			translator.makeTotalCount([]*model.Query{pancakeQuery}, tc.resultsFromDB)
+		})
+	}
+}
@@ -2824,6 +2824,125 @@ var AggregationTests2 = []AggregationTestCase{
 			ORDER BY "metric__0__1_col_0" DESC, "aggr__0__key_0" ASC
 			LIMIT 6`,
 	},
+	{ // [55]
+		TestName: "terms order by percentile_ranks",
+		QueryRequestJson: `
+		{
+			"_source": {
+				"excludes": []
+			},
+			"aggs": {
+				"0": {
+					"aggs": {
+						"1": {
+							"percentile_ranks": {
+								"field": "DistanceKilometers",
+								"values": [
+									0, 50
+								]
+							}
+						}
+					},
+					"terms": {
+						"field": "Cancelled",
+						"order": {
+							"1.0": "desc"
+						},
+						"shard_size": 25,
+						"size": 5
+					}
+				}
+			},
+			"script_fields": {},
+			"size": 0,
+			"stored_fields": [
+				"*"
+			],
+			"track_total_hits": true
+		}`,
+		ExpectedResponse: `
+		{
+			"is_partial": false,
+			"is_running": false,
+			"start_time_in_millis": 1727114076973,
+			"expiration_time_in_millis": 1727546076973,
+			"completion_time_in_millis": 1727114076978,
+			"response": {
+				"took": 5,
+				"timed_out": false,
+				"_shards": {
+					"total": 1,
+					"successful": 1,
+					"skipped": 0,
+					"failed": 0
+				},
+				"hits": {
+					"total": {
+						"value": 212,
+						"relation": "eq"
+					},
+					"max_score": null,
+					"hits": []
+				},
+				"aggregations": {
+					"0": {
+						"doc_count_error_upper_bound": 0,
+						"sum_other_doc_count": 0,
+						"buckets": [
+							{
+								"1": {
+									"values": {
+										"0.0": 3.314917127071823,
+										"50.0": 6.441097753551789
+									}
+								},
+								"key": 0,
+								"doc_count": 181
+							},
+							{
+								"1": {
+									"values": {
+										"0.0": 3.225806451612903,
+										"50.0": 9.813812484840025
+									}
+								},
+								"key": 1,
+								"doc_count": 31
+							}
+						]
+					}
+				}
+			}
+		}`,
+		ExpectedPancakeResults: []model.QueryResultRow{
+			{Cols: []model.QueryResultCol{
+				model.NewQueryResultCol("aggr__0__parent_count", 212),
+				model.NewQueryResultCol("aggr__0__key_0", 0),
+				model.NewQueryResultCol("aggr__0__count", int64(181)),
+				model.NewQueryResultCol("aggr__0__order_1", 3.314917127071823),
+				model.NewQueryResultCol("metric__0__1_col_0", 3.314917127071823),
+				model.NewQueryResultCol("metric__0__1_col_1", 6.441097753551789),
+			}},
+			{Cols: []model.QueryResultCol{
+				model.NewQueryResultCol("aggr__0__parent_count", 212),
+				model.NewQueryResultCol("aggr__0__key_0", 1),
+				model.NewQueryResultCol("aggr__0__count", int64(31)),
+				model.NewQueryResultCol("aggr__0__order_1", 3.225806451612903),
+				model.NewQueryResultCol("metric__0__1_col_0", 3.225806451612903),
+				model.NewQueryResultCol("metric__0__1_col_1", 9.813812484840025),
+			}},
+		},
+		ExpectedPancakeSQL: `
+			SELECT sum(count(*)) OVER () AS "aggr__0__parent_count",
+			  "Cancelled" AS "aggr__0__key_0", count(*) AS "aggr__0__count",
+			  countIf("DistanceKilometers"<=0)/count(*)*100 AS "aggr__0__order_1",
+			  countIf("DistanceKilometers"<=0)/count(*)*100 AS "metric__0__1_col_0",
+			  countIf("DistanceKilometers"<=50)/count(*)*100 AS "metric__0__1_col_1"
+			FROM __quesma_table_name
+			GROUP BY "Cancelled" AS "aggr__0__key_0"
+			ORDER BY "aggr__0__order_1" DESC, "aggr__0__key_0" ASC
+			LIMIT 6`,
+	},
 	{ // [56]
 		TestName: "simple histogram with null values, no missing parameter",
 		QueryRequestJson: `