Skip to content

Commit

Permalink
ESQL: Add MV_PSERIES_WEIGHTED_SUM for score calculations used by security solution (elastic#109017)
Browse files Browse the repository at this point in the history

* Create MV_PSERIES_WEIGHTED_SUM scalar multivalue function (initially named MV_RIEMANN_ZETA)



---------

Co-authored-by: Nik Everett <[email protected]>
  • Loading branch information
machadoum and nik9000 authored Jul 31, 2024
1 parent 586405d commit f79c621
Show file tree
Hide file tree
Showing 24 changed files with 636 additions and 5 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/109017.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 109017
summary: "ESQL: Add `MV_PSERIES_WEIGHTED_SUM` for score calculations used by security\
\ solution"
area: ES|QL
type: "feature"
issues: [ ]

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions docs/reference/esql/functions/mv-functions.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
* <<esql-mv_max>>
* <<esql-mv_median>>
* <<esql-mv_min>>
* <<esql-mv_pseries_weighted_sum>>
* <<esql-mv_sort>>
* <<esql-mv_slice>>
* <<esql-mv_sum>>
Expand All @@ -34,6 +35,7 @@ include::layout/mv_last.asciidoc[]
include::layout/mv_max.asciidoc[]
include::layout/mv_median.asciidoc[]
include::layout/mv_min.asciidoc[]
include::layout/mv_pseries_weighted_sum.asciidoc[]
include::layout/mv_slice.asciidoc[]
include::layout/mv_sort.asciidoc[]
include::layout/mv_sum.asciidoc[]
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ public class CsvTestsDataLoader {
private static final TestsDataset HOSTS = new TestsDataset("hosts", "mapping-hosts.json", "hosts.csv");
private static final TestsDataset APPS = new TestsDataset("apps", "mapping-apps.json", "apps.csv");
private static final TestsDataset LANGUAGES = new TestsDataset("languages", "mapping-languages.json", "languages.csv");
private static final TestsDataset ALERTS = new TestsDataset("alerts", "mapping-alerts.json", "alerts.csv");
private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs", "mapping-ul_logs.json", "ul_logs.csv");
private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data", "mapping-sample_data.json", "sample_data.csv");
private static final TestsDataset SAMPLE_DATA_STR = new TestsDataset(
Expand Down Expand Up @@ -106,6 +107,7 @@ public class CsvTestsDataLoader {
Map.entry(LANGUAGES.indexName, LANGUAGES),
Map.entry(UL_LOGS.indexName, UL_LOGS),
Map.entry(SAMPLE_DATA.indexName, SAMPLE_DATA),
Map.entry(ALERTS.indexName, ALERTS),
Map.entry(SAMPLE_DATA_STR.indexName, SAMPLE_DATA_STR),
Map.entry(SAMPLE_DATA_TS_LONG.indexName, SAMPLE_DATA_TS_LONG),
Map.entry(CLIENT_IPS.indexName, CLIENT_IPS),
Expand Down
11 changes: 11 additions & 0 deletions x-pack/plugin/esql/qa/testFixtures/src/main/resources/alerts.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
host.name:keyword,kibana.alert.risk_score:double
test-host-1,21.0
test-host-2,17.0
test-host-2,23.0
test-host-1,45.0
test-host-2,12.0
test-host-2,16.0
test-host-1,21.0
test-host-1,70.0
test-host-1,21.0
test-host-2,5.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"properties": {
"host.name": {
"type": "keyword"
},
"kibana.alert.risk_score": {
"type": "double"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ double e()
"boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version mv_max(field:boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version)"
"double|integer|long|unsigned_long mv_median(number:double|integer|long|unsigned_long)"
"boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version mv_min(field:boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version)"
"double mv_pseries_weighted_sum(number:double, p:double)"
"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|version mv_slice(field:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|version, start:integer, ?end:integer)"
"boolean|date|double|integer|ip|keyword|long|text|version mv_sort(field:boolean|date|double|integer|ip|keyword|long|text|version, ?order:keyword)"
"double|integer|long|unsigned_long mv_sum(number:double|integer|long|unsigned_long)"
Expand Down Expand Up @@ -174,6 +175,7 @@ mv_last |field |"boolean|cartesian_point|car
mv_max |field |"boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version" |Multivalue expression.
mv_median |number |"double|integer|long|unsigned_long" |Multivalue expression.
mv_min |field |"boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version" |Multivalue expression.
mv_pseries_wei|[number, p] |[double, double] |[Multivalue expression., It is a constant number that represents the 'p' parameter in the P-Series. It impacts every element's contribution to the weighted sum.]
mv_slice |[field, start, end] |["boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|version", integer, integer]|[Multivalue expression. If `null`\, the function returns `null`., Start position. If `null`\, the function returns `null`. The start argument can be negative. An index of -1 is used to specify the last value in the list., End position(included). Optional; if omitted\, the position at `start` is returned. The end argument can be negative. An index of -1 is used to specify the last value in the list.]
mv_sort |[field, order] |["boolean|date|double|integer|ip|keyword|long|text|version", keyword] |[Multivalue expression. If `null`\, the function returns `null`., Sort order. The valid options are ASC and DESC\, the default is ASC.]
mv_sum |number |"double|integer|long|unsigned_long" |Multivalue expression.
Expand Down Expand Up @@ -296,6 +298,7 @@ mv_last |Converts a multivalue expression into a single valued column cont
mv_max |Converts a multivalued expression into a single valued column containing the maximum value.
mv_median |Converts a multivalued field into a single valued field containing the median value.
mv_min |Converts a multivalued expression into a single valued column containing the minimum value.
mv_pseries_wei|Converts a multivalued expression into a single-valued column by multiplying every element on the input list by its corresponding term in P-Series and computing the sum.
mv_slice |Returns a subset of the multivalued field using the start and end index values.
mv_sort |Sorts a multivalued field in lexicographical order.
mv_sum |Converts a multivalued field into a single valued field containing the sum of all of the values.
Expand Down Expand Up @@ -419,6 +422,7 @@ mv_last |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|ge
mv_max |"boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version" |false |false |false
mv_median |"double|integer|long|unsigned_long" |false |false |false
mv_min |"boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version" |false |false |false
mv_pseries_wei|"double" |[false, false] |false |false
mv_slice |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|version" |[false, false, true] |false |false
mv_sort |"boolean|date|double|integer|ip|keyword|long|text|version" |[false, true] |false |false
mv_sum |"double|integer|long|unsigned_long" |false |false |false
Expand Down Expand Up @@ -497,5 +501,5 @@ countFunctions#[skip:-8.15.99]
meta functions | stats a = count(*), b = count(*), c = count(*) | mv_expand c;

a:long | b:long | c:long
113 | 113 | 113
114 | 114 | 114
;
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Documentation example (the tagged regions below are included in the reference docs).
// With p = 1.5 the k-th value (1-based) is multiplied by 1 / k^1.5:
// 70/1 + 45/2^1.5 + 21/3^1.5 + 21/4^1.5 + 21/5^1.5, which is about 94.4547.
default
required_capability: mv_pseries_weighted_sum

// tag::example[]
ROW a = [70.0, 45.0, 21.0, 21.0, 21.0]
| EVAL sum = MV_PSERIES_WEIGHTED_SUM(a, 1.5)
| KEEP sum
// end::example[]
;

// tag::example-result[]
sum:double
94.45465156212452
// end::example-result[]
;

// A single value is weighted by 1 / 1^p = 1, so even an extreme p returns it unchanged.
oneElement
required_capability: mv_pseries_weighted_sum

ROW data = [3.0]
| EVAL score = MV_PSERIES_WEIGHTED_SUM(data, 9999.9)
| KEEP score;

score:double
3.0
;

// p = 0 makes every weight 1 / k^0 = 1, so the result is the plain sum 3 + 10 + 15 = 28.
zeroP
required_capability: mv_pseries_weighted_sum

ROW data = [3.0, 10.0, 15.0]
| EVAL score = MV_PSERIES_WEIGHTED_SUM(data, 0.0)
| KEEP score;

score:double
28.0
;

// A negative p inverts the weighting: with p = -2 each weight becomes k^2,
// so later values count more: 10*1 + 5*4 + 3*9 = 57.
negativeP
required_capability: mv_pseries_weighted_sum

ROW data = [10.0, 5.0, 3.0]
| EVAL score = MV_PSERIES_WEIGHTED_SUM(data, -2.0)
| KEEP score;

score:double
57.0
;

// Chains the function with MV_SORT and ROUND: sorting descending first gives the
// largest values the largest weights, then the score is scaled to a percentage.
// NOTE(review): 261.2 looks like 100 * zeta(1.5) (about 2.612), the limit of the
// p-series for p = 1.5 - confirm against the security solution's risk-score formula.
composed
required_capability: mv_pseries_weighted_sum

ROW data = [21.0, 45.0, 21.0, 70.0, 21.0]
| EVAL sorted = MV_SORT(data, "desc")
| EVAL score = MV_PSERIES_WEIGHTED_SUM(sorted, 1.5)
| EVAL normalized_score = ROUND(100 * score / 261.2, 2)
| KEEP normalized_score, score;

normalized_score:double|score:double
36.16 |94.45465156212452
;

// Per-host risk score computed inside an aggregation: TOP(..., 10000, "desc") collects
// each host's risk scores in descending order (at most 10000 of them) and
// MV_PSERIES_WEIGHTED_SUM folds them with weights 1 / k^1.5.
// Expected values: test-host-1 -> [70, 45, 21, 21, 21], test-host-2 -> [23, 17, 16, 12, 5].
// The trailing SORT pins the row order; the expected rows below are order-sensitive
// and STATS ... BY alone does not promise any particular group order.
multivalueAggregation
required_capability: mv_pseries_weighted_sum

FROM alerts
| WHERE host.name is not null
| SORT host.name, kibana.alert.risk_score
| STATS score=MV_PSERIES_WEIGHTED_SUM(
    TOP(kibana.alert.risk_score, 10000, "desc"), 1.5
  ) BY host.name
| EVAL normalized_score = ROUND(100 * score / 261.2, 2)
| KEEP host.name, normalized_score, score
| SORT host.name;

host.name:keyword|normalized_score:double|score:double
test-host-1 |36.16 |94.45465156212452
test-host-2 |13.03 |34.036822671263614
;

// The function's result can be passed straight into another function:
// ROUND(94.4546..., 1) = 94.5.
asArgument
required_capability: mv_pseries_weighted_sum

ROW data = [70.0, 45.0, 21.0, 21.0, 21.0]
| EVAL score = ROUND(MV_PSERIES_WEIGHTED_SUM(data, 1.5), 1)
| KEEP score;

score:double
94.5
;

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f79c621

Please sign in to comment.