diff --git a/docs/ppl-lang/planning/ppl-geoip.md b/docs/ppl-lang/planning/ppl-geoip.md new file mode 100644 index 000000000..f6bef8f34 --- /dev/null +++ b/docs/ppl-lang/planning/ppl-geoip.md @@ -0,0 +1,39 @@ +## geoip syntax proposal + +The `geoip` function adds information about the geographical location of an IPv4 or IPv6 address. + +1. **Proposed syntax** + - `... | eval geoinfo = geoip([datasource,] ipAddress [,properties])` + - generic syntax + - `... | eval geoinfo = geoip(ipAddress)` + - use the default geoip datasource + - `... | eval geoinfo = geoip("abc", ipAddress)` + - use the "abc" geoip datasource + - `... | eval geoinfo = geoip(ipAddress, "city,lat,lon")` + - use the default geoip datasource, retrieve only city, lat and lon + - `... | eval geoinfo = geoip("abc", ipAddress, "city,lat,lon")` + - use the "abc" geoip datasource, retrieve only city, lat and lon + + +2. **Proposed wiring with the geoip database** + - Leverage the functionality of the ip2geo processor + - The ip2geo processor's configuration, functionality and code will be used + - A prerequisite for `geoip` is that the ip2geo processor is configured properly + - See https://opensearch.org/docs/latest/ingest-pipelines/processors/ip2geo/ + + +### New syntax definition in ANTLR + +```ANTLR + +// functions +evalFunctionCall + : evalFunctionName LT_PRTHS functionArgs RT_PRTHS + | geoipFunction + ; + +geoipFunction + : GEOIP LT_PRTHS (datasource = functionArg COMMA)? ipAddress = functionArg (COMMA properties = stringLiteral)? 
RT_PRTHS + ; +``` + diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 index 2b916a245..5d980f167 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -372,6 +372,9 @@ TYPEOF: 'TYPEOF'; //OTHER CONDITIONAL EXPRESSIONS COALESCE: 'COALESCE'; +//GEOLOCATION FUNCTIONS +GEOIP: 'GEOIP'; + // RELEVANCE FUNCTIONS AND PARAMETERS MATCH: 'MATCH'; MATCH_PHRASE: 'MATCH_PHRASE'; diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index 7a6f14839..0c010a51f 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -465,6 +465,7 @@ wcFieldExpression // functions evalFunctionCall : evalFunctionName LT_PRTHS functionArgs RT_PRTHS + | geoipFunction ; // cast function @@ -781,6 +782,10 @@ coalesceFunctionName : COALESCE ; +geoipFunction /* Matches geoip([datasource,] ipAddress [, properties]): an optional leading datasource argument, a required ipAddress argument, and an optional trailing string-literal properties argument. NOTE(review): for a two-argument call whose second argument is a string literal, both "datasource, ipAddress" and "ipAddress, properties" appear to match if functionArg can itself derive a string literal (functionArg is not shown in this patch) - confirm which parse is chosen and that it is the intended one. */ + : GEOIP LT_PRTHS (datasource = functionArg COMMA)? ipAddress = functionArg (COMMA properties = stringLiteral)? RT_PRTHS + ; + // operators comparisonOperator : EQUAL @@ -1051,4 +1056,5 @@ keywordsCanBeId | FULL | SEMI | ANTI + | GEOIP ;