diff --git a/legend-engine-core/legend-engine-core-base/legend-engine-core-executionPlan-execution/legend-engine-executionPlan-dependencies/pom.xml b/legend-engine-core/legend-engine-core-base/legend-engine-core-executionPlan-execution/legend-engine-executionPlan-dependencies/pom.xml index 60f56d348fb..0e98d41bf14 100644 --- a/legend-engine-core/legend-engine-core-base/legend-engine-core-executionPlan-execution/legend-engine-executionPlan-dependencies/pom.xml +++ b/legend-engine-core/legend-engine-core-base/legend-engine-core-executionPlan-execution/legend-engine-executionPlan-dependencies/pom.xml @@ -57,6 +57,10 @@ commons-codec commons-codec + + org.apache.commons + commons-text + diff --git a/legend-engine-core/legend-engine-core-base/legend-engine-core-executionPlan-execution/legend-engine-executionPlan-dependencies/src/main/java/org/finos/legend/engine/plan/dependencies/util/Library.java b/legend-engine-core/legend-engine-core-base/legend-engine-core-executionPlan-execution/legend-engine-executionPlan-dependencies/src/main/java/org/finos/legend/engine/plan/dependencies/util/Library.java index b21b1f879c4..b83a4121d71 100644 --- a/legend-engine-core/legend-engine-core-base/legend-engine-core-executionPlan-execution/legend-engine-executionPlan-dependencies/src/main/java/org/finos/legend/engine/plan/dependencies/util/Library.java +++ b/legend-engine-core/legend-engine-core-base/legend-engine-core-executionPlan-execution/legend-engine-executionPlan-dependencies/src/main/java/org/finos/legend/engine/plan/dependencies/util/Library.java @@ -16,6 +16,8 @@ import org.apache.commons.codec.binary.Base64; import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.text.similarity.JaroWinklerSimilarity; +import org.apache.commons.text.similarity.LevenshteinDistance; import org.eclipse.collections.api.factory.Lists; import org.eclipse.collections.api.list.MutableList; import org.finos.legend.engine.plan.dependencies.domain.date.DayOfWeek; @@ -1776,4 +1778,14 @@ public static double coTangent(double input) { return 1.0 / Math.tan(input); } + + public static double jaroWinklerSimilarity(String str1, String str2) + { + return new JaroWinklerSimilarity().apply(str1, str2); + } + + public static long levenshteinDistance(String str1, String str2) + { + return new LevenshteinDistance().apply(str1, str2); + } } diff --git a/legend-engine-core/legend-engine-core-base/legend-engine-core-language-pure/legend-engine-language-pure-compiler/src/main/java/org/finos/legend/engine/language/pure/compiler/toPureGraph/handlers/Handlers.java b/legend-engine-core/legend-engine-core-base/legend-engine-core-language-pure/legend-engine-language-pure-compiler/src/main/java/org/finos/legend/engine/language/pure/compiler/toPureGraph/handlers/Handlers.java index 218356c7bb0..6f60f3b945f 100644 --- a/legend-engine-core/legend-engine-core-base/legend-engine-core-language-pure/legend-engine-language-pure-compiler/src/main/java/org/finos/legend/engine/language/pure/compiler/toPureGraph/handlers/Handlers.java +++ b/legend-engine-core/legend-engine-core-base/legend-engine-core-language-pure/legend-engine-language-pure-compiler/src/main/java/org/finos/legend/engine/language/pure/compiler/toPureGraph/handlers/Handlers.java @@ -1362,6 +1362,8 @@ private void registerStrings() m(h("meta::pure::functions::string::encodeUrl_String_1__String_1__String_1_", true, ps -> res("String", "one"), ps -> ps.size() == 2)))); register(m(m(h("meta::pure::functions::string::decodeUrl_String_1__String_1_", false, ps -> res("String", "one"), ps -> ps.size() == 1)), m(h("meta::pure::functions::string::decodeUrl_String_1__String_1__String_1_", true, ps -> res("String", "one"), ps -> ps.size() == 2)))); + register("meta::pure::functions::string::jaroWinklerSimilarity_String_1__String_1__Float_1_", true, ps -> res("Float", "one")); + register("meta::pure::functions::string::levenshteinDistance_String_1__String_1__Integer_1_", true, ps -> res("Integer", "one")); } private void registerTrigo() @@ -2513,6 +2515,8 @@ private Map buildDispatch() map.put("meta::pure::functions::string::lpad_String_1__Integer_1__String_1__String_1_", (List ps) -> ps.size() == 3 && isOne(ps.get(0)._multiplicity()) && ("Nil".equals(ps.get(0)._genericType()._rawType()._name()) || "String".equals(ps.get(0)._genericType()._rawType()._name())) && isOne(ps.get(1)._multiplicity()) && ("Nil".equals(ps.get(1)._genericType()._rawType()._name()) || "Integer".equals(ps.get(1)._genericType()._rawType()._name())) && isOne(ps.get(2)._multiplicity()) && ("Nil".equals(ps.get(2)._genericType()._rawType()._name()) || "String".equals(ps.get(2)._genericType()._rawType()._name()))); map.put("meta::pure::functions::string::rpad_String_1__Integer_1__String_1_", (List ps) -> ps.size() == 2 && isOne(ps.get(0)._multiplicity()) && ("Nil".equals(ps.get(0)._genericType()._rawType()._name()) || "String".equals(ps.get(0)._genericType()._rawType()._name())) && isOne(ps.get(1)._multiplicity()) && ("Nil".equals(ps.get(1)._genericType()._rawType()._name()) || "Integer".equals(ps.get(1)._genericType()._rawType()._name()))); map.put("meta::pure::functions::string::rpad_String_1__Integer_1__String_1__String_1_", (List ps) -> ps.size() == 3 && isOne(ps.get(0)._multiplicity()) && ("Nil".equals(ps.get(0)._genericType()._rawType()._name()) || "String".equals(ps.get(0)._genericType()._rawType()._name())) && isOne(ps.get(1)._multiplicity()) && ("Nil".equals(ps.get(1)._genericType()._rawType()._name()) || "Integer".equals(ps.get(1)._genericType()._rawType()._name())) && isOne(ps.get(2)._multiplicity()) && ("Nil".equals(ps.get(2)._genericType()._rawType()._name()) || "String".equals(ps.get(2)._genericType()._rawType()._name()))); + map.put("meta::pure::functions::string::jaroWinklerSimilarity_String_1__String_1__Float_1_", (List ps) -> ps.size() == 2 && isOne(ps.get(0)._multiplicity()) && ("Nil".equals(ps.get(0)._genericType()._rawType()._name()) || "String".equals(ps.get(0)._genericType()._rawType()._name())) && isOne(ps.get(1)._multiplicity()) && ("Nil".equals(ps.get(1)._genericType()._rawType()._name()) || "String".equals(ps.get(1)._genericType()._rawType()._name()))); + map.put("meta::pure::functions::string::levenshteinDistance_String_1__String_1__Integer_1_", (List ps) -> ps.size() == 2 && isOne(ps.get(0)._multiplicity()) && ("Nil".equals(ps.get(0)._genericType()._rawType()._name()) || "String".equals(ps.get(0)._genericType()._rawType()._name())) && isOne(ps.get(1)._multiplicity()) && ("Nil".equals(ps.get(1)._genericType()._rawType()._name()) || "String".equals(ps.get(1)._genericType()._rawType()._name()))); map.put("meta::pure::graphFetch::calculateSourceTree_RootGraphFetchTree_1__Mapping_1__Extension_MANY__RootGraphFetchTree_1_", (List ps) -> ps.size() == 3 && isOne(ps.get(0)._multiplicity()) && Sets.immutable.with("Nil", "RootGraphFetchTree", "ExtendedRootGraphFetchTree", "RoutedRootGraphFetchTree", "SerializeTopRootGraphFetchTree").contains(ps.get(0)._genericType()._rawType()._name()) && isOne(ps.get(1)._multiplicity()) && ("Nil".equals(ps.get(1)._genericType()._rawType()._name()) || "Mapping".equals(ps.get(1)._genericType()._rawType()._name())) && ("Nil".equals(ps.get(2)._genericType()._rawType()._name()) || "Extension".equals(ps.get(2)._genericType()._rawType()._name()))); map.put("meta::pure::graphFetch::execution::graphFetchChecked_T_MANY__RootGraphFetchTree_1__Checked_MANY_", (List ps) -> ps.size() == 2 && isOne(ps.get(1)._multiplicity()) && Sets.immutable.with("Nil", "RootGraphFetchTree", "ExtendedRootGraphFetchTree", "RoutedRootGraphFetchTree", "SerializeTopRootGraphFetchTree").contains(ps.get(1)._genericType()._rawType()._name())); map.put("meta::pure::graphFetch::execution::graphFetch_T_MANY__RootGraphFetchTree_1__Integer_1__T_MANY_", (List ps) -> ps.size() == 3 && isOne(ps.get(1)._multiplicity()) && Sets.immutable.with("Nil", "RootGraphFetchTree", "ExtendedRootGraphFetchTree", "RoutedRootGraphFetchTree", "SerializeTopRootGraphFetchTree").contains(ps.get(1)._genericType()._rawType()._name()) && isOne(ps.get(2)._multiplicity()) && ("Nil".equals(ps.get(2)._genericType()._rawType()._name()) || "Integer".equals(ps.get(2)._genericType()._rawType()._name()))); diff --git a/legend-engine-core/legend-engine-core-pure/legend-engine-pure-code-compiled-core/src/main/resources/core/legend/test/handlersTest.pure b/legend-engine-core/legend-engine-core-pure/legend-engine-pure-code-compiled-core/src/main/resources/core/legend/test/handlersTest.pure index 380dd9a2e83..b15783c4a3e 100644 --- a/legend-engine-core/legend-engine-core-pure/legend-engine-pure-code-compiled-core/src/main/resources/core/legend/test/handlersTest.pure +++ b/legend-engine-core/legend-engine-core-pure/legend-engine-pure-code-compiled-core/src/main/resources/core/legend/test/handlersTest.pure @@ -402,6 +402,9 @@ Class meta::legend::test::handlers::model::TestString toUpper(){$this.string->toUpper()}:String[1]; trim(){$this.string->trim()}:String[1]; hashString(){$this.string->meta::pure::functions::hash::hash(meta::pure::functions::hash::HashType.MD5)}:String[1]; + + jaroWinklerSimilarity(){$this.string->meta::pure::functions::string::jaroWinklerSimilarity($this.string)}:Float[1]; + levenshteinDistance(){$this.string->meta::pure::functions::string::levenshteinDistance($this.string)}:Integer[1]; } Class meta::legend::test::handlers::model::TestDate diff --git a/legend-engine-core/legend-engine-core-pure/legend-engine-pure-code-compiled-core/src/main/resources/core/pure/router/routing/router_routing.pure b/legend-engine-core/legend-engine-core-pure/legend-engine-pure-code-compiled-core/src/main/resources/core/pure/router/routing/router_routing.pure index 41fc0c86777..f09ee2fd5fe 100644 --- a/legend-engine-core/legend-engine-core-pure/legend-engine-pure-code-compiled-core/src/main/resources/core/pure/router/routing/router_routing.pure +++ b/legend-engine-core/legend-engine-core-pure/legend-engine-pure-code-compiled-core/src/main/resources/core/pure/router/routing/router_routing.pure @@ -784,6 +784,8 @@ function meta::pure::router::routing::shouldStopFunctions(extensions:meta::pure: left_String_1__Integer_1__String_1_, right_String_1__Integer_1__String_1_, typeName_Any_1__String_1_, + levenshteinDistance_String_1__String_1__Integer_1_, + jaroWinklerSimilarity_String_1__String_1__Float_1_, meta::pure::tds::extensions::firstNotNull_T_MANY__T_$0_1$_, meta::pure::functions::date::calendar::annualized_Date_1__String_1__Date_1__Number_$0_1$__Number_$0_1$_, meta::pure::functions::date::calendar::cme_Date_1__String_1__Date_1__Number_$0_1$__Number_$0_1$_, diff --git a/legend-engine-xts-java/legend-engine-xt-javaPlatformBinding-pure/src/main/resources/core_java_platform_binding/legendJavaPlatformBinding/planConventions/stringLibrary.pure b/legend-engine-xts-java/legend-engine-xt-javaPlatformBinding-pure/src/main/resources/core_java_platform_binding/legendJavaPlatformBinding/planConventions/stringLibrary.pure index b4154331f98..bdc5eed19d7 100644 --- a/legend-engine-xts-java/legend-engine-xt-javaPlatformBinding-pure/src/main/resources/core_java_platform_binding/legendJavaPlatformBinding/planConventions/stringLibrary.pure +++ b/legend-engine-xts-java/legend-engine-xt-javaPlatformBinding-pure/src/main/resources/core_java_platform_binding/legendJavaPlatformBinding/planConventions/stringLibrary.pure @@ -87,8 +87,9 @@ function meta::pure::executionPlan::platformBinding::legendJava::library::string fc2(decodeUrl_String_1__String_1__String_1_, {ctx,str,charset | $library->j_invoke('decodeUrl', [$str, $charset], javaString())}), fc2(encodeUrl_String_1__String_1__String_1_, {ctx,str,charset | $library->j_invoke('encodeUrl', [$str, $charset], javaString())}), - fc2(hash_String_1__HashType_1__String_1_, {ctx,text,hashType | $library->j_invoke('hash', [$text, $hashType], javaString())}) - + fc2(hash_String_1__HashType_1__String_1_, {ctx,text,hashType | $library->j_invoke('hash', [$text, $hashType], javaString())}), + fc2(jaroWinklerSimilarity_String_1__String_1__Float_1_, {ctx,str1,str2 | $library->j_invoke('jaroWinklerSimilarity',[$str1, $str2], javaDouble())}), + fc2(levenshteinDistance_String_1__String_1__Integer_1_, {ctx,str1,str2 | $library->j_invoke('levenshteinDistance',[$str1, $str2], javaLong())}) ]); $conventions->registerLibrary($lib); diff --git a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-dbExtension/legend-engine-xt-relationalStore-duckdb/legend-engine-xt-relationalStore-duckdb-pure/src/main/resources/core_relational_duckdb/relational/sqlQueryToString/duckdbExtension.pure b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-dbExtension/legend-engine-xt-relationalStore-duckdb/legend-engine-xt-relationalStore-duckdb-pure/src/main/resources/core_relational_duckdb/relational/sqlQueryToString/duckdbExtension.pure index 232b9623549..6340fb6d259 100644 --- a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-dbExtension/legend-engine-xt-relationalStore-duckdb/legend-engine-xt-relationalStore-duckdb-pure/src/main/resources/core_relational_duckdb/relational/sqlQueryToString/duckdbExtension.pure +++ b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-dbExtension/legend-engine-xt-relationalStore-duckdb/legend-engine-xt-relationalStore-duckdb-pure/src/main/resources/core_relational_duckdb/relational/sqlQueryToString/duckdbExtension.pure @@ -64,7 +64,7 @@ function meta::relational::functions::sqlQueryToString::duckDB::convertDateToSql | if ($date->hasSubsecond(), | let d= format('%t{[' + $timeZone + ']yyyy-MM-dd HH:mm:ss.SSSSSS}', $date); format('TIMESTAMP \'%s\'',$d);, - | let d= format('%t{[' + $timeZone + ']yyyy-MM-dd HH:mm:ss}', $date); + | let d= format('%t{[' + $timeZone + ']yyyy-MM-dd HH:mm:ss}', $date); format('TIMESTAMP_S \'%s\'',$d); ), | let d =format('%t{[' + $timeZone + ']yyyy-MM-dd}', $date); @@ -74,12 +74,12 @@ function meta::relational::functions::sqlQueryToString::duckDB::convertDateToSql function <> meta::relational::functions::sqlQueryToString::duckDB::getDynaFunctionToSqlForDuckDB(): DynaFunctionToSql[*] { let allStates = allGenerationStates(); - + [ dynaFnToSql('adjust', $allStates, ^ToSql(format='date_add(%s)', transform={p:String[3] | $p->at(0) + ',' + constructIntervalFunction($p->at(2), $p->at(1)) })), dynaFnToSql('booland', $allStates, ^ToSql(format='every(%s)')), - dynaFnToSql('boolor', $allStates, ^ToSql(format='any(%s)')), - dynaFnToSql('castBoolean', $allStates, ^ToSql(format='cast(%s as boolean)')), + dynaFnToSql('boolor', $allStates, ^ToSql(format='any(%s)')), + dynaFnToSql('castBoolean', $allStates, ^ToSql(format='cast(%s as boolean)')), dynaFnToSql('chr', $allStates, ^ToSql(format='char(%s)')), dynaFnToSql('concat', $allStates, ^ToSql(format='concat%s', transform={p:String[*]|$p->joinStrings('(', ', ', ')')})), // dynaFnToSql('convertDate', $allStates, ^ToSql(format='%s', transform={p:String[*] | $p->convertToDateH2()})), @@ -145,7 +145,9 @@ function <> meta::relational::functions::sqlQueryToString::duckD dynaFnToSql('toString', $allStates, ^ToSql(format='cast(%s as varchar)')), // dynaFnToSql('toTimestamp', $allStates, ^ToSql(format='%s', transform={p:String[2] | $p->transformToTimestampH2()})), dynaFnToSql('weekOfYear', $allStates, ^ToSql(format='week(%s)')), - dynaFnToSql('year', $allStates, ^ToSql(format='year(%s)')) + dynaFnToSql('year', $allStates, ^ToSql(format='year(%s)')), + dynaFnToSql('jaroWinklerSimilarity', $allStates, ^ToSql(format='jaro_winkler_similarity(%s, %s)')), + dynaFnToSql('levenshteinDistance', $allStates, ^ToSql(format='levenshtein(%s, %s)')) ]; } @@ -158,8 +160,8 @@ function <> meta::relational::functions::sqlQueryToString::duckD if($dayOfWeek->at(1)=='\'Sunday\'', |'dayofweek('+$dayOfWeek->at(0)+')+1', |'isodow('+$dayOfWeek->at(0)+')' // (Monday = 1, Sunday = 7). - ); - ); + ); + ); } function <> meta::relational::functions::sqlQueryToString::duckDB::processPaddingParams(p:String[*]):String[*] @@ -177,7 +179,7 @@ function <> meta::relational::functions::sqlQueryToString::duckD function meta::relational::functions::sqlQueryToString::duckDB::constructIntervalFunction(unit:String[1], i:String[1]):String[1] { let unitWithoutQuotes = $unit->removeQuotesIfExist(); - + let interval_func= [ pair(DurationUnit.YEARS->toString(), 'to_years'), pair(DurationUnit.MONTHS->toString(), 'to_months'), diff --git a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-dbExtension/legend-engine-xt-relationalStore-snowflake/legend-engine-xt-relationalStore-snowflake-pure/src/main/resources/core_relational_snowflake/relational/sqlQueryToString/snowflakeExtension.pure b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-dbExtension/legend-engine-xt-relationalStore-snowflake/legend-engine-xt-relationalStore-snowflake-pure/src/main/resources/core_relational_snowflake/relational/sqlQueryToString/snowflakeExtension.pure index 378be5b3fe7..bfe477b1d1e 100644 --- a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-dbExtension/legend-engine-xt-relationalStore-snowflake/legend-engine-xt-relationalStore-snowflake-pure/src/main/resources/core_relational_snowflake/relational/sqlQueryToString/snowflakeExtension.pure +++ b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-dbExtension/legend-engine-xt-relationalStore-snowflake/legend-engine-xt-relationalStore-snowflake-pure/src/main/resources/core_relational_snowflake/relational/sqlQueryToString/snowflakeExtension.pure @@ -158,9 +158,11 @@ function <> meta::relational::functions::sqlQueryToString::snowf dynaFnToSql('hour', $allStates, ^ToSql(format='date_part(\'hour\', %s)')), dynaFnToSql('indexOf', $allStates, ^ToSql(format='CHARINDEX(%s)', transform={p:String[2] | $p->at(1) + ', ' + $p->at(0)})), dynaFnToSql('isAlphaNumeric', $allStates, ^ToSql(format=regexpPattern('%s'), transform={p:String[1]|$p->transformAlphaNumericParamsDefault()})), + dynaFnToSql('jaroWinklerSimilarity', $allStates, ^ToSql(format='(jarowinkler_similarity(%s, %s)/100)')), dynaFnToSql('joinStrings', $allStates, ^ToSql(format='listagg(%s, %s)')), dynaFnToSql('log10', $allStates, ^ToSql(format='log(10, %s)')), dynaFnToSql('length', $allStates, ^ToSql(format='length(%s)')), + dynaFnToSql('levenshteinDistance', $allStates, ^ToSql(format='editdistance(%s, %s)')), dynaFnToSql('matches', $allStates, ^ToSql(format=regexpPattern('%s'), transform={p:String[2]|$p->transformRegexpParams()})), dynaFnToSql('minute', $allStates, ^ToSql(format='minute(%s)')), dynaFnToSql('month', $allStates, ^ToSql(format='MONTH(%s)')), diff --git a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-dbExtension/legend-engine-xt-relationalStore-snowflake/legend-engine-xt-relationalStore-snowflake-pure/src/main/resources/core_relational_snowflake/relational/transform/fromPure/tests/testSnowflakeToSQLString.pure b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-dbExtension/legend-engine-xt-relationalStore-snowflake/legend-engine-xt-relationalStore-snowflake-pure/src/main/resources/core_relational_snowflake/relational/transform/fromPure/tests/testSnowflakeToSQLString.pure index 35e356f124c..1898b2a4603 100644 --- a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-dbExtension/legend-engine-xt-relationalStore-snowflake/legend-engine-xt-relationalStore-snowflake-pure/src/main/resources/core_relational_snowflake/relational/transform/fromPure/tests/testSnowflakeToSQLString.pure +++ b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-dbExtension/legend-engine-xt-relationalStore-snowflake/legend-engine-xt-relationalStore-snowflake-pure/src/main/resources/core_relational_snowflake/relational/transform/fromPure/tests/testSnowflakeToSQLString.pure @@ -222,3 +222,23 @@ function <> meta::relational::tests::sqlToString::snowflake::simpleGr let snowflakeSql = toSQLString($fn, meta::relational::tests::simpleRelationalMapping, meta::relational::runtime::DatabaseType.Snowflake, meta::relational::extension::relationalExtensions()); assertEquals('select "productTable_d#5_d#2_m2".NAME as "90.01", count(*) as "cnt" from tradeTable as "root" left outer join productSchema.productTable as "productTable_d#5_d#2_m2" on ("root".prodId = "productTable_d#5_d#2_m2".ID) group by "90.01"', $snowflakeSql); } + +function <> meta::relational::tests::sqlToString::snowflake::testJaroWinklerSimilarity():Boolean[1] +{ + let sql = toSQLString( + |Person.all() + ->project(p|$p.firstName->jaroWinklerSimilarity('John'), 'similarity') + , simpleRelationalMapping, DatabaseType.Snowflake, meta::relational::extension::relationalExtensions()); + + assertEquals('select (jarowinkler_similarity("root".FIRSTNAME, \'John\')/100) as "similarity" from personTable as "root"', $sql); +} + +function <> meta::relational::tests::sqlToString::snowflake::testLevenshteinDistance():Boolean[1] +{ + let sql = toSQLString( + |Person.all() + ->project(p|$p.firstName->levenshteinDistance('John'), 'similarity') + , simpleRelationalMapping, DatabaseType.Snowflake, meta::relational::extension::relationalExtensions()); + + assertEquals('select editdistance("root".FIRSTNAME, \'John\') as "similarity" from personTable as "root"', $sql); +} \ No newline at end of file diff --git a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-executionPlan-connection/pom.xml b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-executionPlan-connection/pom.xml index a505614eaf2..0525c812bca 100644 --- a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-executionPlan-connection/pom.xml +++ b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-executionPlan-connection/pom.xml @@ -235,6 +235,13 @@ + + + org.apache.commons + commons-text + + + commons-codec diff --git a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-executionPlan-connection/src/main/java/org/finos/legend/engine/plan/execution/stores/relational/LegendH2Extensions.java b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-executionPlan-connection/src/main/java/org/finos/legend/engine/plan/execution/stores/relational/LegendH2Extensions.java index c0d03b3ea44..64105801d39 100644 --- a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-executionPlan-connection/src/main/java/org/finos/legend/engine/plan/execution/stores/relational/LegendH2Extensions.java +++ b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-executionPlan-connection/src/main/java/org/finos/legend/engine/plan/execution/stores/relational/LegendH2Extensions.java @@ -18,6 +18,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.similarity.JaroWinklerSimilarity; +import org.apache.commons.text.similarity.LevenshteinDistance; import org.finos.legend.engine.shared.core.ObjectMapperFactory; import org.h2.tools.SimpleResultSet; import org.h2.value.Value; @@ -309,4 +311,24 @@ else if (value instanceof Integer || value instanceof Long) throw new RuntimeException(e); } } + + public static Value legend_h2_extension_edit_distance(Value string1, Value string2) + { + if (string1 == ValueNull.INSTANCE || string2 == ValueNull.INSTANCE) + { + return ValueNull.INSTANCE; + } + + return ValueInteger.get(new LevenshteinDistance().apply(string1.getString(), string2.getString())); + } + + public static Value legend_h2_extension_jaro_winkler_similarity(Value string1, Value string2) + { + if (string1 == ValueNull.INSTANCE || string2 == ValueNull.INSTANCE) + { + return ValueNull.INSTANCE; + } + + return ValueDouble.get(new JaroWinklerSimilarity().apply(string1.getString(), string2.getString())); + } } diff --git a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-executionPlan-connection/src/main/java/org/finos/legend/engine/plan/execution/stores/relational/connection/authentication/strategy/DefaultH2AuthenticationStrategy.java b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-executionPlan-connection/src/main/java/org/finos/legend/engine/plan/execution/stores/relational/connection/authentication/strategy/DefaultH2AuthenticationStrategy.java index 8e919a9f625..db24c8d92ed 100644 --- a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-executionPlan-connection/src/main/java/org/finos/legend/engine/plan/execution/stores/relational/connection/authentication/strategy/DefaultH2AuthenticationStrategy.java +++ b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-executionPlan-connection/src/main/java/org/finos/legend/engine/plan/execution/stores/relational/connection/authentication/strategy/DefaultH2AuthenticationStrategy.java @@ -108,7 +108,9 @@ private static List getLegendH2ExtensionSQLs() "CREATE ALIAS IF NOT EXISTS legend_h2_extension_base64_encode FOR \"org.finos.legend.engine.plan.execution.stores.relational.LegendH2Extensions.legend_h2_extension_base64_encode\";", "CREATE ALIAS IF NOT EXISTS legend_h2_extension_reverse_string FOR \"org.finos.legend.engine.plan.execution.stores.relational.LegendH2Extensions.legend_h2_extension_reverse_string\";", "CREATE ALIAS IF NOT EXISTS legend_h2_extension_flatten_array FOR \"org.finos.legend.engine.plan.execution.stores.relational.LegendH2Extensions.legend_h2_extension_flatten_array\";", - "CREATE ALIAS IF NOT EXISTS legend_h2_extension_split_part FOR \"org.finos.legend.engine.plan.execution.stores.relational.LegendH2Extensions.legend_h2_extension_split_part\";" + "CREATE ALIAS IF NOT EXISTS legend_h2_extension_split_part FOR \"org.finos.legend.engine.plan.execution.stores.relational.LegendH2Extensions.legend_h2_extension_split_part\";", + "CREATE ALIAS IF NOT EXISTS legend_h2_extension_edit_distance FOR \"org.finos.legend.engine.plan.execution.stores.relational.LegendH2Extensions.legend_h2_extension_edit_distance\";", + "CREATE ALIAS IF NOT EXISTS legend_h2_extension_jaro_winkler_similarity FOR \"org.finos.legend.engine.plan.execution.stores.relational.LegendH2Extensions.legend_h2_extension_jaro_winkler_similarity\";" ); } @@ -123,7 +125,9 @@ private static List getLegendH2_1_4_200_ExtensionSQLs() "CREATE ALIAS IF NOT EXISTS legend_h2_extension_hash_md5 FOR \"org.finos.legend.engine.plan.execution.stores.relational.LegendH2Extensions_1_4_200.legend_h2_extension_hash_md5\";", "CREATE ALIAS IF NOT EXISTS legend_h2_extension_hash_sha1 FOR \"org.finos.legend.engine.plan.execution.stores.relational.LegendH2Extensions_1_4_200.legend_h2_extension_hash_sha1\";", "CREATE ALIAS IF NOT EXISTS legend_h2_extension_flatten_array FOR \"org.finos.legend.engine.plan.execution.stores.relational.LegendH2Extensions_1_4_200.legend_h2_extension_flatten_array\";", - "CREATE ALIAS IF NOT EXISTS legend_h2_extension_split_part FOR \"org.finos.legend.engine.plan.execution.stores.relational.LegendH2Extensions_1_4_200.legend_h2_extension_split_part\";" + "CREATE ALIAS IF NOT EXISTS legend_h2_extension_split_part FOR \"org.finos.legend.engine.plan.execution.stores.relational.LegendH2Extensions_1_4_200.legend_h2_extension_split_part\";", + "CREATE ALIAS IF NOT EXISTS legend_h2_extension_edit_distance FOR \"org.finos.legend.engine.plan.execution.stores.relational.LegendH2Extensions_1_4_200.legend_h2_extension_edit_distance\";", + "CREATE ALIAS IF NOT EXISTS legend_h2_extension_jaro_winkler_similarity FOR \"org.finos.legend.engine.plan.execution.stores.relational.LegendH2Extensions_1_4_200.legend_h2_extension_jaro_winkler_similarity\";" ); } } diff --git a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-h2-1.4.200-execution/pom.xml b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-h2-1.4.200-execution/pom.xml index fbc7d73254c..5764f4cd9f9 100644 --- a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-h2-1.4.200-execution/pom.xml +++ b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-h2-1.4.200-execution/pom.xml @@ -68,5 +68,12 @@ + + + org.apache.commons + commons-text + + + diff --git a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-h2-1.4.200-execution/src/main/java/org/finos/legend/engine/plan/execution/stores/relational/LegendH2Extensions_1_4_200.java b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-h2-1.4.200-execution/src/main/java/org/finos/legend/engine/plan/execution/stores/relational/LegendH2Extensions_1_4_200.java index 22e2a1d1101..19aa050c097 100644 --- a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-h2-1.4.200-execution/src/main/java/org/finos/legend/engine/plan/execution/stores/relational/LegendH2Extensions_1_4_200.java +++ b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-execution/legend-engine-xt-relationalStore-h2-1.4.200-execution/src/main/java/org/finos/legend/engine/plan/execution/stores/relational/LegendH2Extensions_1_4_200.java @@ -19,6 +19,8 @@ import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang3.StringUtils; import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.text.similarity.JaroWinklerSimilarity; +import org.apache.commons.text.similarity.LevenshteinDistance; import org.finos.legend.engine.shared.core.ObjectMapperFactory; import org.h2.tools.SimpleResultSet; import org.h2.value.Value; @@ -321,4 +323,24 @@ else if (value instanceof Integer || value instanceof Long) throw new RuntimeException(e); } } + + public static Value legend_h2_extension_edit_distance(Value string1, Value string2) + { + if (string1 == ValueNull.INSTANCE || string2 == ValueNull.INSTANCE) + { + return ValueNull.INSTANCE; + } + + return ValueInt.get(new LevenshteinDistance().apply(string1.getString(), string2.getString())); + } + + public static Value legend_h2_extension_jaro_winkler_similarity(Value string1, Value string2) + { + if (string1 == ValueNull.INSTANCE || string2 == ValueNull.INSTANCE) + { + return ValueNull.INSTANCE; + } + + return ValueDouble.get(new JaroWinklerSimilarity().apply(string1.getString(), string2.getString())); + } } diff --git a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/pureToSQLQuery/pureToSQLQuery.pure b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/pureToSQLQuery/pureToSQLQuery.pure index fec05cb48cd..a25d326edc2 100644 --- a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/pureToSQLQuery/pureToSQLQuery.pure +++ b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/pureToSQLQuery/pureToSQLQuery.pure @@ -8227,6 +8227,8 @@ function meta::relational::functions::pureToSqlQuery::getSupportedFunctions():Ma ^PureFunctionToRelationalFunctionPair(first=meta::pure::functions::collection::isDistinct_T_MANY__Boolean_1_, second=meta::relational::functions::pureToSqlQuery::processAggregation_FunctionExpression_1__PropertyMapping_MANY__SelectWithCursor_1__Map_1__State_1__JoinType_1__String_1__List_1__DebugContext_1__Extension_MANY__RelationalOperationElement_1_), ^PureFunctionToRelationalFunctionPair(first=meta::pure::mutation::save_T_MANY__RootGraphFetchTree_1__Mapping_1__Runtime_1__T_MANY_, second=meta::relational::functions::pureToSqlQuery::processNoOp_FunctionExpression_1__PropertyMapping_MANY__SelectWithCursor_1__Map_1__State_1__JoinType_1__String_1__List_1__DebugContext_1__Extension_MANY__RelationalOperationElement_1_), ^PureFunctionToRelationalFunctionPair(first=meta::pure::functions::hash::hash_String_1__HashType_1__String_1_, second=meta::relational::functions::pureToSqlQuery::processHash_FunctionExpression_1__PropertyMapping_MANY__SelectWithCursor_1__Map_1__State_1__JoinType_1__String_1__List_1__DebugContext_1__Extension_MANY__RelationalOperationElement_1_), + ^PureFunctionToRelationalFunctionPair(first=meta::pure::functions::string::jaroWinklerSimilarity_String_1__String_1__Float_1_, second=meta::relational::functions::pureToSqlQuery::processDynaFunction_FunctionExpression_1__PropertyMapping_MANY__SelectWithCursor_1__Map_1__State_1__JoinType_1__String_1__List_1__DebugContext_1__Extension_MANY__RelationalOperationElement_1_), + ^PureFunctionToRelationalFunctionPair(first=meta::pure::functions::string::levenshteinDistance_String_1__String_1__Integer_1_, second=meta::relational::functions::pureToSqlQuery::processDynaFunction_FunctionExpression_1__PropertyMapping_MANY__SelectWithCursor_1__Map_1__State_1__JoinType_1__String_1__List_1__DebugContext_1__Extension_MANY__RelationalOperationElement_1_), ^PureFunctionToRelationalFunctionPair(first=meta::pure::functions::relation::filter_Relation_1__Function_1__Relation_1_, second=meta::relational::functions::pureToSqlQuery::processTdsFilter_FunctionExpression_1__PropertyMapping_MANY__SelectWithCursor_1__Map_1__State_1__JoinType_1__String_1__List_1__DebugContext_1__Extension_MANY__RelationalOperationElement_1_), ^PureFunctionToRelationalFunctionPair(first=meta::pure::functions::relation::distinct_Relation_1__ColSpecArray_1__Relation_1_, second=meta::relational::functions::pureToSqlQuery::processDistinct_FunctionExpression_1__PropertyMapping_MANY__SelectWithCursor_1__Map_1__State_1__JoinType_1__String_1__List_1__DebugContext_1__Extension_MANY__RelationalOperationElement_1_), diff --git a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/relationalExtension.pure b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/relationalExtension.pure index 2dc85f6d55a..a8dbf0b2002 100644 --- a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/relationalExtension.pure +++ b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/relationalExtension.pure @@ -848,6 +848,16 @@ function <> meta::relational::functions::typeInference::getDynaF ]) ), + pair( + 'jaroWinklerSimilarity', + list([ + pair( + {params: RelationalOperationElement[*] | true}, + {params: RelationalOperationElement[*] | ^meta::relational::metamodel::datatype::Float()} + ) + ]) + ), + pair( 'joinStrings', list([ @@ -908,6 +918,16 @@ function <> meta::relational::functions::typeInference::getDynaF ]) ), + pair( + 'levenshteinDistance', + list([ + pair( + {params: RelationalOperationElement[*] | true}, + {params: RelationalOperationElement[*] | ^meta::relational::metamodel::datatype::Integer()} + ) + ]) + ), + pair( 'log', list([ diff --git a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/dbExtension.pure b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/dbExtension.pure index 755ac1699cf..916d6d09d98 100644 --- a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/dbExtension.pure +++ b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/dbExtension.pure @@ -996,6 +996,8 @@ Enum meta::relational::functions::sqlQueryToString::DynaFunctionRegistry isNull, isNumeric, joinStrings, + jaroWinklerSimilarity, + levenshteinDistance, least, left, length, diff --git a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/dbSpecific/h2/h2Extension1_4_200.pure b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/dbSpecific/h2/h2Extension1_4_200.pure index 2534f507100..e8683c9b3ac 100644 --- a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/dbSpecific/h2/h2Extension1_4_200.pure +++ b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/dbSpecific/h2/h2Extension1_4_200.pure @@ -102,8 +102,10 @@ function <> meta::relational::functions::sqlQueryToString::h2::v dynaFnToSql('indexOf', $allStates, ^ToSql(format='LOCATE(%s)', transform={p:String[2] | $p->at(1) + ', ' + $p->at(0)})), dynaFnToSql('isNumeric', $allStates, ^ToSql(format='(lower(%s) = upper(%s))')), dynaFnToSql('isAlphaNumeric', $allStates, ^ToSql(format=regexpPattern('%s'), transform={p:String[1]|$p->transformAlphaNumericParamsDefault()})), + dynaFnToSql('jaroWinklerSimilarity', $allStates, ^ToSql(format='legend_h2_extension_jaro_winkler_similarity(%s, %s)')), dynaFnToSql('joinStrings', $allStates, ^ToSql(format='group_concat(%s separator %s)')), dynaFnToSql('length', $allStates, ^ToSql(format='char_length(%s)')), + dynaFnToSql('levenshteinDistance', $allStates, ^ToSql(format='legend_h2_extension_edit_distance(%s, %s)')), dynaFnToSql('matches', $allStates, ^ToSql(format=regexpPattern('%s'), transform={p:String[2]|$p->transformRegexpParams()})), dynaFnToSql('md5', $allStates, ^ToSql(format='legend_h2_extension_hash_md5(%s)')), dynaFnToSql('minute', $allStates, ^ToSql(format='minute(%s)')), diff --git a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/dbSpecific/h2/h2Extension2_1_214.pure b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/dbSpecific/h2/h2Extension2_1_214.pure index 454990928e0..d219409af51 100644 --- a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/dbSpecific/h2/h2Extension2_1_214.pure +++ b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/dbSpecific/h2/h2Extension2_1_214.pure @@ -215,8 +215,10 @@ function <> meta::relational::functions::sqlQueryToString::h2::v dynaFnToSql('indexOf', $allStates, ^ToSql(format='LOCATE(%s)', transform={p:String[2] | $p->at(1) + ', ' + $p->at(0)})), dynaFnToSql('isNumeric', $allStates, ^ToSql(format='(lower(%s) = upper(%s))')), dynaFnToSql('isAlphaNumeric', $allStates, ^ToSql(format=regexpPattern('%s'), transform={p:String[1]|$p->transformAlphaNumericParamsDefault()})), + dynaFnToSql('jaroWinklerSimilarity', $allStates, ^ToSql(format='legend_h2_extension_jaro_winkler_similarity(%s, %s)')), dynaFnToSql('joinStrings', $allStates, ^ToSql(format='group_concat(%s separator %s)')), dynaFnToSql('length', $allStates, ^ToSql(format='char_length(%s)')), + dynaFnToSql('levenshteinDistance', $allStates, ^ToSql(format='legend_h2_extension_edit_distance(%s, %s)')), dynaFnToSql('matches', $allStates, ^ToSql(format=regexpPattern('%s'), transform={p:String[2]|$p->transformRegexpParams()})), dynaFnToSql('md5', $allStates, ^ToSql(format='rawtohex(hash(\'MD5\', %s))')), dynaFnToSql('minute', $allStates, ^ToSql(format='minute(%s)')), diff --git a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/testSuite/dynaFunctions/string.pure b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/testSuite/dynaFunctions/string.pure index 0f73ea1e6ed..6adebb03cd7 100644 --- a/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/testSuite/dynaFunctions/string.pure +++ b/legend-engine-xts-relationalStore/legend-engine-xt-relationalStore-generation/legend-engine-xt-relationalStore-pure/src/main/resources/core_relational/relational/sqlQueryToString/testSuite/dynaFunctions/string.pure @@ -369,4 +369,18 @@ function <> meta::relational::tests::dbSpecificTests::sqlQueryTe let dynaFunc = ^DynaFunction(name='ascii', parameters=[^Literal(value='a')]); let expected = ^Literal(value=97); runDynaFunctionDatabaseTest($dynaFunc, $expected, $config); +} + +function <> meta::relational::tests::dbSpecificTests::sqlQueryTests::dynaFunctions::jaroWinklerSimilarity::testJaroWinklerSimilarity(config:DbTestConfig[1]):Boolean[1] +{ + let dynaFunc = ^DynaFunction(name='jaroWinklerSimilarity', parameters=[^Literal(value='John Smith'), ^Literal(value = 'Jane Smith')]); + let expected = ^Literal(value=0.88); + runDynaFunctionDatabaseTest($dynaFunc, $expected, $config); +} + +function <> meta::relational::tests::dbSpecificTests::sqlQueryTests::dynaFunctions::levenshteinDistance::testLevenshteinDistance(config:DbTestConfig[1]):Boolean[1] +{ + let dynaFunc = ^DynaFunction(name='levenshteinDistance', parameters=[^Literal(value='John Smith'), ^Literal(value = 'Jane Smith')]); + let expected = ^Literal(value=3); + runDynaFunctionDatabaseTest($dynaFunc, $expected, $config); } \ No newline at end of file diff --git a/pom.xml b/pom.xml index c0d17f5365e..e200da9da6c 100644 --- a/pom.xml +++ b/pom.xml @@ -106,7 +106,7 @@ - 5.6.1 + 5.7.0 0.25.4