From 759067c45f92a72dac432a6d92fc760e19dfcfca Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Mon, 23 Sep 2024 23:05:21 +0200 Subject: [PATCH 01/12] some initial thoughts --- R/Dictionary.R | 3 +++ R/dictionary_sugar.R | 15 +++++++++++---- R/did_you_mean.R | 12 ++++++++++++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/R/Dictionary.R b/R/Dictionary.R index 507a19fc..97ca6721 100644 --- a/R/Dictionary.R +++ b/R/Dictionary.R @@ -177,15 +177,18 @@ Dictionary = R6::R6Class("Dictionary", ) ) +# ADD .dicts_suggest dictionary_get = function(self, key, ...) { obj = dictionary_retrieve_item(self, key) dots = assert_list(list(...), names = "unique", .var.name = "arguments passed to Dictionary") dictionary_initialize_item(key, obj, dots) } +# ADD .dicts_suggest dictionary_retrieve_item = function(self, key) { obj = get0(key, envir = self$items, inherits = FALSE, ifnotfound = NULL) if (is.null(obj)) { + # ADD did_you_mean_dicts here? stopf("Element with key '%s' not found in %s!%s", key, class(self)[1L], did_you_mean(key, self$keys())) } obj diff --git a/R/dictionary_sugar.R b/R/dictionary_sugar.R index b34567ed..562a3e9d 100644 --- a/R/dictionary_sugar.R +++ b/R/dictionary_sugar.R @@ -32,18 +32,22 @@ #' d = Dictionary$new() #' d$add("key", item) #' dictionary_sugar_get(d, "key", x = 2) -dictionary_sugar_get = function(dict, .key, ...) { +dictionary_sugar_get = function(dict, .key, .dicts_suggest, ...) { assert_class(dict, "Dictionary") if (missing(.key)) { return(dict) } assert_string(.key) + # ASSERT .dicts_suggest + # either class Dictionary or list of Dictionaries? 
if (...length() == 0L) { + # ADD .dicts_suggest return(dictionary_get(dict, .key)) } dots = assert_list(list(...), .var.name = "additional arguments passed to Dictionary") assert_list(dots[!is.na(names2(dots))], names = "unique", .var.name = "named arguments passed to Dictionary") + # ADD .dicts_suggest obj = dictionary_retrieve_item(dict, .key) if (length(dots) == 0L) { return(assert_r6(dictionary_initialize_item(.key, obj))) @@ -55,7 +59,6 @@ dictionary_sugar_get = function(dict, .key, ...) { instance = assert_r6(dictionary_initialize_item(.key, obj, dots[ii])) dots = dots[!ii] - # set params in ParamSet if (length(dots) && exists("param_set", envir = instance, inherits = FALSE)) { param_ids = instance$param_set$ids() @@ -94,6 +97,7 @@ dictionary_sugar_mget = function(dict, .keys, ...) { if (missing(.keys)) { return(dict) } + # ADD .dicts_suggest objs = lapply(.keys, dictionary_sugar_get, dict = dict, ...) if (!is.null(names(.keys))) { nn = names2(.keys) @@ -132,10 +136,10 @@ fields = function(x) { #' @title A Quick Way to Initialize Objects from Dictionaries with Incremented ID #' #' @description -#' Covenience wrapper around [dictionary_sugar_get] and [dictionary_sugar_mget] to allow easier avoidance of of ID +#' Covenience wrapper around [dictionary_sugar_get] and [dictionary_sugar_mget] to allow easier avoidance of ID #' clashes which is useful when the same object is used multiple times and the ids have to be unique. #' Let `` be the key of the object to retrieve. When passing the `_` to this -#' function, where `` is any natural numer, the object with key `` is retrieved and the +#' function, where `` is any natural number, the object with key `` is retrieved and the #' suffix `_` is appended to the id after the object is constructed. #' #' @param dict ([Dictionary])\cr @@ -166,12 +170,14 @@ fields = function(x) { dictionary_sugar_inc_get = function(dict, .key, ...) 
{ m = regexpr("_\\d+$", .key) if (attr(m, "match.length") == -1L) { + # ADD .dicts_suggest return(dictionary_sugar_get(dict = dict, .key = .key, ...)) } assert_true(!methods::hasArg("id")) split = regmatches(.key, m, invert = NA)[[1L]] newkey = split[[1L]] suffix = split[[2L]] + # ADD .dicts_suggest obj = dictionary_sugar_get(dict = dict, .key = newkey, ...) obj$id = paste0(obj$id, suffix) obj @@ -181,6 +187,7 @@ dictionary_sugar_inc_get = function(dict, .key, ...) { #' @rdname dictionary_sugar_inc_get #' @export dictionary_sugar_inc_mget = function(dict, .keys, ...) { + # ADD .dicts_suggest objs = lapply(.keys, dictionary_sugar_inc_get, dict = dict, ...) if (!is.null(names(.keys))) { nn = names2(.keys) diff --git a/R/did_you_mean.R b/R/did_you_mean.R index 7e8d9ca4..c7fdf1be 100644 --- a/R/did_you_mean.R +++ b/R/did_you_mean.R @@ -22,3 +22,15 @@ did_you_mean = function(str, candidates) { } sprintf(" Did you mean %s?", str_collapse(suggested, quote = "'", sep = " / ")) } + +# write new function: did_you_mean_dicts +# extracts keys from dicts +# passes this to same logic as did_you_mean (can't call directioly, bc of the "Did you mean?" everytime) +# Wraps results according to the dictionary, i.e. "po(%s)" or some such +# Returns string with full message +did_you_mean_dicts = function(key, dicts) { + # ASSERTIONS + # maybe not necessary; did_you_mean doesn't and should be checked higher up anyway? 
+ + +} From c7dbf64a032cea9f1ad69891b70706c09934cda2 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Mon, 30 Sep 2024 23:46:08 +0200 Subject: [PATCH 02/12] implemented did_you_mean_dicts --- R/Dictionary.R | 13 +++++----- R/dictionary_sugar.R | 31 ++++++++++------------- R/did_you_mean.R | 59 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 68 insertions(+), 35 deletions(-) diff --git a/R/Dictionary.R b/R/Dictionary.R index 97ca6721..88b3c021 100644 --- a/R/Dictionary.R +++ b/R/Dictionary.R @@ -177,19 +177,18 @@ Dictionary = R6::R6Class("Dictionary", ) ) -# ADD .dicts_suggest -dictionary_get = function(self, key, ...) { - obj = dictionary_retrieve_item(self, key) +dictionary_get = function(self, key, ..., .dicts_suggest) { + obj = dictionary_retrieve_item(self, key, .dicts_suggest = .dicts_suggest) dots = assert_list(list(...), names = "unique", .var.name = "arguments passed to Dictionary") dictionary_initialize_item(key, obj, dots) } -# ADD .dicts_suggest -dictionary_retrieve_item = function(self, key) { +dictionary_retrieve_item = function(self, key, dicts_suggest) { obj = get0(key, envir = self$items, inherits = FALSE, ifnotfound = NULL) if (is.null(obj)) { - # ADD did_you_mean_dicts here? - stopf("Element with key '%s' not found in %s!%s", key, class(self)[1L], did_you_mean(key, self$keys())) + stopf("Element with key '%s' not found in %s!%s%s", key, class(self)[1L], + did_you_mean(key, self$keys()), + did_you_mean_dicts(key, dicts_suggest)) } obj } diff --git a/R/dictionary_sugar.R b/R/dictionary_sugar.R index 562a3e9d..2d675cc0 100644 --- a/R/dictionary_sugar.R +++ b/R/dictionary_sugar.R @@ -24,6 +24,8 @@ #' Keys of the objects to construct. #' @param ... (`any`)\cr #' See description. +#' @param .dicts_suggest (named [`list`]) +#' Named list of [dictionaries][Dictionary] used to look up suggestions for `.key` in cases of misspelling. 
#' @return [R6::R6Class()] #' @export #' @examples @@ -32,23 +34,20 @@ #' d = Dictionary$new() #' d$add("key", item) #' dictionary_sugar_get(d, "key", x = 2) -dictionary_sugar_get = function(dict, .key, .dicts_suggest, ...) { +dictionary_sugar_get = function(dict, .key, ..., .dicts_suggest = NULL) { assert_class(dict, "Dictionary") if (missing(.key)) { return(dict) } assert_string(.key) - # ASSERT .dicts_suggest - # either class Dictionary or list of Dictionaries? + assert_list(.dicts_suggest, "Dictionary", any.missing = FALSE, min.len = 1, unique = TRUE, names = "named", null.ok = TRUE) if (...length() == 0L) { - # ADD .dicts_suggest - return(dictionary_get(dict, .key)) + return(dictionary_get(dict, .key, .dicts_suggest = .dicts_suggest)) } dots = assert_list(list(...), .var.name = "additional arguments passed to Dictionary") assert_list(dots[!is.na(names2(dots))], names = "unique", .var.name = "named arguments passed to Dictionary") - # ADD .dicts_suggest - obj = dictionary_retrieve_item(dict, .key) + obj = dictionary_retrieve_item(dict, .key, .dicts_suggest) if (length(dots) == 0L) { return(assert_r6(dictionary_initialize_item(.key, obj))) } @@ -93,12 +92,11 @@ dictionary_sugar = dictionary_sugar_get #' @rdname dictionary_sugar_get #' @export -dictionary_sugar_mget = function(dict, .keys, ...) { +dictionary_sugar_mget = function(dict, .keys, ..., .dicts_suggest = NULL) { if (missing(.keys)) { return(dict) } - # ADD .dicts_suggest - objs = lapply(.keys, dictionary_sugar_get, dict = dict, ...) + objs = lapply(.keys, dictionary_sugar_get, dict = dict, .dicts_suggest = .dicts_suggest, ...) if (!is.null(names(.keys))) { nn = names2(.keys) ii = which(!is.na(nn)) @@ -167,28 +165,25 @@ fields = function(x) { #' map(objs, "id") #' #' @export -dictionary_sugar_inc_get = function(dict, .key, ...) 
{ +dictionary_sugar_inc_get = function(dict, .key, ..., .dicts_suggest = NULL) { m = regexpr("_\\d+$", .key) if (attr(m, "match.length") == -1L) { - # ADD .dicts_suggest - return(dictionary_sugar_get(dict = dict, .key = .key, ...)) + return(dictionary_sugar_get(dict = dict, .key = .key, ..., .dicts_suggest = .dicts_suggest)) } assert_true(!methods::hasArg("id")) split = regmatches(.key, m, invert = NA)[[1L]] newkey = split[[1L]] suffix = split[[2L]] - # ADD .dicts_suggest - obj = dictionary_sugar_get(dict = dict, .key = newkey, ...) + obj = dictionary_sugar_get(dict = dict, .key = newkey, ..., .dicts_suggest = .dicts_suggest) obj$id = paste0(obj$id, suffix) obj - } #' @rdname dictionary_sugar_inc_get #' @export -dictionary_sugar_inc_mget = function(dict, .keys, ...) { +dictionary_sugar_inc_mget = function(dict, .keys, ..., .dicts_suggest = NULL) { # ADD .dicts_suggest - objs = lapply(.keys, dictionary_sugar_inc_get, dict = dict, ...) + objs = lapply(.keys, dictionary_sugar_inc_get, dict = dict, ..., .dicts_suggest = .dicts_suggest) if (!is.null(names(.keys))) { nn = names2(.keys) ii = which(!is.na(nn)) diff --git a/R/did_you_mean.R b/R/did_you_mean.R index c7fdf1be..15700d1f 100644 --- a/R/did_you_mean.R +++ b/R/did_you_mean.R @@ -13,9 +13,7 @@ #' @examples #' did_you_mean("yep", c("yes", "no")) did_you_mean = function(str, candidates) { - candidates = unique(candidates) - D = set_names(adist(str, candidates, ignore.case = TRUE, partial = TRUE)[1L, ], candidates) - suggested = names(head(sort(D[D <= ceiling(0.2 * nchar(str))]), 3L)) + suggested = find_suggested(str, candidates, threshold = 0.2) if (!length(suggested)) { return("") @@ -23,14 +21,55 @@ did_you_mean = function(str, candidates) { sprintf(" Did you mean %s?", str_collapse(suggested, quote = "'", sep = " / ")) } -# write new function: did_you_mean_dicts -# extracts keys from dicts -# passes this to same logic as did_you_mean (can't call directioly, bc of the "Did you mean?" 
everytime) -# Wraps results according to the dictionary, i.e. "po(%s)" or some such -# Returns string with full message +#' @title Suggest Alternatives from Given Dictionaries +#' +#' @description +#' Helps to suggest alternatives for a given key based on the keys of given dictionaries. +#' +#' @param key (`character(1)`) \cr +#' Key to look for in `dicts`. +#' @param dicts (named list)\cr +#' Named list of [dictionaries][Dictionary]. +#' @return (`character(1)`). Either a phrase suggesting one or more keys based on the dictionaries in `dicts`, +#' or an empty string if no close match is found. did_you_mean_dicts = function(key, dicts) { - # ASSERTIONS - # maybe not necessary; did_you_mean doesn't and should be checked higher up anyway? + if (is.null(dicts)) { + return("") + } + # Iterate through dicts, get suggestions, paste as messages + suggested = character(length(dicts)) + for (i in seq_along(dicts)) { + entries = find_suggested(key, dicts[[i]]$keys()) + + if (length(entries)) { + suggested[[i]] = sprintf("%s: %s", names(dicts)[[i]], + str_collapse(entries, quote = "'", sep = " / ")) + } + } + # Drop elements for dicts for which no suggestions could be made + suggested = suggested[nchar(suggested) > 0L] + + if (!length(suggested)) { + return("") + } + sprintf(" Similar entries in other dictionaries, %s?", str_collapse(suggested, sep = " or ")) + # TODO: handle ordering for exact hits (order dicts approriately?) + # TODO: maximum number of suggestions (within dict is handled by find_suggested, but not if we are looking at many dicts) + # TODO: Tests +} +#' @title Find Suggestions +#' +#' @param str (`character(1)`)\cr +#' String. +#' @param candidates (`character()`)\cr +#' Candidate strings. +#' @param threshold (`numeric(1)`)\cr +#' Percentage value of characters when sorting `candidates` by distance +#' @return (`character(1)`). Either suggested candidates from `candidates` or an empty string if no close match is found. 
+find_suggested = function(str, candidates, threshold = 0.2) { + candidates = unique(candidates) + D = set_names(adist(str, candidates, ignore.case = TRUE, partial = TRUE)[1L, ], candidates) + names(head(sort(D[D <= ceiling(threshold * nchar(str))]), 3L)) } From e335493ed8e03441707d28d5cd65c9879a3fcba0 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Mon, 30 Sep 2024 23:48:02 +0200 Subject: [PATCH 03/12] remove comment --- R/dictionary_sugar.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/dictionary_sugar.R b/R/dictionary_sugar.R index 2d675cc0..60e4e7f0 100644 --- a/R/dictionary_sugar.R +++ b/R/dictionary_sugar.R @@ -182,7 +182,6 @@ dictionary_sugar_inc_get = function(dict, .key, ..., .dicts_suggest = NULL) { #' @rdname dictionary_sugar_inc_get #' @export dictionary_sugar_inc_mget = function(dict, .keys, ..., .dicts_suggest = NULL) { - # ADD .dicts_suggest objs = lapply(.keys, dictionary_sugar_inc_get, dict = dict, ..., .dicts_suggest = .dicts_suggest) if (!is.null(names(.keys))) { nn = names2(.keys) From 85596888c32c0277cfa5bbcfc73f90b93928de2a Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Tue, 1 Oct 2024 18:04:31 +0200 Subject: [PATCH 04/12] feat: ordering of dictionaries + simple tests --- R/Dictionary.R | 3 +- R/did_you_mean.R | 60 ++++++++++++++++++++++---------- tests/testthat/test_Dictionary.R | 21 +++++++++++ 3 files changed, 64 insertions(+), 20 deletions(-) diff --git a/R/Dictionary.R b/R/Dictionary.R index 88b3c021..5fc0cd94 100644 --- a/R/Dictionary.R +++ b/R/Dictionary.R @@ -178,7 +178,7 @@ Dictionary = R6::R6Class("Dictionary", ) dictionary_get = function(self, key, ..., .dicts_suggest) { - obj = dictionary_retrieve_item(self, key, .dicts_suggest = .dicts_suggest) + obj = dictionary_retrieve_item(self, key, .dicts_suggest) dots = assert_list(list(...), names = "unique", .var.name = "arguments passed to Dictionary") dictionary_initialize_item(key, obj, dots) } @@ -209,7 +209,6 @@ dictionary_initialize_item = function(key, obj, cargs = 
list()) { } } - #' @export as.data.table.Dictionary = function(x, ...) { setkeyv(as.data.table(list(key = x$keys())), "key")[] diff --git a/R/did_you_mean.R b/R/did_you_mean.R index 15700d1f..167dc8a6 100644 --- a/R/did_you_mean.R +++ b/R/did_you_mean.R @@ -13,12 +13,12 @@ #' @examples #' did_you_mean("yep", c("yes", "no")) did_you_mean = function(str, candidates) { - suggested = find_suggested(str, candidates, threshold = 0.2) + suggestions = find_suggestions(str, candidates, threshold = 0.2, max_candidates = 3L, ret_dist = FALSE) - if (!length(suggested)) { + if (!length(suggestions)) { return("") } - sprintf(" Did you mean %s?", str_collapse(suggested, quote = "'", sep = " / ")) + sprintf(" Did you mean %s?", str_collapse(suggestions, quote = "'", sep = " / ")) } #' @title Suggest Alternatives from Given Dictionaries @@ -36,27 +36,42 @@ did_you_mean_dicts = function(key, dicts) { if (is.null(dicts)) { return("") } - # Iterate through dicts, get suggestions, paste as messages - suggested = character(length(dicts)) + + # Initialize variables to store suggestions and minimum distances + suggestions = character(length(dicts)) + min_distance_per_dict = numeric(length(dicts)) + for (i in seq_along(dicts)) { - entries = find_suggested(key, dicts[[i]]$keys()) + # Get distances and the corresponding entries for the current dictionary + distances = find_suggestions(key, dicts[[i]]$keys(), ret_dist = TRUE) + entries = names(distances) - if (length(entries)) { - suggested[[i]] = sprintf("%s: %s", names(dicts)[[i]], - str_collapse(entries, quote = "'", sep = " / ")) + # Handle the case of no matches: skip the dictionary + if (!length(entries)) { + min_distance_per_dict[[i]] = NA + next } + # Record the closest distance + min_distance_per_dict[[i]] = min(distances) + + # Create a suggestion message for the current dictionary + suggestions[[i]] = sprintf("%s: %s", names(dicts)[[i]], + str_collapse(entries, quote = "'", sep = " / ")) } - # Drop elements for dicts for which no 
suggestions could be made - suggested = suggested[nchar(suggested) > 0L] - if (!length(suggested)) { + # Order the suggestions by their closest match + suggestions = suggestions[order(min_distance_per_dict)] + # Remove empty suggestions (i.e., dictionaries with no close matches) + valid_suggestions = suggestions[nchar(suggestions) > 0L] + # Only show 3 dictionaries with best matches + # valid_suggestions = head(valid_suggestions, 3L) + + # If no valid suggestions, return an empty string + if (!length(valid_suggestions)) { return("") } - sprintf(" Similar entries in other dictionaries, %s?", str_collapse(suggested, sep = " or ")) - # TODO: handle ordering for exact hits (order dicts approriately?) - # TODO: maximum number of suggestions (within dict is handled by find_suggested, but not if we are looking at many dicts) - # TODO: Tests + sprintf("\nSimilar entries in other dictionaries, %s.", str_collapse(valid_suggestions, sep = ", or ")) } #' @title Find Suggestions @@ -67,9 +82,18 @@ did_you_mean_dicts = function(key, dicts) { #' Candidate strings. #' @param threshold (`numeric(1)`)\cr #' Percentage value of characters when sorting `candidates` by distance +#' @param max_candidates (`integer(1)`)\cr +#' Maximum number of candidates to return. +#' @param ret_similarity (`logical(1)`)\cr +#' Return similarity values instead of names. #' @return (`character(1)`). Either suggested candidates from `candidates` or an empty string if no close match is found. 
-find_suggested = function(str, candidates, threshold = 0.2) { +find_suggestions = function(str, candidates, threshold = 0.2, max_candidates = 3L, ret_dist = FALSE) { candidates = unique(candidates) D = set_names(adist(str, candidates, ignore.case = TRUE, partial = TRUE)[1L, ], candidates) - names(head(sort(D[D <= ceiling(threshold * nchar(str))]), 3L)) + sorted = head(sort(D[D <= ceiling(threshold * nchar(str))]), max_candidates) + if (ret_dist) { + sorted + } else { + names(sorted) + } } diff --git a/tests/testthat/test_Dictionary.R b/tests/testthat/test_Dictionary.R index c90c8b57..d8f0fe95 100644 --- a/tests/testthat/test_Dictionary.R +++ b/tests/testthat/test_Dictionary.R @@ -125,3 +125,24 @@ test_that("#115", { d$add("a", function() A$new()) expect_error(dictionary_sugar_get(d, "a", y = 10), "Did you mean") }) + +test_that("similar entries in other dictionaries", { + obj = R6Class("A", public = list(x = NULL)) + + d = Dictionary$new() + d$add("abc", obj) + + d_lookup1 = Dictionary$new() + d_lookup1$add("cde", obj) + + expect_error(dictionary_sugar_get(d, "cde", .dicts_suggest = list("lookup1" = d_lookup1)), "Similar entries in other dictionaries") + + d_lookup2 = Dictionary$new() + d_lookup2$add("bcd", obj) + + # Dictionaries ordered by closest match per dictionary + expect_error( + dictionary_sugar_get(d, "cde", .dicts_suggest = list("lookup1" = d_lookup1, "lookup2" = d_lookup2)), + "Similar entries in other dictionaries.*lookup1.*lookup2" + ) +}) From 2408c9cff0f6084748377809228406b687325b58 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Tue, 1 Oct 2024 18:41:13 +0200 Subject: [PATCH 05/12] small changes --- R/dictionary_sugar.R | 2 +- R/did_you_mean.R | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/dictionary_sugar.R b/R/dictionary_sugar.R index 60e4e7f0..6bef1ff4 100644 --- a/R/dictionary_sugar.R +++ b/R/dictionary_sugar.R @@ -76,7 +76,7 @@ dictionary_sugar_get = function(dict, .key, ..., .dicts_suggest = NULL) { 
for (i in seq_along(dots)) { nn = ndots[[i]] if (!exists(nn, envir = instance, inherits = FALSE)) { - stopf("Cannot set argument '%s' for '%s' (not a constructor argument, not a parameter, not a field.%s", + stopf("Cannot set argument '%s' for '%s' (not a constructor argument, not a parameter, not a field).%s", nn, class(instance)[1L], did_you_mean(nn, c(constructor_args, param_ids, setdiff(names(instance), ".__enclos_env__")))) # nolint } instance[[nn]] = dots[[i]] diff --git a/R/did_you_mean.R b/R/did_you_mean.R index 167dc8a6..97021714 100644 --- a/R/did_you_mean.R +++ b/R/did_you_mean.R @@ -13,7 +13,7 @@ #' @examples #' did_you_mean("yep", c("yes", "no")) did_you_mean = function(str, candidates) { - suggestions = find_suggestions(str, candidates, threshold = 0.2, max_candidates = 3L, ret_dist = FALSE) + suggestions = find_suggestions(str, candidates, threshold = 0.2, max_candidates = 3L, ret_distances = FALSE) if (!length(suggestions)) { return("") @@ -33,17 +33,17 @@ did_you_mean = function(str, candidates) { #' @return (`character(1)`). Either a phrase suggesting one or more keys based on the dictionaries in `dicts`, #' or an empty string if no close match is found. 
did_you_mean_dicts = function(key, dicts) { + # No message if no dictionaries are given if (is.null(dicts)) { return("") } - # Initialize variables to store suggestions and minimum distances suggestions = character(length(dicts)) min_distance_per_dict = numeric(length(dicts)) for (i in seq_along(dicts)) { # Get distances and the corresponding entries for the current dictionary - distances = find_suggestions(key, dicts[[i]]$keys(), ret_dist = TRUE) + distances = find_suggestions(key, dicts[[i]]$keys(), ret_distances = TRUE) entries = names(distances) # Handle the case of no matches: skip the dictionary @@ -63,8 +63,8 @@ did_you_mean_dicts = function(key, dicts) { suggestions = suggestions[order(min_distance_per_dict)] # Remove empty suggestions (i.e., dictionaries with no close matches) valid_suggestions = suggestions[nchar(suggestions) > 0L] - # Only show 3 dictionaries with best matches - # valid_suggestions = head(valid_suggestions, 3L) + # Only show the 3 dictionaries with the best matches + valid_suggestions = head(valid_suggestions, 3L) # If no valid suggestions, return an empty string if (!length(valid_suggestions)) { @@ -87,11 +87,11 @@ did_you_mean_dicts = function(key, dicts) { #' @param ret_similarity (`logical(1)`)\cr #' Return similarity values instead of names. #' @return (`character(1)`). Either suggested candidates from `candidates` or an empty string if no close match is found. 
-find_suggestions = function(str, candidates, threshold = 0.2, max_candidates = 3L, ret_dist = FALSE) { +find_suggestions = function(str, candidates, threshold = 0.2, max_candidates = 3L, ret_distances = FALSE) { candidates = unique(candidates) D = set_names(adist(str, candidates, ignore.case = TRUE, partial = TRUE)[1L, ], candidates) sorted = head(sort(D[D <= ceiling(threshold * nchar(str))]), max_candidates) - if (ret_dist) { + if (ret_distances) { sorted } else { names(sorted) From 4666d5a93896cd7e5372a085302246f60a19a152 Mon Sep 17 00:00:00 2001 From: mb706 Date: Wed, 2 Oct 2024 11:23:10 +0200 Subject: [PATCH 06/12] Apply suggestions from code review --- R/did_you_mean.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/R/did_you_mean.R b/R/did_you_mean.R index 97021714..d4238dd0 100644 --- a/R/did_you_mean.R +++ b/R/did_you_mean.R @@ -38,8 +38,8 @@ did_you_mean_dicts = function(key, dicts) { return("") } - suggestions = character(length(dicts)) - min_distance_per_dict = numeric(length(dicts)) + suggestions = character(0) + min_distance_per_dict = numeric(0) for (i in seq_along(dicts)) { # Get distances and the corresponding entries for the current dictionary @@ -48,21 +48,19 @@ did_you_mean_dicts = function(key, dicts) { # Handle the case of no matches: skip the dictionary if (!length(entries)) { - min_distance_per_dict[[i]] = NA next } # Record the closest distance - min_distance_per_dict[[i]] = min(distances) + min_distance_per_dict[[length(min_distance_per_dict) + 1]] = min(distances) # Create a suggestion message for the current dictionary - suggestions[[i]] = sprintf("%s: %s", names(dicts)[[i]], + suggestions[[length(suggestions) + 1]] = sprintf("%s: %s", names(dicts)[[i]], str_collapse(entries, quote = "'", sep = " / ")) } # Order the suggestions by their closest match suggestions = suggestions[order(min_distance_per_dict)] # Remove empty suggestions (i.e., dictionaries with no close matches) - valid_suggestions = 
suggestions[nchar(suggestions) > 0L] # Only show the 3 dictionaries with the best matches valid_suggestions = head(valid_suggestions, 3L) From fe20c7dad1ad9d59a57754e5a66635ffcccf3d73 Mon Sep 17 00:00:00 2001 From: mb706 Date: Wed, 2 Oct 2024 11:25:04 +0200 Subject: [PATCH 07/12] Update R/did_you_mean.R --- R/did_you_mean.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/did_you_mean.R b/R/did_you_mean.R index d4238dd0..c4acad1e 100644 --- a/R/did_you_mean.R +++ b/R/did_you_mean.R @@ -60,7 +60,6 @@ did_you_mean_dicts = function(key, dicts) { # Order the suggestions by their closest match suggestions = suggestions[order(min_distance_per_dict)] - # Remove empty suggestions (i.e., dictionaries with no close matches) # Only show the 3 dictionaries with the best matches valid_suggestions = head(valid_suggestions, 3L) From 6abb5ceb411728a7d45fdaf274d924c639aff026 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Wed, 2 Oct 2024 13:26:10 +0200 Subject: [PATCH 08/12] changed formating of message generated by did_you_mean_dicts --- R/did_you_mean.R | 65 ++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/R/did_you_mean.R b/R/did_you_mean.R index c4acad1e..7d04053d 100644 --- a/R/did_you_mean.R +++ b/R/did_you_mean.R @@ -21,18 +21,20 @@ did_you_mean = function(str, candidates) { sprintf(" Did you mean %s?", str_collapse(suggestions, quote = "'", sep = " / ")) } -#' @title Suggest Alternatives from Given Dictionaries -#' -#' @description -#' Helps to suggest alternatives for a given key based on the keys of given dictionaries. -#' -#' @param key (`character(1)`) \cr -#' Key to look for in `dicts`. -#' @param dicts (named list)\cr -#' Named list of [dictionaries][Dictionary]. -#' @return (`character(1)`). Either a phrase suggesting one or more keys based on the dictionaries in `dicts`, -#' or an empty string if no close match is found. 
-did_you_mean_dicts = function(key, dicts) { +# @title Suggest Alternatives from Given Dictionaries +# +# @description +# Helps to suggest alternatives for a given key based on the keys of given dictionaries. +# +# @param key (`character(1)`) \cr +# Key to look for in `dicts`. +# @param dicts (named list)\cr +# Named list of [dictionaries][Dictionary]. +# @param max_candidates_dicts (`integer(1)`) \cr +# Maximum number of dictionaries for which suggestions are outputted. +# @return (`character(1)`). Either a phrase suggesting one or more keys based on the dictionaries in `dicts`, +# or an empty string if no close match is found. +did_you_mean_dicts = function(key, dicts, max_candidates_dicts = 3L) { # No message if no dictionaries are given if (is.null(dicts)) { return("") @@ -50,40 +52,43 @@ did_you_mean_dicts = function(key, dicts) { if (!length(entries)) { next } + # Record the closest distance min_distance_per_dict[[length(min_distance_per_dict) + 1]] = min(distances) # Create a suggestion message for the current dictionary - suggestions[[length(suggestions) + 1]] = sprintf("%s: %s", names(dicts)[[i]], - str_collapse(entries, quote = "'", sep = " / ")) + suggestions[[length(suggestions) + 1]] = sprintf( + "%s: %s", names(dicts)[[i]], str_collapse(entries, quote = "'", sep = " / ") + ) } # Order the suggestions by their closest match suggestions = suggestions[order(min_distance_per_dict)] # Only show the 3 dictionaries with the best matches - valid_suggestions = head(valid_suggestions, 3L) + suggestions = head(suggestions, max_candidates_dicts) # If no valid suggestions, return an empty string - if (!length(valid_suggestions)) { + if (!length(suggestions)) { return("") } - sprintf("\nSimilar entries in other dictionaries, %s.", str_collapse(valid_suggestions, sep = ", or ")) + # add \n + sprintf("\nSimilar entries in other dictionaries:\n %s", str_collapse(suggestions, sep = "\n ")) } -#' @title Find Suggestions -#' -#' @param str (`character(1)`)\cr -#' String. 
-#' @param candidates (`character()`)\cr -#' Candidate strings. -#' @param threshold (`numeric(1)`)\cr -#' Percentage value of characters when sorting `candidates` by distance -#' @param max_candidates (`integer(1)`)\cr -#' Maximum number of candidates to return. -#' @param ret_similarity (`logical(1)`)\cr -#' Return similarity values instead of names. -#' @return (`character(1)`). Either suggested candidates from `candidates` or an empty string if no close match is found. +# @title Find Suggestions +# +# @param str (`character(1)`)\cr +# String. +# @param candidates (`character()`)\cr +# Candidate strings. +# @param threshold (`numeric(1)`)\cr +# Percentage value of characters when sorting `candidates` by distance +# @param max_candidates (`integer(1)`)\cr +# Maximum number of candidates to return. +# @param ret_distances (`logical(1)`)\cr +# If `TRUE`, return the named distances instead of only the candidate names. +# @return (`character(1)`). Either suggested candidates from `candidates` or an empty string if no close match is found. find_suggestions = function(str, candidates, threshold = 0.2, max_candidates = 3L, ret_distances = FALSE) { candidates = unique(candidates) D = set_names(adist(str, candidates, ignore.case = TRUE, partial = TRUE)[1L, ], candidates) From ae4b5e4796b5e643b719dc0dc699804e074c3afe Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Wed, 2 Oct 2024 13:26:28 +0200 Subject: [PATCH 09/12] run document --- man/dictionary_sugar_get.Rd | 9 ++++++--- man/dictionary_sugar_inc_get.Rd | 8 ++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/man/dictionary_sugar_get.Rd b/man/dictionary_sugar_get.Rd index f9e77c90..4625c591 100644 --- a/man/dictionary_sugar_get.Rd +++ b/man/dictionary_sugar_get.Rd @@ -6,11 +6,11 @@ \alias{dictionary_sugar_mget} \title{A Quick Way to Initialize Objects from Dictionaries} \usage{ -dictionary_sugar_get(dict, .key, ...) +dictionary_sugar_get(dict, .key, ..., .dicts_suggest = NULL) -dictionary_sugar(dict, .key, ...) 
+dictionary_sugar(dict, .key, ..., .dicts_suggest = NULL) -dictionary_sugar_mget(dict, .keys, ...) +dictionary_sugar_mget(dict, .keys, ..., .dicts_suggest = NULL) } \arguments{ \item{dict}{(\link{Dictionary}).} @@ -21,6 +21,9 @@ Key of the object to construct.} \item{...}{(\code{any})\cr See description.} +\item{.dicts_suggest}{(named \code{\link{list}}) +Named list of \link[=Dictionary]{dictionaries} used to look up suggestions for \code{.key} in cases of misspelling.} + \item{.keys}{(\code{character()})\cr Keys of the objects to construct.} } diff --git a/man/dictionary_sugar_inc_get.Rd b/man/dictionary_sugar_inc_get.Rd index ca6d3887..0b318527 100644 --- a/man/dictionary_sugar_inc_get.Rd +++ b/man/dictionary_sugar_inc_get.Rd @@ -5,9 +5,9 @@ \alias{dictionary_sugar_inc_mget} \title{A Quick Way to Initialize Objects from Dictionaries with Incremented ID} \usage{ -dictionary_sugar_inc_get(dict, .key, ...) +dictionary_sugar_inc_get(dict, .key, ..., .dicts_suggest = NULL) -dictionary_sugar_inc_mget(dict, .keys, ...) +dictionary_sugar_inc_mget(dict, .keys, ..., .dicts_suggest = NULL) } \arguments{ \item{dict}{(\link{Dictionary})\cr @@ -26,10 +26,10 @@ Keys of the objects to construct - possibly with suffixes of the form \verb{_ An element from the dictionary. } \description{ -Covenience wrapper around \link{dictionary_sugar_get} and \link{dictionary_sugar_mget} to allow easier avoidance of of ID +Covenience wrapper around \link{dictionary_sugar_get} and \link{dictionary_sugar_mget} to allow easier avoidance of ID clashes which is useful when the same object is used multiple times and the ids have to be unique. Let \verb{} be the key of the object to retrieve. When passing the \verb{_} to this -function, where \verb{} is any natural numer, the object with key \verb{} is retrieved and the +function, where \verb{} is any natural number, the object with key \verb{} is retrieved and the suffix \verb{_} is appended to the id after the object is constructed. 
} \examples{ From aa8a0d21ddeff432bbe7ce71c26fef11a1ad2a14 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Wed, 2 Oct 2024 13:26:55 +0200 Subject: [PATCH 10/12] test for no suggestion when no match is found --- tests/testthat/test_Dictionary.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_Dictionary.R b/tests/testthat/test_Dictionary.R index d8f0fe95..d600faa9 100644 --- a/tests/testthat/test_Dictionary.R +++ b/tests/testthat/test_Dictionary.R @@ -135,14 +135,14 @@ test_that("similar entries in other dictionaries", { d_lookup1 = Dictionary$new() d_lookup1$add("cde", obj) + # Makes suggestions expect_error(dictionary_sugar_get(d, "cde", .dicts_suggest = list("lookup1" = d_lookup1)), "Similar entries in other dictionaries") + # Makes no suggestions + expect_error(dictionary_sugar_get(d, "xyz", .dicts_suggest = list("lookup1" = d_lookup1)), "(?!(Similar entries in other dictionaries))", perl = TRUE) d_lookup2 = Dictionary$new() d_lookup2$add("bcd", obj) # Dictionaries ordered by closest match per dictionary - expect_error( - dictionary_sugar_get(d, "cde", .dicts_suggest = list("lookup1" = d_lookup1, "lookup2" = d_lookup2)), - "Similar entries in other dictionaries.*lookup1.*lookup2" - ) + expect_error(dictionary_sugar_get(d, "bcd", .dicts_suggest = list("lookup1" = d_lookup1, "lookup2" = d_lookup2)), "Similar entries in other dictionaries.*lookup2.*lookup1") }) From f5d5af3a599d0007ac9beeee5bbafe8de8b8fe42 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Wed, 2 Oct 2024 13:34:14 +0200 Subject: [PATCH 11/12] updated NEWS.md --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 5dff1a06..c99205e2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,8 @@ * Bugfix: `crate()` is using the correct 'topenv' environment now. 
* Remove the unused 'safe' variants of dictionary getters +* `dictionary_sugar_get()` and corresponding functions now take a list of dictionaries as + optional argument `.dicts_suggest` to look for suggestions if `.key` is not part of the dictionary. # mlr3misc 0.15.1 From cce80dbd0aac4f3495ce30001e0fa6f759de204c Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Wed, 2 Oct 2024 13:37:49 +0200 Subject: [PATCH 12/12] docs: small changes + added .dicts_suggest for dictionary_sugar_inc_get --- R/dictionary_sugar.R | 9 +++++++-- man/dictionary_sugar_get.Rd | 3 ++- man/dictionary_sugar_inc_get.Rd | 3 +++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/R/dictionary_sugar.R b/R/dictionary_sugar.R index 6bef1ff4..6ddd64ae 100644 --- a/R/dictionary_sugar.R +++ b/R/dictionary_sugar.R @@ -25,15 +25,18 @@ #' @param ... (`any`)\cr #' See description. #' @param .dicts_suggest (named [`list`]) -#' Named list of [dictionaries][Dictionary] used to look up suggestions for `.key` in cases of misspelling. +#' Named list of [dictionaries][Dictionary] used to look up suggestions for `.key` if `.key` does not exist in `dict`. +#' #' @return [R6::R6Class()] -#' @export +#' #' @examples #' library(R6) #' item = R6Class("Item", public = list(x = 0)) #' d = Dictionary$new() #' d$add("key", item) #' dictionary_sugar_get(d, "key", x = 2) +#' +#' @export dictionary_sugar_get = function(dict, .key, ..., .dicts_suggest = NULL) { assert_class(dict, "Dictionary") if (missing(.key)) { @@ -148,6 +151,8 @@ fields = function(x) { #' Keys of the objects to construct - possibly with suffixes of the form `_` which will be appended to the ids. #' @param ... (any)\cr #' See description of [mlr3misc::dictionary_sugar]. +#' @param .dicts_suggest (named [`list`]) +#' Named list of [dictionaries][Dictionary] used to look up suggestions for `.key` if `.key` does not exist in `dict`. #' #' @return An element from the dictionary. 
#' diff --git a/man/dictionary_sugar_get.Rd b/man/dictionary_sugar_get.Rd index 4625c591..a166afa5 100644 --- a/man/dictionary_sugar_get.Rd +++ b/man/dictionary_sugar_get.Rd @@ -22,7 +22,7 @@ Key of the object to construct.} See description.} \item{.dicts_suggest}{(named \code{\link{list}}) -Named list of \link[=Dictionary]{dictionaries} used to look up suggestions for \code{.key} in cases of misspelling.} +Named list of \link[=Dictionary]{dictionaries} used to look up suggestions for \code{.key} if \code{.key} does not exist in \code{dict}.} \item{.keys}{(\code{character()})\cr Keys of the objects to construct.} @@ -56,4 +56,5 @@ item = R6Class("Item", public = list(x = 0)) d = Dictionary$new() d$add("key", item) dictionary_sugar_get(d, "key", x = 2) + } diff --git a/man/dictionary_sugar_inc_get.Rd b/man/dictionary_sugar_inc_get.Rd index 0b318527..520f49e3 100644 --- a/man/dictionary_sugar_inc_get.Rd +++ b/man/dictionary_sugar_inc_get.Rd @@ -19,6 +19,9 @@ Key of the object to construct - possibly with a suffix of the form \verb{_} \item{...}{(any)\cr See description of \link{dictionary_sugar}.} +\item{.dicts_suggest}{(named \code{\link{list}}) +Named list of \link[=Dictionary]{dictionaries} used to look up suggestions for \code{.key} if \code{.key} does not exist in \code{dict}.} + \item{.keys}{(\code{character()})\cr Keys of the objects to construct - possibly with suffixes of the form \verb{_} which will be appended to the ids.} }