Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: dictionary_sugar_get does string comparison with other given dictionaries for error message #120

Merged
merged 12 commits into from
Oct 8, 2024
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

* Bugfix: `crate()` is using the correct 'topenv' environment now.
* Remove the unused 'safe' variants of dictionary getters
* `dictionary_sugar_get()` and corresponding functions now take a list of dictionaries as
optional argument `.dicts_suggest` to look for suggestions if `.key` is not part of the dictionary.

# mlr3misc 0.15.1

Expand Down
11 changes: 6 additions & 5 deletions R/Dictionary.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,16 +177,18 @@ Dictionary = R6::R6Class("Dictionary",
)
)

dictionary_get = function(self, key, ...) {
obj = dictionary_retrieve_item(self, key)
dictionary_get = function(self, key, ..., .dicts_suggest) {
obj = dictionary_retrieve_item(self, key, .dicts_suggest)
dots = assert_list(list(...), names = "unique", .var.name = "arguments passed to Dictionary")
dictionary_initialize_item(key, obj, dots)
}

dictionary_retrieve_item = function(self, key) {
dictionary_retrieve_item = function(self, key, dicts_suggest) {
obj = get0(key, envir = self$items, inherits = FALSE, ifnotfound = NULL)
if (is.null(obj)) {
stopf("Element with key '%s' not found in %s!%s", key, class(self)[1L], did_you_mean(key, self$keys()))
stopf("Element with key '%s' not found in %s!%s%s", key, class(self)[1L],
did_you_mean(key, self$keys()),
did_you_mean_dicts(key, dicts_suggest))
}
obj
}
Expand All @@ -207,7 +209,6 @@ dictionary_initialize_item = function(key, obj, cargs = list()) {
}
}


#' @export
as.data.table.Dictionary = function(x, ...) {
setkeyv(as.data.table(list(key = x$keys())), "key")[]
Expand Down
38 changes: 22 additions & 16 deletions R/dictionary_sugar.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,33 @@
#' Keys of the objects to construct.
#' @param ... (`any`)\cr
#' See description.
#' @param .dicts_suggest (named [`list`])\cr
#' Named list of [dictionaries][Dictionary] used to look up suggestions for `.key` if `.key` does not exist in `dict`.
#'
#' @return [R6::R6Class()]
#' @export
#'
#' @examples
#' library(R6)
#' item = R6Class("Item", public = list(x = 0))
#' d = Dictionary$new()
#' d$add("key", item)
#' dictionary_sugar_get(d, "key", x = 2)
dictionary_sugar_get = function(dict, .key, ...) {
#'
#' @export
dictionary_sugar_get = function(dict, .key, ..., .dicts_suggest = NULL) {
assert_class(dict, "Dictionary")
if (missing(.key)) {
return(dict)
}
assert_string(.key)
assert_list(.dicts_suggest, "Dictionary", any.missing = FALSE, min.len = 1, unique = TRUE, names = "named", null.ok = TRUE)
if (...length() == 0L) {
return(dictionary_get(dict, .key))
return(dictionary_get(dict, .key, .dicts_suggest = .dicts_suggest))
}
dots = assert_list(list(...), .var.name = "additional arguments passed to Dictionary")
assert_list(dots[!is.na(names2(dots))], names = "unique", .var.name = "named arguments passed to Dictionary")

obj = dictionary_retrieve_item(dict, .key)
obj = dictionary_retrieve_item(dict, .key, .dicts_suggest)
if (length(dots) == 0L) {
return(assert_r6(dictionary_initialize_item(.key, obj)))
}
Expand All @@ -55,7 +61,6 @@ dictionary_sugar_get = function(dict, .key, ...) {
instance = assert_r6(dictionary_initialize_item(.key, obj, dots[ii]))
dots = dots[!ii]


# set params in ParamSet
if (length(dots) && exists("param_set", envir = instance, inherits = FALSE)) {
param_ids = instance$param_set$ids()
Expand All @@ -74,7 +79,7 @@ dictionary_sugar_get = function(dict, .key, ...) {
for (i in seq_along(dots)) {
nn = ndots[[i]]
if (!exists(nn, envir = instance, inherits = FALSE)) {
stopf("Cannot set argument '%s' for '%s' (not a constructor argument, not a parameter, not a field.%s",
stopf("Cannot set argument '%s' for '%s' (not a constructor argument, not a parameter, not a field).%s",
nn, class(instance)[1L], did_you_mean(nn, c(constructor_args, param_ids, setdiff(names(instance), ".__enclos_env__")))) # nolint
}
instance[[nn]] = dots[[i]]
Expand All @@ -90,11 +95,11 @@ dictionary_sugar = dictionary_sugar_get

#' @rdname dictionary_sugar_get
#' @export
dictionary_sugar_mget = function(dict, .keys, ...) {
dictionary_sugar_mget = function(dict, .keys, ..., .dicts_suggest = NULL) {
if (missing(.keys)) {
return(dict)
}
objs = lapply(.keys, dictionary_sugar_get, dict = dict, ...)
objs = lapply(.keys, dictionary_sugar_get, dict = dict, .dicts_suggest = .dicts_suggest, ...)
if (!is.null(names(.keys))) {
nn = names2(.keys)
ii = which(!is.na(nn))
Expand Down Expand Up @@ -132,10 +137,10 @@ fields = function(x) {
#' @title A Quick Way to Initialize Objects from Dictionaries with Incremented ID
#'
#' @description
#' Covenience wrapper around [dictionary_sugar_get] and [dictionary_sugar_mget] to allow easier avoidance of of ID
#' Convenience wrapper around [dictionary_sugar_get] and [dictionary_sugar_mget] to allow easier avoidance of ID
#' clashes which is useful when the same object is used multiple times and the ids have to be unique.
#' Let `<key>` be the key of the object to retrieve. When passing the `<key>_<n>` to this
#' function, where `<n>` is any natural numer, the object with key `<key>` is retrieved and the
#' function, where `<n>` is any natural number, the object with key `<key>` is retrieved and the
#' suffix `_<n>` is appended to the id after the object is constructed.
#'
#' @param dict ([Dictionary])\cr
Expand All @@ -146,6 +151,8 @@ fields = function(x) {
#' Keys of the objects to construct - possibly with suffixes of the form `_<n>` which will be appended to the ids.
#' @param ... (any)\cr
#' See description of [mlr3misc::dictionary_sugar].
#' @param .dicts_suggest (named [`list`])\cr
#' Named list of [dictionaries][Dictionary] used to look up suggestions for `.key` if `.key` does not exist in `dict`.
#'
#' @return An element from the dictionary.
#'
Expand All @@ -163,25 +170,24 @@ fields = function(x) {
#' map(objs, "id")
#'
#' @export
dictionary_sugar_inc_get = function(dict, .key, ...) {
dictionary_sugar_inc_get = function(dict, .key, ..., .dicts_suggest = NULL) {
m = regexpr("_\\d+$", .key)
if (attr(m, "match.length") == -1L) {
return(dictionary_sugar_get(dict = dict, .key = .key, ...))
return(dictionary_sugar_get(dict = dict, .key = .key, ..., .dicts_suggest = .dicts_suggest))
}
assert_true(!methods::hasArg("id"))
split = regmatches(.key, m, invert = NA)[[1L]]
newkey = split[[1L]]
suffix = split[[2L]]
obj = dictionary_sugar_get(dict = dict, .key = newkey, ...)
obj = dictionary_sugar_get(dict = dict, .key = newkey, ..., .dicts_suggest = .dicts_suggest)
obj$id = paste0(obj$id, suffix)
obj

}

#' @rdname dictionary_sugar_inc_get
#' @export
dictionary_sugar_inc_mget = function(dict, .keys, ...) {
objs = lapply(.keys, dictionary_sugar_inc_get, dict = dict, ...)
dictionary_sugar_inc_mget = function(dict, .keys, ..., .dicts_suggest = NULL) {
objs = lapply(.keys, dictionary_sugar_inc_get, dict = dict, ..., .dicts_suggest = .dicts_suggest)
if (!is.null(names(.keys))) {
nn = names2(.keys)
ii = which(!is.na(nn))
Expand Down
87 changes: 82 additions & 5 deletions R/did_you_mean.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,89 @@
#' @examples
#' did_you_mean("yep", c("yes", "no"))
did_you_mean = function(str, candidates) {
candidates = unique(candidates)
D = set_names(adist(str, candidates, ignore.case = TRUE, partial = TRUE)[1L, ], candidates)
suggested = names(head(sort(D[D <= ceiling(0.2 * nchar(str))]), 3L))
suggestions = find_suggestions(str, candidates, threshold = 0.2, max_candidates = 3L, ret_distances = FALSE)

if (!length(suggestions)) {
return("")
}
sprintf(" Did you mean %s?", str_collapse(suggestions, quote = "'", sep = " / "))
}

# @title Suggest Alternatives from Given Dictionaries
#
# @description
# Helps to suggest alternatives for a given key based on the keys of given dictionaries.
#
# @param key (`character(1)`) \cr
# Key to look for in `dicts`.
# @param dicts (named list)\cr
# Named list of [dictionaries][Dictionary].
# @param max_candidates_dicts (`integer(1)`) \cr
# Maximum number of dictionaries for which suggestions are outputted.
# @return (`character(1)`). Either a phrase suggesting one or more keys based on the dictionaries in `dicts`,
# or an empty string if no close match is found.
did_you_mean_dicts = function(key, dicts, max_candidates_dicts = 3L) {
# No message if no dictionaries are given
if (is.null(dicts)) {
return("")
}

suggestions = character(0)
min_distance_per_dict = numeric(0)

for (i in seq_along(dicts)) {
# Get distances and the corresponding entries for the current dictionary
distances = find_suggestions(key, dicts[[i]]$keys(), ret_distances = TRUE)
entries = names(distances)

if (!length(suggested)) {
# Handle the case of no matches: skip the dictionary
if (!length(entries)) {
next
}

# Record the closest distance
min_distance_per_dict[[length(min_distance_per_dict) + 1]] = min(distances)

# Create a suggestion message for the current dictionary
suggestions[[length(suggestions) + 1]] = sprintf(
"%s: %s", names(dicts)[[i]], str_collapse(entries, quote = "'", sep = " / ")
)
}

# Order the suggestions by their closest match
suggestions = suggestions[order(min_distance_per_dict)]
# Only show the `max_candidates_dicts` dictionaries with the best matches
suggestions = head(suggestions, max_candidates_dicts)

# If no valid suggestions, return an empty string
if (!length(suggestions)) {
return("")
}
sprintf(" Did you mean %s?", str_collapse(suggested, quote = "'", sep = " / "))

# add \n
sprintf("\nSimilar entries in other dictionaries:\n %s", str_collapse(suggestions, sep = "\n "))
}

# @title Find Suggestions
#
# @description
# Ranks `candidates` by their approximate string distance to `str`
# (case-insensitive, partial matching via `adist()`) and keeps the closest ones.
#
# @param str (`character(1)`)\cr
#   String to find suggestions for.
# @param candidates (`character()`)\cr
#   Candidate strings. Duplicates are removed before matching.
# @param threshold (`numeric(1)`)\cr
#   Fraction of the number of characters of `str`. A candidate is only kept if its
#   distance is at most `ceiling(threshold * nchar(str))`.
# @param max_candidates (`integer(1)`)\cr
#   Maximum number of candidates to return.
# @param ret_distances (`logical(1)`)\cr
#   If `TRUE`, return the distances (named by candidate) instead of only the candidate names.
# @return If `ret_distances` is `FALSE`, the names of the closest candidates ordered by
#   increasing distance. If `ret_distances` is `TRUE`, the distances named by candidate,
#   in the same order. The result has length zero if no candidate is within the threshold.
find_suggestions = function(str, candidates, threshold = 0.2, max_candidates = 3L, ret_distances = FALSE) {
  pool = unique(candidates)

  # adist() returns a matrix with one row per element of `str`; only the first row is used
  dists = adist(str, pool, ignore.case = TRUE, partial = TRUE)[1L, ]
  dists = set_names(dists, pool)

  # largest distance that still counts as a close match
  cutoff = ceiling(threshold * nchar(str))
  closest = head(sort(dists[dists <= cutoff]), max_candidates)

  if (!ret_distances) {
    return(names(closest))
  }
  closest
}
10 changes: 7 additions & 3 deletions man/dictionary_sugar_get.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 7 additions & 4 deletions man/dictionary_sugar_inc_get.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions tests/testthat/test_Dictionary.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,24 @@ test_that("#115", {
d$add("a", function() A$new())
expect_error(dictionary_sugar_get(d, "a", y = 10), "Did you mean")
})

test_that("similar entries in other dictionaries", {
  # Checks the ".dicts_suggest" hint that dictionary_sugar_get() appends to its
  # "not found" error: present when another dictionary has a close key, absent
  # otherwise, and ordered by the closest match per dictionary.
  obj = R6Class("A", public = list(x = NULL))

  d = Dictionary$new()
  d$add("abc", obj)

  d_lookup1 = Dictionary$new()
  d_lookup1$add("cde", obj)

  # Makes suggestions
  expect_error(dictionary_sugar_get(d, "cde", .dicts_suggest = list("lookup1" = d_lookup1)), "Similar entries in other dictionaries")

  # Makes no suggestions.
  # The negative lookahead must be anchored at the start of the message: an unanchored
  # "(?!...)" matches at some position of every string, so it could never fail.
  # "(?s)" lets "." cross the newline that precedes the hint in the error message.
  expect_error(
    dictionary_sugar_get(d, "xyz", .dicts_suggest = list("lookup1" = d_lookup1)),
    "(?s)^(?!.*Similar entries in other dictionaries).*not found",
    perl = TRUE
  )

  d_lookup2 = Dictionary$new()
  d_lookup2$add("bcd", obj)

  # Dictionaries ordered by closest match per dictionary
  expect_error(dictionary_sugar_get(d, "bcd", .dicts_suggest = list("lookup1" = d_lookup1, "lookup2" = d_lookup2)), "Similar entries in other dictionaries.*lookup2.*lookup1")
})
Loading