From f20bebe7a59c7b9d495d63890fd4403270558748 Mon Sep 17 00:00:00 2001
From: mpadge
Date: Tue, 24 Sep 2024 11:39:49 +0200
Subject: [PATCH] add internal 'rm_fn_data' in rerank fn to close #8

---
Review notes (this section sits after the three-dash separator, so it is
not part of the commit message):

* Line breaks and indentation were restored from a whitespace-collapsed
  copy of this patch. Hunk line counts agree with the hunk headers, but the
  indentation of context lines is reconstructed; verify against the target
  tree, or apply with `git apply --ignore-whitespace`.
* R/rerank.R: the column subset now uses `drop = FALSE`. Without it, a
  single matching "wo" column (or a single-row input) collapses the matrix
  to a plain vector and the following `rowSums()` call errors.
* R/similar-pkgs.R: `input_mentions_functions()` now matches "function" or
  "functions" as a whole word. The previous pattern required whitespace on
  both sides, so it missed the word at the start or end of the input and
  next to punctuation. Everything the old pattern matched still matches.
* The blob ids on the `index` lines are those of the original commit.

 DESCRIPTION      |  2 +-
 R/rerank.R       | 16 ++++++++--------
 R/similar-pkgs.R | 17 ++++++++++++++++-
 codemeta.json    |  2 +-
 4 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 07ab2b1..be367ae 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: pkgsimil
 Title: Similarity Metrics between R Packages
-Version: 0.1.2.099
+Version: 0.1.2.100
 Authors@R: c(
     person("Mark", "Padgham", , "mark.padgham@email.com", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-2172-5265")))
diff --git a/R/rerank.R b/R/rerank.R
index 2de309b..4cfde37 100644
--- a/R/rerank.R
+++ b/R/rerank.R
@@ -1,7 +1,9 @@
 #' Re-randk an input `data.frame` of packages with several columns of scores.
 #'
+#' @param rm_fn_data If `TRUE` (default), only generate combined ranks from
+#' data excluding function descriptions.
 #' @noRd
-pkgsimil_rerank <- function (s) {
+pkgsimil_rerank <- function (s, rm_fn_data = TRUE) {
 
     cols <- names (s) [-which (names (s) == "package")]
     new_cols <- paste0 (cols, "_rank")
@@ -21,13 +23,11 @@ pkgsimil_rerank <- function (s) {
     rank_matrix <- as.matrix (s [, new_cols])
     rank_matrix <- 1 / (k + rank_matrix)
 
-    # Weight rankings without function definitions higher than those with.
-    # Relative weighting are `wt_factor ^ 2`.
-    wt_factor <- 0.5
-    cols_with <- grep ("with", colnames (rank_matrix))
-    cols_wo <- grep ("wo", colnames (rank_matrix))
-    rank_matrix [, cols_with] <- rank_matrix [, cols_with] * wt_factor
-    rank_matrix [, cols_wo] <- rank_matrix [, cols_wo] / wt_factor
+    if (rm_fn_data) {
+        cols_wo <- grep ("wo", colnames (rank_matrix))
+        # `drop = FALSE` keeps a matrix for `rowSums()` even with one column.
+        rank_matrix <- rank_matrix [, cols_wo, drop = FALSE]
+    }
 
     rank_scores <- rowSums (rank_matrix)
 
diff --git a/R/similar-pkgs.R b/R/similar-pkgs.R
index 6175a4f..2317f5a 100644
--- a/R/similar-pkgs.R
+++ b/R/similar-pkgs.R
@@ -167,6 +167,21 @@ similar_pkgs_from_text <- function (input,
     similarities [is.na (similarities)] <- 0
 
     index <- seq_len (n)
+    rm_fn_data <- !input_mentions_functions (input)
 
-    return (pkgsimil_rerank (similarities) [index])
+    return (pkgsimil_rerank (similarities, rm_fn_data) [index])
+}
+
+#' Detect whether free-text input mentions the word "function".
+#'
+#' @param input A length-one input (presumably a character string).
+#' @return `TRUE` if `input` contains "function" or "functions" as a whole
+#' word (case-insensitive), otherwise `FALSE`.
+#' @noRd
+input_mentions_functions <- function (input) {
+
+    stopifnot (length (input) == 1L)
+
+    # Word boundaries also match at string ends and next to punctuation.
+    grepl ("\\bfunctions?\\b", input, ignore.case = TRUE)
 }
diff --git a/codemeta.json b/codemeta.json
index 62200e2..70a6a1e 100644
--- a/codemeta.json
+++ b/codemeta.json
@@ -8,7 +8,7 @@
   "codeRepository": "https://github.com/ropensci-review-tools/pkgsimil",
   "issueTracker": "https://github.com/ropensci-review-tools/pkgsimil/issues",
   "license": "https://spdx.org/licenses/MIT",
-  "version": "0.1.2.099",
+  "version": "0.1.2.100",
   "programmingLanguage": {
     "@type": "ComputerLanguage",
     "name": "R",