diff --git a/R/deprecated.R b/R/deprecated.R index a68d271..06fb6aa 100644 --- a/R/deprecated.R +++ b/R/deprecated.R @@ -249,6 +249,6 @@ measures_global <- function(...) { #' #' @export sento_app <- function(...) { - stop("Use the sento_app() function now put into the sentometrics.app package.") + stop("Use the sento_app() function from the sentometrics.app package.") } diff --git a/R/sentomeasures_measures_xyz.R b/R/sentomeasures_measures_xyz.R index 389f2ca..0578fd8 100644 --- a/R/sentomeasures_measures_xyz.R +++ b/R/sentomeasures_measures_xyz.R @@ -126,36 +126,6 @@ check_agg_dimensions <- function(sento_measures, features = NULL, lexicons = NUL return(list(stop = stop, msg1 = msg1, msg2 = msg2)) } -measures_global <- function(sento_measures, lexicons = NULL, features = NULL, time = NULL) { - check_class(sento_measures, "sento_measures") - - dims <- get_dimensions(sento_measures) - n <- sapply(dims, length) - weightsInp <- list(features, lexicons, time) - weights <- sapply(1:3, function(i) { - if (is.null(weightsInp[[i]])) - w <- as.list(rep(1/n[i], n[i])) # modify weights if equal to default value of NULL - else { - w <- as.list(weightsInp[[i]]) - if (length(w) != n[i]) - stop("All weights must be equal in length to the respective number of components.") - } - names(w) <- dims[[i]] # named weight lists - return(w) - }) - - measuresLong <- data.table::as.data.table(sento_measures, format = "long") - measuresLong[, "wFeat" := unlist(weights[[1]][measuresLong[["features"]]])] # weights features - measuresLong[, "wLex" := unlist(weights[[2]][measuresLong[["lexicons"]]])] # weights lexicon - measuresLong[, "wTime" :=- unlist(weights[[3]][measuresLong[["time"]]])] # weights time - globs <- measuresLong[, list(globLex = mean(value * wLex), - globFeat = mean(value * wFeat), - globTime = mean(value * wTime)), by = date] - globs[["global"]] <- rowMeans(globs[, -1]) - - return(globs) -} - #' Update sentiment measures #' #' @author Jeroen Van Pelt, Samuel Borms, Andres Algaba diff --git a/R/sentomeasures_methods.R b/R/sentomeasures_methods.R index ad9675c..fd83a05 100644 --- a/R/sentomeasures_methods.R +++ b/R/sentomeasures_methods.R @@ -554,7 +554,7 @@ aggregate.sento_measures <- function(x, features = NULL, lexicons = NULL, time = stopifnot(is.null(features) || is.numeric(features)) stopifnot(is.null(lexicons) || is.numeric(lexicons)) stopifnot(is.null(time) || is.numeric(time)) - measures <- measures_global(x, lexicons, features, time) + measures <- agg_global(x, lexicons, features, time) if (do.keep == TRUE) measures <- cbind(measures, data.table::as.data.table(x)[, -1]) return(measures) } @@ -626,3 +626,33 @@ aggregate.sento_measures <- function(x, features = NULL, lexicons = NULL, time = return(sento_measures) } +agg_global <- function(sento_measures, lexicons = NULL, features = NULL, time = NULL) { + check_class(sento_measures, "sento_measures") + + dims <- get_dimensions(sento_measures) + n <- sapply(dims, length) + weightsInp <- list(features, lexicons, time) + weights <- sapply(1:3, function(i) { + if (is.null(weightsInp[[i]])) + w <- as.list(rep(1/n[i], n[i])) # modify weights if equal to default value of NULL + else { + w <- as.list(weightsInp[[i]]) + if (length(w) != n[i]) + stop("All weights must be equal in length to the respective number of components.") + } + names(w) <- dims[[i]] # named weight lists + return(w) + }) + + measuresLong <- data.table::as.data.table(sento_measures, format = "long") + measuresLong[, "wFeat" := unlist(weights[[1]][measuresLong[["features"]]])] # weights features + measuresLong[, "wLex" := unlist(weights[[2]][measuresLong[["lexicons"]]])] # weights lexicon + measuresLong[, "wTime" := unlist(weights[[3]][measuresLong[["time"]]])] # weights time + globs <- measuresLong[, list(globLex = mean(value * wLex), + globFeat = mean(value * wFeat), + globTime = mean(value * wTime)), by = date] + globs[["global"]] <- rowMeans(globs[, -1]) + + return(globs) +} + diff --git a/appendix/run_timings.R b/appendix/run_timings.R index aabf2bb..17730a2 100644 --- a/appendix/run_timings.R +++ b/appendix/run_timings.R @@ -5,8 +5,8 @@ ###### DESCRIPTION ###### -### This code was used in a previous version of the vignette paper 'The R Package sentometrics -### to Compute, Aggregate and Predict with Textual Sentiment' (Ardia, Bluteau, Borms and Boudt, 2019), +### This code is used for the supplementary appendix to the vignette paper 'The R Package sentometrics +### to Compute, Aggregate and Predict with Textual Sentiment' (Ardia, Bluteau, Borms and Boudt, 2020), ### comparing various textual sentiment computation tools in R. ### Download the package and its dependencies first before you run this script... ### install.packages("sentometrics", dependencies = TRUE) # from CRAN (version 0.8), OR @@ -14,32 +14,9 @@ ###### SESSION INFO ###### -### R version 3.6.1 (2019-07-05) -### Platform: x86_64-w64-mingw32/x64 (64-bit) -### Running under: Windows 10 x64 (build 18362) -### -### Matrix products: default -### -### locale: -### [1] LC_COLLATE=English_Belgium.1252 LC_CTYPE=English_Belgium.1252 LC_MONETARY=English_Belgium.1252 -### [4] LC_NUMERIC=C LC_TIME=English_Belgium.1252 -### -### attached base packages: -### [1] stats graphics grDevices utils datasets methods base -### -### other attached packages: -### [1] microbenchmark_1.4-7 tidyr_1.0.0 dplyr_0.8.3 lexicon_1.2.1 -### [5] SentimentAnalysis_1.3-3 syuzhet_1.0.4 meanr_0.1-2 tidytext_0.2.2 -### [9] quanteda_1.5.1 data.table_1.12.6 sentometrics_0.7.6 -### -### loaded via a namespace (and not attached): -### [1] Rcpp_1.0.2 pillar_1.3.1 compiler_3.6.1 tokenizers_0.2.1 iterators_1.0.12 tools_3.6.1 -### [7] stopwords_0.9.0 zeallot_0.1.0 lifecycle_0.1.0 lubridate_1.7.4 tibble_2.1.3 gtable_0.3.0 -### [13] lattice_0.20-38 pkgconfig_2.0.2 rlang_0.4.0 Matrix_1.2-17 foreach_1.4.7 fastmatch_1.1-0 -### [19] janeaustenr_0.1.5 stringr_1.4.0 vctrs_0.2.0 generics_0.0.2 glmnet_2.0-18 grid_3.6.1 -### [25] tidyselect_0.2.5 glue_1.3.0 R6_2.4.0 ggplot2_3.2.1 purrr_0.3.0 spacyr_1.0 -### [31] magrittr_1.5 backports_1.1.3 SnowballC_0.6.0 scales_1.0.0 codetools_0.2-16 assertthat_0.2.0 -### [37] colorspace_1.4-0 stringi_1.4.3 lazyeval_0.2.2 RcppParallel_4.4.4 munsell_0.5.0 crayon_1.3.4 +# R version 3.6.1 (2019-07-05) +# TODO: update! +# [37] colorspace_1.4-0 stringi_1.4.3 lazyeval_0.2.2 RcppParallel_4.4.4 munsell_0.5.0 crayon_1.3.4 remove(list = ls()) @@ -63,10 +40,6 @@ library("dplyr") library("tidyr") library("microbenchmark") -info <- sessionInfo() -print(info) -cat("\n") - ########################################### loading of packages, definition of lexicons data("usnews", package = "sentometrics") @@ -302,5 +275,13 @@ cat("\n") ########################################### +cat("############################## \n") +cat("###### SESSION INFO \n \n") +info <- sessionInfo() +print(info) +cat("\n") + +########################################### + sink() diff --git a/cran-comments.md b/cran-comments.md index 800fcf2..a2df4d6 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,7 +1,11 @@ -## submission (version 0.7.6) [31/10/2019] +## submission (version 0.8) [13/01/2020] -- fixed memory leak bug +**new**: minor improvements + +