Skip to content

Commit

Permalink
prep CRAN v0.8
Browse files Browse the repository at this point in the history
  • Loading branch information
sborms committed Jan 13, 2020
1 parent 509345e commit d80d0c8
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 104 deletions.
2 changes: 1 addition & 1 deletion R/deprecated.R
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,6 @@ measures_global <- function(...) {
#'
#' @export
sento_app <- function(...) {
  # Stub that always raises an error pointing callers to the sentometrics.app
  # package; any arguments passed through `...` are accepted but never used.
  # A single stop() is kept: a second call placed after it could never run.
  stop("Use the sento_app() function from the sentometrics.app package.")
}

30 changes: 0 additions & 30 deletions R/sentomeasures_measures_xyz.R
Original file line number Diff line number Diff line change
Expand Up @@ -126,36 +126,6 @@ check_agg_dimensions <- function(sento_measures, features = NULL, lexicons = NUL
return(list(stop = stop, msg1 = msg1, msg2 = msg2))
}

# Aggregate sentiment measures into global measures, one row per date.
#
# Arguments:
#   sento_measures  object checked by check_class() to be a "sento_measures".
#   lexicons, features, time
#                   optional weights, one per component of the respective
#                   dimension; NULL (default) means equal weights 1/n.
# Returns the per-date table with columns globLex, globFeat and globTime
# (means of value times the respective weight) plus their row mean, global.
# Stops if a supplied weight vector does not match the number of components.
measures_global <- function(sento_measures, lexicons = NULL, features = NULL, time = NULL) {
  check_class(sento_measures, "sento_measures")

  # NOTE(review): assumes get_dimensions() returns the dimensions in the order
  # features, lexicons, time, matching weightsInp below -- confirm.
  dims <- get_dimensions(sento_measures)
  n <- lengths(dims)
  weightsInp <- list(features, lexicons, time)

  # lapply() instead of sapply(): when all dimensions have the same number of
  # components, sapply() simplifies the three lists into a flat list or a
  # list-matrix, so weights[[i]] would no longer be the named weight list.
  weights <- lapply(seq_along(weightsInp), function(i) {
    if (is.null(weightsInp[[i]])) {
      w <- as.list(rep(1 / n[i], n[i])) # equal weights for the default value of NULL
    } else {
      w <- as.list(weightsInp[[i]])
      if (length(w) != n[i]) {
        stop("All weights must be equal in length to the respective number of components.")
      }
    }
    names(w) <- dims[[i]] # named weight lists, looked up by component name below
    w
  })

  measuresLong <- data.table::as.data.table(sento_measures, format = "long")
  measuresLong[, "wFeat" := unlist(weights[[1]][measuresLong[["features"]]])] # weights features
  measuresLong[, "wLex" := unlist(weights[[2]][measuresLong[["lexicons"]]])] # weights lexicon
  # time weights keep their sign, like the feature and lexicon weights
  measuresLong[, "wTime" := unlist(weights[[3]][measuresLong[["time"]]])] # weights time
  globs <- measuresLong[, list(globLex = mean(value * wLex),
                               globFeat = mean(value * wFeat),
                               globTime = mean(value * wTime)), by = date]
  globs[["global"]] <- rowMeans(globs[, -1]) # average over the three global columns (date dropped)

  return(globs)
}

#' Update sentiment measures
#'
#' @author Jeroen Van Pelt, Samuel Borms, Andres Algaba
Expand Down
32 changes: 31 additions & 1 deletion R/sentomeasures_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,7 @@ aggregate.sento_measures <- function(x, features = NULL, lexicons = NULL, time =
stopifnot(is.null(features) || is.numeric(features))
stopifnot(is.null(lexicons) || is.numeric(lexicons))
stopifnot(is.null(time) || is.numeric(time))
measures <- measures_global(x, lexicons, features, time)
measures <- agg_global(x, lexicons, features, time)
if (do.keep == TRUE) measures <- cbind(measures, data.table::as.data.table(x)[, -1])
return(measures)
}
Expand Down Expand Up @@ -626,3 +626,33 @@ aggregate.sento_measures <- function(x, features = NULL, lexicons = NULL, time =
return(sento_measures)
}

# Aggregate sentiment measures into global measures, one row per date.
#
# Arguments:
#   sento_measures  object checked by check_class() to be a "sento_measures".
#   lexicons, features, time
#                   optional weights, one per component of the respective
#                   dimension; NULL (default) means equal weights 1/n.
# Returns the per-date table with columns globLex, globFeat and globTime
# (means of value times the respective weight) plus their row mean, global.
# Stops if a supplied weight vector does not match the number of components.
agg_global <- function(sento_measures, lexicons = NULL, features = NULL, time = NULL) {
  check_class(sento_measures, "sento_measures")

  # NOTE(review): assumes get_dimensions() returns the dimensions in the order
  # features, lexicons, time, matching weightsInp below -- confirm.
  dims <- get_dimensions(sento_measures)
  n <- lengths(dims)
  weightsInp <- list(features, lexicons, time)

  # lapply() instead of sapply(): when all dimensions have the same number of
  # components, sapply() simplifies the three lists into a flat list or a
  # list-matrix, so weights[[i]] would no longer be the named weight list.
  weights <- lapply(seq_along(weightsInp), function(i) {
    if (is.null(weightsInp[[i]])) {
      w <- as.list(rep(1 / n[i], n[i])) # equal weights for the default value of NULL
    } else {
      w <- as.list(weightsInp[[i]])
      if (length(w) != n[i]) {
        stop("All weights must be equal in length to the respective number of components.")
      }
    }
    names(w) <- dims[[i]] # named weight lists, looked up by component name below
    w
  })

  measuresLong <- data.table::as.data.table(sento_measures, format = "long")
  measuresLong[, "wFeat" := unlist(weights[[1]][measuresLong[["features"]]])] # weights features
  measuresLong[, "wLex" := unlist(weights[[2]][measuresLong[["lexicons"]]])] # weights lexicon
  measuresLong[, "wTime" := unlist(weights[[3]][measuresLong[["time"]]])] # weights time
  globs <- measuresLong[, list(globLex = mean(value * wLex),
                               globFeat = mean(value * wFeat),
                               globTime = mean(value * wTime)), by = date]
  globs[["global"]] <- rowMeans(globs[, -1]) # average over the three global columns (date dropped)

  return(globs)
}

45 changes: 13 additions & 32 deletions appendix/run_timings.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,41 +5,18 @@

###### DESCRIPTION ######

### This code was used in a previous version of the vignette paper 'The R Package sentometrics
### to Compute, Aggregate and Predict with Textual Sentiment' (Ardia, Bluteau, Borms and Boudt, 2019),
### This code is used for the supplementary appendix to the vignette paper 'The R Package sentometrics
### to Compute, Aggregate and Predict with Textual Sentiment' (Ardia, Bluteau, Borms and Boudt, 2020),
### comparing various textual sentiment computation tools in R.
### Download the package and its dependencies first before you run this script...
### install.packages("sentometrics", dependencies = TRUE) # from CRAN (version 0.8), OR
### install.packages("sentometrics_0.8.tar.gz", repos = NULL, dependencies = TRUE) # from the tar

###### SESSION INFO ######

### R version 3.6.1 (2019-07-05)
### Platform: x86_64-w64-mingw32/x64 (64-bit)
### Running under: Windows 10 x64 (build 18362)
###
### Matrix products: default
###
### locale:
### [1] LC_COLLATE=English_Belgium.1252 LC_CTYPE=English_Belgium.1252 LC_MONETARY=English_Belgium.1252
### [4] LC_NUMERIC=C LC_TIME=English_Belgium.1252
###
### attached base packages:
### [1] stats graphics grDevices utils datasets methods base
###
### other attached packages:
### [1] microbenchmark_1.4-7 tidyr_1.0.0 dplyr_0.8.3 lexicon_1.2.1
### [5] SentimentAnalysis_1.3-3 syuzhet_1.0.4 meanr_0.1-2 tidytext_0.2.2
### [9] quanteda_1.5.1 data.table_1.12.6 sentometrics_0.7.6
###
### loaded via a namespace (and not attached):
### [1] Rcpp_1.0.2 pillar_1.3.1 compiler_3.6.1 tokenizers_0.2.1 iterators_1.0.12 tools_3.6.1
### [7] stopwords_0.9.0 zeallot_0.1.0 lifecycle_0.1.0 lubridate_1.7.4 tibble_2.1.3 gtable_0.3.0
### [13] lattice_0.20-38 pkgconfig_2.0.2 rlang_0.4.0 Matrix_1.2-17 foreach_1.4.7 fastmatch_1.1-0
### [19] janeaustenr_0.1.5 stringr_1.4.0 vctrs_0.2.0 generics_0.0.2 glmnet_2.0-18 grid_3.6.1
### [25] tidyselect_0.2.5 glue_1.3.0 R6_2.4.0 ggplot2_3.2.1 purrr_0.3.0 spacyr_1.0
### [31] magrittr_1.5 backports_1.1.3 SnowballC_0.6.0 scales_1.0.0 codetools_0.2-16 assertthat_0.2.0
### [37] colorspace_1.4-0 stringi_1.4.3 lazyeval_0.2.2 RcppParallel_4.4.4 munsell_0.5.0 crayon_1.3.4
# R version 3.6.1 (2019-07-05)
# TODO: update!
# [37] colorspace_1.4-0 stringi_1.4.3 lazyeval_0.2.2 RcppParallel_4.4.4 munsell_0.5.0 crayon_1.3.4

remove(list = ls())

Expand All @@ -63,10 +40,6 @@ library("dplyr")
library("tidyr")
library("microbenchmark")

info <- sessionInfo()
print(info)
cat("\n")

########################################### loading of packages, definition of lexicons

data("usnews", package = "sentometrics")
Expand Down Expand Up @@ -302,5 +275,13 @@ cat("\n")

###########################################

# Print a banner followed by the session information returned by sessionInfo(),
# then a trailing blank line. This runs just before the closing sink() call.
cat("############################## \n")
cat("###### SESSION INFO \n \n")
info <- sessionInfo()
print(info)
cat("\n")

###########################################

sink()

8 changes: 6 additions & 2 deletions cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@

## submission (version 0.7.6) [31/10/2019]
## submission (version 0.8) [13/01/2020]

- fixed memory leak bug
**new**: minor improvements

<!--- ## submission (version 0.7.6) [31/10/2019]
- fixed memory leak bug --->

<!--- ## submission (version 0.7.5) [30/10/2019]
Expand Down
84 changes: 48 additions & 36 deletions examples/run_vignette.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
###### DESCRIPTION ######

### This code is used in the vignette paper 'The R Package sentometrics to Compute,
### Aggregate and Predict with Textual Sentiment' (Ardia, Bluteau, Borms and Boudt, 2019).
### Aggregate and Predict with Textual Sentiment' (Ardia, Bluteau, Borms and Boudt, 2020).
### See the paper for the results and setup details.
### Download the package and its dependencies first before you run this script...
### install.packages("sentometrics", dependencies = TRUE) # from CRAN (version 0.8), OR
Expand All @@ -22,37 +22,43 @@

###### SESSION INFO ######

### R version 3.6.1 (2019-07-05)
### Platform: x86_64-w64-mingw32/x64 (64-bit)
### Running under: Windows 10 x64 (build 18362)
###
### Matrix products: default
###
### locale:
### [1] LC_COLLATE=English_Belgium.1252 LC_CTYPE=English_Belgium.1252 LC_MONETARY=English_Belgium.1252
### [4] LC_NUMERIC=C LC_TIME=English_Belgium.1252
###
### attached base packages:
### [1] stats graphics grDevices utils datasets methods base
###
### other attached packages:
### [1] zoo_1.8-4 lubridate_1.7.4 stm_1.3.3 quanteda_1.5.1 lexicon_1.2.1 gridExtra_2.3
### [7] ggplot2_3.2.1 data.table_1.12.6 sentometrics_0.7.6 testthat_2.2.1
###
### loaded via a namespace (and not attached):
### [1] Rcpp_1.0.2 lattice_0.20-38 prettyunits_1.0.2 class_7.3-15 ps_1.3.0 assertthat_0.2.0
### [7] glmnet_2.0-18 rprojroot_1.3-2 digest_0.6.20 packrat_0.5.0 ipred_0.9-8 foreach_1.4.7
### [13] R6_2.4.0 plyr_1.8.4 backports_1.1.3 stats4_3.6.1 pillar_1.3.1 rlang_0.4.0
### [19] lazyeval_0.2.2 caret_6.0-84 rstudioapi_0.10 callr_3.3.1 rpart_4.1-15 Matrix_1.2-17
### [25] desc_1.2.0 devtools_2.1.0 splines_3.6.1 gower_0.1.2 stringr_1.4.0 ISOweek_0.6-2
### [31] munsell_0.5.0 spacyr_1.0 compiler_3.6.1 pkgconfig_2.0.2 pkgbuild_1.0.3 nnet_7.3-12
### [37] tidyselect_0.2.5 tibble_2.1.3 prodlim_2018.04.18 codetools_0.2-16 RcppRoll_0.3.0 crayon_1.3.4
### [43] dplyr_0.8.3 withr_2.1.2 MASS_7.3-51.4 recipes_0.1.6 ModelMetrics_1.2.2 grid_3.6.1
### [49] nlme_3.1-141 gtable_0.3.0 magrittr_1.5 scales_1.0.0 RcppParallel_4.4.4 cli_1.1.0
### [55] stringi_1.4.3 reshape2_1.4.3 fs_1.3.1 remotes_2.1.0 syuzhet_1.0.4 timeDate_3043.102
### [61] stopwords_0.9.0 generics_0.0.2 fastmatch_1.1-0 lava_1.6.5 iterators_1.0.12 tools_3.6.1
### [67] glue_1.3.0 purrr_0.3.0 processx_3.4.1 pkgload_1.0.2 survival_2.44-1.1 colorspace_1.4-0
### [73] sessioninfo_1.1.1 memoise_1.1.0 usethis_1.5.1
# R version 3.6.2 (2019-12-12)
# Platform: x86_64-w64-mingw32/x64 (64-bit)
# Running under: Windows 10 x64 (build 18362)
#
# Matrix products: default
#
# locale:
# [1] LC_COLLATE=English_Belgium.1252 LC_CTYPE=English_Belgium.1252 LC_MONETARY=English_Belgium.1252
# [4] LC_NUMERIC=C LC_TIME=English_Belgium.1252
#
# attached base packages:
# [1] stats graphics grDevices utils datasets methods base
#
# other attached packages:
# [1] microbenchmark_1.4-7 tidyr_1.0.0 dplyr_0.8.3 SentimentAnalysis_1.3-3
# [5] syuzhet_1.0.4 meanr_0.1-2 tidytext_0.2.2 zoo_1.8-7
# [9] lubridate_1.7.4 stm_1.3.5 quanteda_1.5.2 lexicon_1.2.1
# [13] gridExtra_2.3 ggplot2_3.2.1 data.table_1.12.8 sentometrics_0.8.0
# [17] testthat_2.3.1
#
# loaded via a namespace (and not attached):
# [1] nlme_3.1-143 matrixStats_0.55.0 fs_1.3.1 usethis_1.5.1 devtools_2.2.1 rprojroot_1.3-2
# [7] SnowballC_0.6.0 tools_3.6.2 backports_1.1.5 R6_2.4.1 rpart_4.1-15 lazyeval_0.2.2
# [13] colorspace_1.4-1 nnet_7.3-12 withr_2.1.2 tidyselect_0.2.5 prettyunits_1.1.0 processx_3.4.1
# [19] compiler_3.6.2 glmnet_3.0-2 cli_2.0.1 xml2_1.2.2 NLP_0.2-0 desc_1.2.0
# [25] labeling_0.3 slam_0.1-47 scales_1.1.0 tm_0.7-7 callr_3.4.0 stringr_1.4.0
# [31] digest_0.6.23 pkgconfig_2.0.3 sessioninfo_1.1.1 rlang_0.4.2 rstudioapi_0.10 shape_1.4.4
# [37] generics_0.0.2 farver_2.0.1 ModelMetrics_1.2.2 tokenizers_0.2.1 magrittr_1.5 Matrix_1.2-18
# [43] Rcpp_1.0.3 munsell_0.5.0 fansi_0.4.1 lifecycle_0.1.0 stringi_1.4.5 pROC_1.16.0
# [49] MASS_7.3-51.4 pkgbuild_1.0.6 plyr_1.8.5 recipes_0.1.9 grid_3.6.2 parallel_3.6.2
# [55] crayon_1.3.4 lattice_0.20-38 splines_3.6.2 zeallot_0.1.0 ps_1.3.0 pillar_1.4.3
# [61] ISOweek_0.6-2 reshape2_1.4.3 codetools_0.2-16 stopwords_1.0 stats4_3.6.2 pkgload_1.0.2
# [67] fastmatch_1.1-0 glue_1.3.1 packrat_0.5.0 remotes_2.1.0 RcppParallel_4.4.4 vctrs_0.2.1
# [73] foreach_1.4.7 gtable_0.3.0 purrr_0.3.3 assertthat_0.2.1 gower_0.2.1 prodlim_2019.11.13
# [79] janeaustenr_0.1.5 class_7.3-15 survival_3.1-8 timeDate_3043.102 RcppRoll_0.3.0 tibble_2.1.3
# [85] iterators_1.0.12 memoise_1.1.0 spacyr_1.2 lava_1.6.6 ellipsis_0.3.0 caret_6.0-85
# [91] ipred_0.9-9

remove(list = ls())
options(prompt = "R> ", continue = "+ ", width = 120, digits = 4, max.print = 90, useFancyQuotes = FALSE)
Expand All @@ -69,10 +75,6 @@ library("stm")
library("lubridate")
library("zoo")

info <- sessionInfo()
print(info)
cat("\n")

set.seed(505)

##################
Expand Down Expand Up @@ -373,5 +375,15 @@ a <- gridExtra::grid.arrange(fe + theme(axis.title.x = element_blank()),
le + theme(axis.title.y = element_blank()),
ncol = 1, nrow = 2)

######

# Print a banner followed by the session information returned by sessionInfo(),
# then a trailing blank line. This runs just before the closing sink() call.
cat("############################## \n")
cat("###### SESSION INFO \n \n")
info <- sessionInfo()
print(info)
cat("\n")

######

sink()

2 changes: 1 addition & 1 deletion inst/CITATION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

year <- "2019"
year <- "2020"
link <- "https://doi.org/10.2139/ssrn.3067734"

citHeader("To cite the R package 'sentometrics' in publications use:")
Expand Down
2 changes: 1 addition & 1 deletion man/sentometrics-defunct.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d80d0c8

Please sign in to comment.