diff --git a/.Rbuildignore b/.Rbuildignore index 8093041..c39493f 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -13,3 +13,9 @@ ^_pkgdown\.yml$ ^docs$ ^pkgdown$ +^\.github$ +^README\.Rmd$ +^tauargus_files/*$ +tauargus_files/* +^_pkgdown_old\.yml$ +^README\.html$ diff --git a/.gitignore b/.gitignore index 76d2164..55cba71 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ vignettes/tauargus_exe.ini .hst docs + +output/ \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index a2a9308..54de9dc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,15 +2,18 @@ Package: rtauargus Type: Package Title: Using Tau-Argus from R Language: fr -Version: 1.1.2 +Version: 1.2.0 Depends: R (>= 3.5.0) Imports: purrr (>= 0.2), dplyr (>= 0.7), + data.table, gdata, stringr, rlang, - zoo + zoo, + sdcHierarchies, + lifecycle Suggests: testthat, knitr, @@ -39,6 +42,14 @@ Authors@R: c( "Félix", "Beroud", role = c("aut") ), + person( + "André-Raymond", "Socard", + role = c("aut") + ), + person( + "Wistan", "Pomel", + role = c("aut") + ), person( family = "Institut National de la Statistique et des Études Économiques", role = "cph" @@ -48,9 +59,12 @@ Description: Protects tables by calling the Tau-Argus software from R. License: MIT + file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.3 VignetteBuilder: knitr URL: https://inseefrlab.github.io/rtauargus, https://github.com/inseefrlab/rtauargus, https://inseefrlab.github.io/rtauargus/ BugReports: https://github.com/inseefrlab/rtauargus/issues +Roxygen: list(markdown = TRUE) +StagedInstall: no + diff --git a/NAMESPACE b/NAMESPACE index 4c75f98..d7c5de2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,25 +1,48 @@ # Generated by roxygen2: do not edit by hand +export(from_4_to_3) +export(from_4_to_3_case_0_hr) +export(from_4_to_3_case_1_hr) +export(from_4_to_3_case_2_hr) +export(from_5_to_3) export(import) +export(length_tabs) export(micro_arb) export(micro_asc_rda) +export(micro_rtauargus) +export(nb_tab_generated) +export(reduce_dims) export(reset_rtauargus_options) -export(rtauargus) +export(restore_format) export(rtauargus_options) export(rtauargus_plus) export(run_arb) +export(sp_format) export(tab_arb) export(tab_multi_manager) export(tab_rda) export(tab_rtauargus) export(tab_rtauargus2) +export(tab_rtauargus4) +export(tabulate_micro_data) +export(var_to_merge) export(write_hrc) export(write_hrc2) +import(data.table, except = transpose) importFrom(dplyr,"%>%") importFrom(dplyr,arrange) +importFrom(dplyr,filter) importFrom(dplyr,mutate) +importFrom(dplyr,select) +importFrom(lifecycle,badge) +importFrom(lifecycle,deprecated) importFrom(purrr,map) importFrom(purrr,map_at) importFrom(purrr,transpose) importFrom(rlang,.data) +importFrom(sdcHierarchies,hier_convert) +importFrom(sdcHierarchies,hier_import) +importFrom(stats,setNames) +importFrom(stringr,str_detect) +importFrom(utils,combn) importFrom(zoo,na.locf) diff --git a/NEWS.md b/NEWS.md index 710a0f4..3de2fea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,29 @@ subtitle: History of changes / Historique des modifications output: rmarkdown::html_vignette --- + +## rtauargus 1.2.0 + +[01/2024] + +* Implementation of a method to tackle some tables of 4/5 dimensions. + +> The method is quickly explained and its use is shown in a specific vignette (french). +> A paper explaining more deeply the idea and the modus operandi is available +here: "https://github.com/InseeFrLab/dims_reduction_tables_workshop_20231215". + +* Implementation of the function `tabulate_micro_data()` to compute tabular data from +a microdata file. + +> The function can create frequency and magnitude tabular data with hierarchical variables. +The tabular data computed contains the information to compute primary secret +according to frequency rule and (1,k)-dominance rule. + +* Resolution of a malfunction while dealing with costs. + +* **rtauargus()** function has been renamed more properly as **micro_rtauargus()**. +Its arguments and its behaviour remain the same. + ## rtauargus 1.1.2 [01/02/2023] diff --git a/R/data.R b/R/data.R index 5103b45..1bea196 100644 --- a/R/data.R +++ b/R/data.R @@ -8,7 +8,7 @@ #' \item{ACTIVITY}{business sector, hierarchical variables with three levels described #' in the activity_corr_table dataset. The root is noted "Total"} #' \item{SIZE}{size of the companies (Number of employees in three categories -#' + overall category "Total")} +#' and overall category "Total")} #' \item{N_OBS}{Frequency, number of companies} #' \item{TOT}{turnover value in euros} #' \item{MAX}{turnover of the company which contributes the most to the cell.} @@ -44,7 +44,7 @@ #' Hierarchical variables with two levels (nuts2 and nuts3) described #' in the nuts23_fr_corr_table dataset. The root is noted "Total"} #' \item{SIZE}{size of the companies (Number of employees in three categories -#' + overall category "Total")} +#' and overall category "Total")} #' \item{N_OBS}{Frequency, number of companies} #' \item{TOT}{turnover value in euros} #' \item{MAX}{turnover of the company which contributes the most to the cell.} @@ -81,7 +81,7 @@ #' \item{A21}{business sectors in 21 categories} #' \item{A88}{business sectors in 88 categories} #' } -#' @details Use the \code{write_hrc2} function to create a .hrc file from this +#' @details Use the `write_hrc2` function to create a .hrc file from this #' correspondence table. "activity_corr_table" @@ -95,7 +95,7 @@ #' \item{NUTS2}{NUTS2 levels in France - equivalent of French "Régions"} #' \item{NUTS3}{NUTS3 levels in France - equivalent of French "Départements"} #' } -#' @details Use the \code{write_hrc2} function to create a .hrc file from this +#' @details Use the `write_hrc2` function to create a .hrc file from this #' correspondence table. "nuts23_fr_corr_table" @@ -116,7 +116,7 @@ #' areas and their corresponding NUTS3 areas are in the data. #' The root is noted "Total_EAST"} #' \item{SIZE}{size of the companies (Number of employees in three categories -#' + overall category "Total")} +#' and overall category "Total")} #' \item{N_OBS}{Frequency, number of companies} #' \item{TOT}{turnover value in euros} #' \item{MAX}{turnover of the company which contributes the most to the cell.} @@ -125,3 +125,64 @@ #' activity_corr_table #' nuts23_fr_corr_table "turnover_act_nuts_size" + + +#' data crossing 4 categorical variables, none are hierarchical. +#' +#' @format A tibble/data frame with 689 rows and 12 variables: +#' \describe{ +#' \item{A10}{business sector, not hierarchical} +#' \item{cj}{legal category, not hierarchical} +#' \item{type_distrib}{type of distribution, not hierarchical} +#' \item{treff}{Number of employees (categorical), not hierarchical} +#' \item{nb_obs}{Frequency, number of companies} +#' \item{nb_obs_rnd}{Frequency rounded, number of companies} +#' \item{pizzas_tot}{turnover value in euros} +#' \item{pizzas_tot_abs}{turnover absolute value in euros} +#' \item{pizzas_max}{turnover max value in euros} +#' \item{is_secret_freq}{Boolean, TRUE if primary secret for frequency rule} +#' \item{is_secret_dom}{Boolean, TRUE if primary secret for dominance rule} +#' \item{is_secret_prim}{Boolean, TRUE if primary secret for any rule} +#' +#' } +"datatest1" + +#' data crossing 5 categorical variables, none are hierarchical. +#' +#' @format A tibble/data frame with 5 612 rows and 15 variables: +#' \describe{ +#' \item{A10}{business sector, not hierarchical} +#' \item{cj}{legal category, not hierarchical} +#' \item{type_distrib}{type of distribution, not hierarchical} +#' \item{treff}{Number of employees (categorical), not hierarchical} +#' \item{nuts1}{NUTS region, no hierarchical} +#' \item{nb_obs}{Frequency, number of companies} +#' \item{nb_obs_rnd}{Frequency rounded, number of companies} +#' \item{pizzas_tot}{turnover value in euros} +#' \item{pizzas_tot_abs}{turnover absolute value in euros} +#' \item{pizzas_max}{turnover max value in euros} +#' \item{is_secret_freq}{Boolean, TRUE if primary secret for frequency rule} +#' \item{is_secret_dom}{Boolean, TRUE if primary secret for dominance rule} +#' \item{is_secret_prim}{Boolean, TRUE if primary secret for any rule} +#' +#' } +"datatest2" + +#' Companies data at individual level. +#' +#' @format A data.table with 9 786 rows and 12 variables: +#' \describe{ +#' \item{A10}{business sector, not hierarchical} +#' \item{A21}{business sector, not hierarchical but nested in A10} +#' \item{A88}{business sector, not hierarchical but nested in A21} +#' \item{CJ}{legal category, not hierarchical} +#' \item{TYPE}{type of distribution, not hierarchical} +#' \item{SIZE}{Number of employees (categorical), not hierarchical} +#' \item{NUTS1}{NUTS 1 level of European administrative regions, not hierarchical} +#' \item{NUTS2}{NUTS 2 level of European administrative regions, not hierarchical} +#' \item{NUTS3}{NUTS 3 level of European administrative regions, not hierarchical} +#' \item{WEIGHT}{Weight of the companies, numeric} +#' \item{TURNOVER}{Turnover, numeric} +#' \item{PRODUCTION}{Production, numeric} +#' } +"indiv_dt" diff --git a/R/hrc.R b/R/hrc.R index 790bf4a..7af43c5 100644 --- a/R/hrc.R +++ b/R/hrc.R @@ -8,8 +8,8 @@ #' microdonnées. #' #' The function reconstructs the variable hierarchy from the levels -#' present in the data. The variables in \code{vars_hrc} must be -#' \strong{classified from the finest to the most aggregated}. +#' present in the data. The variables in `vars_hrc` must be +#' **classified from the finest to the most aggregated**. #' #' The relationship between each hierarchical level must be an application (in the #' mathematical sense of the term), i.e. each fine level must have a @@ -22,17 +22,17 @@ #' #' Missing values in the hierarchical variables will be #' imputed beforehand using another hierarchical variable (parameter -#' \code{fill_na}). In ascending strategy (\code{"up"}), the variables are +#' `fill_na`). In ascending strategy (`"up"`), the variables are #' from the most aggregated to the most refined, and vice versa in the -#' downward strategy (\code{"down"}). +#' downward strategy (`"down"`). #' -#' The parameter \code{compact} allows to create hierarchies with variable +#' The parameter `compact` allows to create hierarchies with variable #' depths. The idea is to cut the branches consisting of a single value #' repeated up to the maximum depth (see examples).\cr #' #' La fonction reconstitue la hiérarchie des variables à partir des niveaux -#' présents dans les données. Les variables dans \code{vars_hrc} doivent être -#' \strong{classées de la plus fine à la plus agrégée}. +#' présents dans les données. Les variables dans `vars_hrc` doivent être +#' **classées de la plus fine à la plus agrégée**. #' #' La relation entre chaque niveau hiérarchique doit être une application (au #' sens mathématique du terme), c'est-à-dire que chaque niveau fin doit avoir un @@ -45,47 +45,47 @@ #' #' Les valeurs manquantes présentes dans les variables hiérarchiques seront #' préalablement imputées à l'aide d'une autre variable hiérarchique (paramètre -#' \code{fill_na}). En stratégie ascendante (\code{"up"}), les variables sont +#' `fill_na`). En stratégie ascendante (`"up"`), les variables sont #' parcourues de la plus agrégée à la plus fine, et inversement en stratégie -#' descendante (\code{"down"}). +#' descendante (`"down"`). #' -#' Le paramètre \code{compact} permet de créer des hiérarchies à profondeurs +#' Le paramètre `compact` permet de créer des hiérarchies à profondeurs #' variables. L'idée est de couper les branches constituées d'une seule valeur #' répétée jusqu'à la profondeur maximale (voir exemples). #' #' @inheritParams micro_asc_rda -#' @param vars_hrc \strong{[mandatory]} vector of variable names +#' @param vars_hrc vector of variable names #' constituting the hierarchy, from the finest to the most aggregated level.\cr -#' (\strong{[obligatoire]} vecteur des noms des variables +#' (vecteur des noms des variables #' constituant la hiérarchie, du niveau le plus fin au niveau le plus agrégé.) #' @param hrc_filename name and location of the produced hrc file. If not #' filled, a temporary file.\cr #' (nom et emplacement du fichier hrc produit. Si non renseigné, un fichier temporaire.) #' @param fill_na fill in any missing values, using an other variable : #' \itemize{ -#' \item{\code{"up"} (default) : hierarchical variable of the level level +#' \item{`"up"` (default) : hierarchical variable of the level level #' immediately above} -#' \item{\code{"down"} : hierarchical variable of the level immediately +#' \item{`"down"` : hierarchical variable of the level immediately #' lower} #' }\cr #' (remplissage d'éventuelles valeurs manquantes, à l'aide d'une #' autre variable :\itemize{ -#' \item{\code{"up"} (défaut) : variable hiérarchique de niveau +#' \item{`"up"` (défaut) : variable hiérarchique de niveau #' immédiatement supérieur} -#' \item{\code{"down"} : variable hiérarchique de niveau immédiatement +#' \item{`"down"` : variable hiérarchique de niveau immédiatement #' inférieur} #' }) #' @param compact to prune branches repeating a single value to the -#' lowest level of depth (\code{TRUE} by default).\cr +#' lowest level of depth (`TRUE` by default).\cr #' (pour élaguer les branches répétant une unique valeur jusqu'au -#' plus bas niveau de profondeur (\code{TRUE} par défaut).) -#' @param hierlevels if only one variable is specified in \code{vars_hrc}, +#' plus bas niveau de profondeur (`TRUE` par défaut).) +#' @param hierlevels if only one variable is specified in `vars_hrc`, #' allows to generate the hierarchy according to the position of the characters in the -#' string. For example, \code{hierlevels = "2 3"} to build a +#' string. For example, `hierlevels = "2 3"` to build a #' hierarchy from a common code.\cr -#' (si une seule variable est spécifiée dans \code{vars_hrc}, +#' (si une seule variable est spécifiée dans `vars_hrc`, #' permet de générer la hiérarchie selon la position des caractères dans la -#' chaîne. Par exemple, \code{hierlevels = "2 3"} pour construire une +#' chaîne. Par exemple, `hierlevels = "2 3"` pour construire une #' hiérarchie département-commune à partir d'un code commune.) #' #' @return The name of the hrc file (useful in the case of a temporary file with @@ -446,7 +446,8 @@ df_hierlevels <- function(var_hrc, hierlevels) { } lev <- strsplit(hierlevels, " +")[[1]] - lev <- as.integer(lev) %>% `[`(. != 0) + lev <- as.integer(lev) + lev <- lev[lev != 0] if (sum(lev) != n1) { stop("la somme de hierlevels doit etre egale au nombre de caracteres") } diff --git a/R/import.R b/R/import.R index 45ca94c..968e0a5 100644 --- a/R/import.R +++ b/R/import.R @@ -125,7 +125,7 @@ meta_import <- function(data, #' Requires that the batch has been executed and finished without error. In order #' to import immediately after the batch has been executed, this function will be #' most often called via \code{link{run_arb}} (by setting -#' \code{import = TRUE}). +#' `import = TRUE`). #' #' It is only possible (for the moment) to import results of type "2" #' (csv for pivot-table) and "4" (sbs). If it is not possible to import for @@ -134,8 +134,8 @@ meta_import <- function(data, #' #' (Nécessite que le batch ait été exécuté et se soit terminé sans erreur. Afin #' d'importer immédiatement après exécution du batch, cette fonction sera ainsi -#' le plus souvent appelée via \code{\link{run_arb}} (en paramétrant -#' \code{import = TRUE}). +#' le plus souvent appelée via [run_arb()] (en paramétrant +#' `import = TRUE`). #' #' Il n'est possible (pour l'instant) que d'importer les résultats de type "2" #' (csv for pivot-table) et "4" (sbs). En cas d'impossibilité de l'import pour @@ -149,10 +149,10 @@ meta_import <- function(data, #' #' @return A list of one or more data.frames. Each data.frame corresponds to #' to the result of a tabulation. The names of the tables filled in the -#' lines of the batch of the form \code{// "..."} are recovered. \cr +#' lines of the batch of the form `// "..."` are recovered. \cr #' (Une liste d'un ou plusieurs data.frames. Chaque data.frame correspond #' au résultat d'une tabulation. Les noms des tableaux renseignés dans les -#' lignes du batch de la forme \code{// "..."} sont récupérés.) +#' lignes du batch de la forme `// "..."` sont récupérés.) #' #' @section Attributes: #' @@ -160,21 +160,21 @@ meta_import <- function(data, #' allowing to keep a trace of the specifications passed to Tau-Argus. #' #' Attributes systematically present : -#' \code{explanatory_vars}, \code{response_var}, \code{safetyrule}, -#' \code{suppress}, \code{linked}, \code{output_type}. +#' `explanatory_vars`, `response_var`, `safetyrule`, +#' `suppress`, `linked`, `output_type`. #' #' Attributes present only if the corresponding option has been filled in by -#' the user: \code{shadow_var}, \code{cost_var}, \code{output_options}. \cr +#' the user: `shadow_var`, `cost_var`, `output_options`. \cr #' #' (À chaque data.frame est associé un ensemble d'attributs (métadonnées) #' permettant de conserver une trace des spécifications passées à Tau-Argus. #' #' Attributs systématiquement présents : -#' \code{explanatory_vars}, \code{response_var}, \code{safetyrule}, -#' \code{suppress}, \code{linked}, \code{output_type}. +#' `explanatory_vars`, `response_var`, `safetyrule`, +#' `suppress`, `linked`, `output_type`. #' #' Attributs présents uniquement si l'option correspondante a été renseignée par -#' l'utilisateur : \code{shadow_var}, \code{cost_var}, \code{output_options}.) +#' l'utilisateur : `shadow_var`, `cost_var`, `output_options`.) #' #' @inheritSection micro_asc_rda See also #' diff --git a/R/micro_arb.R b/R/micro_arb.R index 0360764..49cd32e 100644 --- a/R/micro_arb.R +++ b/R/micro_arb.R @@ -141,6 +141,17 @@ apriori_batch <- function(ntab, hst_names, sep = ',', ignore_err = 0 , exp_triv #' Creates a batch file (.arb) for microdata #' +#' @description +#' `r lifecycle::badge("superseded")` +#' +#' Development on `micro_arb()` is complete, and for new code we recommend +#' switching to the tabular-wise protection provided by `tab_rtauargus()` +#' or `tab_multi_manager()`, which offer a lot more features for your +#' protection problems. +#' +#' See more details in `vignette("rtauargus")` or in +#' `vignette("protect_multi_tables)`. +#' #' Creates a batch file for microdata, executable by Tau-Argus in #' command line. \cr #' (Crée un fichier batch pour microdonnées, exécutable par Tau-Argus en ligne @@ -159,11 +170,11 @@ apriori_batch <- function(ntab, hst_names, sep = ',', ignore_err = 0 , exp_triv #' Unless otherwise stated, use the syntax mentioned in the documentation #' of Tau-Argus. #' -#' Special syntax for \code{suppress} : the first parameter in the +#' Special syntax for `suppress` : the first parameter in the #' Tau-Argus syntax is the tab number. If the method is identical #' for all tabs, this first parameter will be ignored and the numbers #' are automatically recalculated for the batch. In the writing -#' \code{suppress = "GH(n,100)"}, n will thus be transformed into 1 for the +#' `suppress = "GH(n,100)"`, n will thus be transformed into 1 for the #' first tab, into 2 for the second tab, etc. #' #' (Tau-Argus peut traiter plusieurs tabulations pour un même jeu de @@ -175,24 +186,24 @@ apriori_batch <- function(ntab, hst_names, sep = ',', ignore_err = 0 , exp_triv #' Sauf mention contraire, utiliser la syntaxe mentionnée dans la documentation #' de Tau-Argus. #' -#' Syntaxe spéciale pour \code{suppress} : le premier paramètre dans la +#' Syntaxe spéciale pour `suppress` : le premier paramètre dans la #' syntaxe Tau-Argus est le numéro de la tabulation. Si la méthode est identique #' pour toutes les tabulations, ce premier paramètre sera ignoré et les numéros #' recalculés automatiquement pour le batch. Dans l'écriture -#' \code{suppress = "GH(n,100)"}, n sera ainsi transformé en 1 pour la première +#' `suppress = "GH(n,100)"`, n sera ainsi transformé en 1 pour la première #' tabulation, en 2 pour la deuxième tabulation, etc.) #' #' @section Table identifiers: -#' If the list \code{explanatory_vars} has names, these will be +#' If the list `explanatory_vars` has names, these will be #' used in the batch to give an identifier to the table, in the form of -#' of a comment line (\code{// "..."}). They will be -#' reused by the \code{import} function to name the R format arrays +#' of a comment line (`// "..."`). They will be +#' reused by the `import` function to name the R format arrays #' tables in output. #' -#' (Si la liste \code{explanatory_vars} comporte des noms, ceux-ci seront +#' (Si la liste `explanatory_vars` comporte des noms, ceux-ci seront #' utilisés dans le batch pour donner un identifiant au tableau, sous la forme -#' d'une ligne de commentaire (\code{// "..."}). Ils seront -#' réutilisés par la fonction \code{import} pour nommer les tableaux formats R +#' d'une ligne de commentaire (`// "..."`). Ils seront +#' réutilisés par la fonction `import` pour nommer les tableaux formats R #' en sortie.) #' #' @section Use an apriori file: @@ -205,8 +216,8 @@ apriori_batch <- function(ntab, hst_names, sep = ',', ignore_err = 0 , exp_triv #' #' The additional options are optional. To change the default values, #' pass a list with the hst file(s) as the first item and -#' complete with the elements having the names \code{sep} for the separator, -#' \code{ignore_err} for IgnoreError and \code{exp_triv} for ExpandTrivial. +#' complete with the elements having the names `sep` for the separator, +#' `ignore_err` for IgnoreError and `exp_triv` for ExpandTrivial. #' As for filenames, specify only one value per parameter or #' as many values as there are tabs. @@ -219,8 +230,8 @@ apriori_batch <- function(ntab, hst_names, sep = ',', ignore_err = 0 , exp_triv #' #' Les options supplémentaires sont facultatives. Pour modifier les valeurs par #' défaut, passer une liste ayant comme premier élément le(s) fichier(s) hst et -#' compléter avec les éléments portant les noms \code{sep} pour le séparateur, -#' \code{ignore_err} pour IgnoreError et \code{exp_triv} pour ExpandTrivial. +#' compléter avec les éléments portant les noms `sep` pour le séparateur, +#' `ignore_err` pour IgnoreError et `exp_triv` pour ExpandTrivial. #' Comme pour les noms de fichiers, spécifier une seule valeur par paramètre ou #' autant de valeurs que de tabulations.) #' @@ -228,51 +239,51 @@ apriori_batch <- function(ntab, hst_names, sep = ',', ignore_err = 0 , exp_triv #' extension). If not specified, a temporary file. \cr #' (nom du fichier arb généré (avec extension). Si non renseigné, un fichier #' temporaire.) -#' @param asc_filename [\strong{required}] name of the asc file +#' @param asc_filename name of the asc file #' (with extension). \cr -#' ([\strong{obligatoire}] nom du fichier asc (avec extension).) +#' ( nom du fichier asc (avec extension).) #' @inheritParams micro_asc_rda -#' @param explanatory_vars [\strong{required}] categorical variables, in +#' @param explanatory_vars categorical variables, in #' form of a list of vectors. Each element of the list is a vector of #' variable names forming a tab. -#' Example: \code{list(c("CJ", "A21"), c("SEX", "REGION"))} for the first -#' table crossing \code{CJ} x \code{A21} and the second table crossing -#' \code{SEXE} x \code{REGION}. +#' Example: `list(c("CJ", "A21"), c("SEX", "REGION"))` for the first +#' table crossing `CJ` x `A21` and the second table crossing +#' `SEXE` x `REGION`. #' If a single tabulation, a simple vector of the variables to be crossed is -#' accepted (no need for \code{list(...)}). \cr -#' ([\strong{obligatoire}] variables catégorielles, sous +#' accepted (no need for `list(...)`). \cr +#' ( variables catégorielles, sous #' forme de liste de vecteurs. Chaque élément de la liste est un vecteur des #' noms des variables formant une tabulation. -#' Exemple: \code{list(c("CJ", "A21"), c("SEX", "REGION"))} pour la première -#' table croisant \code{CJ} x \code{A21} et la seconde croisant -#' \code{SEXE} x \code{REGION} +#' Exemple: `list(c("CJ", "A21"), c("SEX", "REGION"))` pour la première +#' table croisant `CJ` x `A21` et la seconde croisant +#' `SEXE` x `REGION` #' Si une seule tabulation, un simple vecteur des variables à croiser est -#' accepté (pas besoin de \code{list(...)}).) +#' accepté (pas besoin de `list(...)`).) #' @param response_var response variable to be summed, or counted if -#' \code{""}. A single value or as many values as there are tabs. \cr -#' (variable de réponse à sommer, ou comptage si \code{""}. +#' `""`. A single value or as many values as there are tabs. \cr +#' (variable de réponse à sommer, ou comptage si `""`. #' Une seule valeur ou autant de valeurs que de tabulations.) #' @param shadow_var variable(s) for applying the primary secret. If not -#' filled in, \code{response_var} will be used by Tau-Argus. \cr +#' filled in, `response_var` will be used by Tau-Argus. \cr #' (variable(s) pour l'application du secret primaire. Si non -#' renseigné, \code{response_var} sera utilisé par Tau-Argus.) +#' renseigné, `response_var` sera utilisé par Tau-Argus.) #' @param cost_var cost variable(s) for the secondary secret. \cr #' (variable(s) de coût pour le secret secondaire.) -#' @param safety_rules [\strong{required}] primary secret rule(s). +#' @param safety_rules primary secret rule(s). #' String in Tau-Argus batch syntax. The weighting is treated #' in a separate parameter (do not specify WGT here, use the -#' \code{weighted}). \cr -#' ([\strong{obligatoire}] règle(s) de secret primaire. +#' `weighted`). \cr +#' ( règle(s) de secret primaire. #' Chaîne de caractères en syntaxe batch Tau-Argus. La pondération est traitée #' dans un paramètre à part (ne pas spécifier WGT ici, utiliser le paramètre -#' \code{weighted}).) +#' `weighted`).) #' @param weighted indicator(s) (boolean). \cr #' (indicatrice(s) de pondération (booléen).) -#' @param suppress [\strong{required}] secret management method(s) +#' @param suppress secret management method(s) #' secondary (Tau-Argus batch syntax). If the method is the same for #' each tabulation, the first parameter (table number) will be ignored and #' renumbered automatically (see section 'Syntax'). \cr -#' ([\strong{obligatoire}] méthode(s) de gestion du secret +#' ( méthode(s) de gestion du secret #' secondaire (syntaxe batch de Tau-Argus). Si la méthode est la même pour #' chaque tabulation, le premier paramètre (numéro du tableau) sera ignoré et #' renuméroté automatiquement (voir la section 'Syntax').) @@ -291,22 +302,22 @@ apriori_batch <- function(ntab, hst_names, sep = ',', ignore_err = 0 , exp_triv #' vide, autant de noms de fichiers temporaires que de tabulations seront #' générés.) #' @param output_type format of output files (Tau-Argus codification). -#' Default value of the package: \code{"2"} (csv for pivot-table). \cr +#' Default value of the package: `"2"` (csv for pivot-table). \cr #' (format des fichiers en sortie (codification Tau-Argus). -#' Valeur par défaut du package : \code{"2"} (csv for pivot-table).) +#' Valeur par défaut du package : `"2"` (csv for pivot-table).) #' @param output_options additional options for output files. default value of -#' the package: \code{"AS+"} (status display). To specify no option, \code{""}. \cr +#' the package: `"AS+"` (status display). To specify no option, `""`. \cr #' (options supplémentaires des fichiers en sortie. Valeur -#' par défaut du package : \code{"AS+"} (affichage du statut). Pour ne -#' spécifier aucune option, \code{""}.) -#' @param apriori information file(s) \emph{a priori}. See below +#' par défaut du package : `"AS+"` (affichage du statut). Pour ne +#' spécifier aucune option, `""`.) +#' @param apriori information file(s) *a priori*. See below #' for the syntax. \cr -#' (fichier(s) d'informations \emph{a priori}. Voir ci-dessous +#' (fichier(s) d'informations *a priori*. Voir ci-dessous #' pour la syntaxe.) #' @param gointeractive to have the possibility to launch the batch from the #' menu of Tau-Argus. \cr #' (pour avoir la possibilité de lancer le batch depuis le -#' menu de Tau-Argus (\code{FALSE} par défaut).) +#' menu de Tau-Argus (`FALSE` par défaut).) #' #' @return A list of two elements: arb filename and names of output files #' (useful to get back the randomly generated names) \cr diff --git a/R/micro_asc_rda.R b/R/micro_asc_rda.R index 3150072..e367612 100644 --- a/R/micro_asc_rda.R +++ b/R/micro_asc_rda.R @@ -36,7 +36,7 @@ write_rda_1var <- function(info_var) { } -#' @importFrom dplyr %>% + write_rda <- function(info_vars) { @@ -49,29 +49,41 @@ write_rda <- function(info_vars) { } info_vars <- lapply(info_vars, chemin_complet) - vapply(info_vars, write_rda_1var, character(1)) %>% - gsub("(\n)+", "\n", .) %>% # plusieurs sauts de lignes par un seul - sub("\n$", "", .) # supprime dernier saut de ligne + res <- vapply(info_vars, write_rda_1var, character(1)) + res <- gsub("(\n)+", "\n", res) # plusieurs sauts de lignes par un seul + res <- sub("\n$", "", res) # supprime dernier saut de ligne + return(res) } #' Creates asc and rda files from microdata #' +#' @description +#' `r lifecycle::badge("superseded")` +#' +#' Development on `micro_asc_rda()` is complete, and for new code we recommend +#' switching to the tabular-wise protection provided by `tab_rtauargus()` +#' or `tab_multi_manager()`, which offer a lot more features for your +#' protection problems. +#' +#' See more details in `vignette("rtauargus")` or in +#' `vignette("protect_multi_tables)`. +#' #' Creates a fixed length text file (asc) and a metadata file #' (rda) from microdata and additional information. \cr #' (Crée un fichier texte de longueur fixe (asc) et un fichier de métadonnées #' (rda) à partir de microdonnées et d'informations additionnelles.) #' -#' @param microdata [\strong{required}] data.frame containing the microdata. \cr -#' ([\strong{obligatoire}] data.frame contenant les microdonnées.) +#' @param microdata data.frame containing the microdata. \cr +#' ( data.frame contenant les microdonnées.) #' @param asc_filename name of the asc file (with extension). If not filled in, #' a temporary file. \cr #' (nom du fichier asc (avec extension). Si non renseigné, #' un fichier temporaire.) #' @param rda_filename name of the rda file (with extension). If not filled in, -#' \code{asc_filename} with the extension "rda" instead of "asc". \cr +#' `asc_filename` with the extension "rda" instead of "asc". \cr #' (nom du fichier rda (avec extension). Si non renseigné, -#' \code{asc_filename} avec l'extension "rda" à la place de "asc".) +#' `asc_filename` avec l'extension "rda" à la place de "asc".) #' @param weight_var name of the weight variable. \cr #' (nom de la variable de poids.) #' @param holding_var nom de la variable de holding. @@ -91,11 +103,11 @@ write_rda <- function(info_vars) { #' @param totcode code(s) for the total of a categorical variable (see #' section 'Specific parameters' for the syntax of this parameter). The #' variables not specified (neither by default nor explicitly) will be -#' assigned the value of \code{rtauargus.totcode}. \cr +#' assigned the value of `rtauargus.totcode`. \cr #' (code(s) pour le total d'une variable catégorielle (voir #' section 'Specific parameters' pour la syntaxe de ce paramètre). Les #' variables non spécifiées (ni par défaut, ni explicitement) se verront -#' attribuer la valeur de \code{rtauargus.totcode}.) +#' attribuer la valeur de `rtauargus.totcode`.) #' @param missing code(s) for a missing value (see section #' 'Specific parameters' for the syntax of this parameter). \cr #' (code(s) pour une valeur manquante (voir section @@ -108,17 +120,17 @@ write_rda <- function(info_vars) { #' #' @return #' Returns the names of the asc and rda files as a list (invisibly). -#' invisibly). Empty columns (filled with \code{NA} or empty strings) are not +#' invisibly). Empty columns (filled with `NA` or empty strings) are not #' strings) will not be exported to the asc file. A #' warning message will list the affected columns. \cr #' (Renvoie les noms des fichiers asc et rda sous forme de liste (de -#' manière invisible). Les colonnes vides (remplies de \code{NA} ou de chaînes +#' manière invisible). Les colonnes vides (remplies de `NA` ou de chaînes #' de caractères vides) ne seront pas exportées dans le fichier asc. Un #' message d'avertissement listera les colonnes concernées.) #' #' @section Specific parameters: #' -#' The parameters \code{totcode}, \code{missing} and \code{codelist} +#' The parameters `totcode`, `missing` and `codelist` #' are to be filled in as a vector indicating the value to take #' for each variable. #' @@ -129,13 +141,13 @@ write_rda <- function(info_vars) { #' #' For example : #' \itemize{ -#' \item{\code{totcode = "All"} : écrit \code{ "All"} for all +#' \item{`totcode = "All"` : écrit ` "All"` for all #' categorical variables} -#' \item{\code{totcode = c("All", GEO = "France")} : idem, except for the -#' \code{GEO} variable } +#' \item{`totcode = c("All", GEO = "France")` : idem, except for the +#' `GEO` variable } #' } #' -#' (Les paramètres \code{totcode}, \code{missing} et \code{codelist} +#' (Les paramètres `totcode`, `missing` et `codelist` #' sont à renseigner sous la forme d'un vecteur indiquant la valeur à prendre #' pour chaque variable. #' @@ -146,79 +158,79 @@ write_rda <- function(info_vars) { #' #' Par exemple : #' \itemize{ -#' \item{\code{totcode = "Ensemble"} : écrit \code{ "Ensemble"} pour +#' \item{`totcode = "Ensemble"` : écrit ` "Ensemble"` pour #' toutes les variables catégorielles} -#' \item{\code{totcode = c("Ensemble", GEO = "France")} : idem, sauf pour la -#' variable \code{GEO}} +#' \item{`totcode = c("Ensemble", GEO = "France")` : idem, sauf pour la +#' variable `GEO`} #' }) #' #' @section Hierarchical variables: #' -#' The parameter \code{hrc} follows the same syntax rules as \code{totcode}, -#' \code{missing} and \code{codelist} (named vector containing as many elements +#' The parameter `hrc` follows the same syntax rules as `totcode`, +#' `missing` and `codelist` (named vector containing as many elements #' as there are variables to describe). It also has the particularity #' to accept several ways of specifying the values associated with the #' hierarchical variables. #' #' To define a hierarchy based on the positions of characters -#' (\strong{hierierlevels}), pass a sequence of integers separated by +#' (**hierierlevels**), pass a sequence of integers separated by #' spaces. #' -#' \emph{Example :} \code{c(CODECOM = "2 3 0 0 0")} +#' *Example :* `c(CODECOM = "2 3 0 0 0")` #' #' If the hierarchy is defined in a separate hrc file -#' (\strong{hiercodelist}), the function expects the location of this file (and -#' a possible \code{hiercodelist} if it differs from the default option of the +#' (**hiercodelist**), the function expects the location of this file (and +#' a possible `hiercodelist` if it differs from the default option of the #' package). In this case, you can write explicitly the path to an existing file -#' file (\code{c(A38 = "a38.hrc")}), but also make a call to -#' \code{\link{write_hrc}} which will generate an hrc file from microdata. +#' file (`c(A38 = "a38.hrc")`), but also make a call to +#' [write_hrc()] which will generate an hrc file from microdata. #' -#' \emph{Example :} \code{c(A38 = write_hrc(microdata, c("A38", "A21", "A10")))} +#' *Example :* `c(A38 = write_hrc(microdata, c("A38", "A21", "A10")))` #' #' A shortcut for this call is to write the variables constituting the #' hierarchy separated by ">". In this case, the microdata and -#' hierleadstring that \code{write_hrc} uses are those declared in -#' \code{micro_asc_rda}. +#' hierleadstring that `write_hrc` uses are those declared in +#' `micro_asc_rda`. #' -#' \emph{Example :} \code{c(A38 = "A38 > A21 > A10")} \emph{(number of spaces -#' any before and after the ">")} +#' *Example :* `c(A38 = "A38 > A21 > A10")` *(number of spaces +#' any before and after the ">")* #' #' The last two methods require the creation of a temporary file. #' For a reusable hrc file, it is necessary to create it beforehand -#' using \code{write_hrc}. +#' using `write_hrc`. #' #' The three methods require that the elements of the vector in parameter #' be named (with the name of the variable), even if there is only one #' element. #' -#' (Le paramètre \code{hrc} obéit aux mêmes règles de syntaxe que \code{totcode}, -#' \code{missing} et \code{codelist} (vecteur nommé contenant autant d'éléments +#' (Le paramètre `hrc` obéit aux mêmes règles de syntaxe que `totcode`, +#' `missing` et `codelist` (vecteur nommé contenant autant d'éléments #' que de variables à décrire). Il présente de plus la particularité #' d'accepter plusieurs façons de spécifier les valeurs associées aux variables #' hiérarchiques. #' #' Pour définir une hiérarchie basée sur les positions des caractères -#' (\strong{hierlevels}), passer une suite de nombre entiers séparés par des +#' (**hierlevels**), passer une suite de nombre entiers séparés par des #' espaces. #' #' Si la hiérarchie est définie dans un fichier hrc à part -#' (\strong{hiercodelist}), la fonction attend l'emplacement de ce fichier (et -#' un éventuel \code{hierleadstring} s'il diffère de l'option par défaut du +#' (**hiercodelist**), la fonction attend l'emplacement de ce fichier (et +#' un éventuel `hierleadstring` s'il diffère de l'option par défaut du #' package). Dans ce cas, on peut écrire explicitement le chemin vers un fichier -#' existant (\code{c(A38 = "a38.hrc")}), mais aussi passer un appel à -#' \code{\link{write_hrc}} qui génèrera un fichier hrc à partir de microdonnées. +#' existant (`c(A38 = "a38.hrc")`), mais aussi passer un appel à +#' [write_hrc()] qui génèrera un fichier hrc à partir de microdonnées. #' #' Un raccourci pour cet appel est d'écrire les variables constituant la #' hiérarchie séparées par des ">". Dans ce cas, les microdonnées et -#' hierleadstring qu'utilise \code{write_hrc} sont ceux déclarés dans -#' \code{micro_asc_rda}. +#' hierleadstring qu'utilise `write_hrc` sont ceux déclarés dans +#' `micro_asc_rda`. #' -#' \emph{Exemple :} \code{c(A38 = "A38 > A21 > A10")} \emph{(nombre d'espaces -#' quelconque avant et après les ">")} +#' *Exemple :* `c(A38 = "A38 > A21 > A10")` *(nombre d'espaces +#' quelconque avant et après les ">")* #' #' Les deux dernières méthodes passent par la création d'un fichier temporaire. #' Pour un fichier hrc réutilisable, il est nécessaire de le créer au préalable -#' à l'aide de \code{write_hrc}. +#' à l'aide de `write_hrc`. #' #' Les trois méthodes nécessitent que les éléments du vecteur en paramètre #' soient nommés (avec le nom de la variable), même s'il n'y a qu'un seul @@ -226,35 +238,35 @@ write_rda <- function(info_vars) { #' #' @section Number of decimals: #' -#' The parameter \code{decimals} indicates the minimum number of decimals to be +#' The parameter `decimals` indicates the minimum number of decimals to be #' appear in the output file (whatever the number of decimals -#' actually present in \code{microdata}). It applies to all +#' actually present in `microdata`). It applies to all #' real variables (double) but not to integer variables (integer). For -#' add zeros to an integer variable, convert it with \code{as.double} +#' add zeros to an integer variable, convert it with `as.double` #' beforehand. #' #' The digits after the decimal point may be incorrect in the asc file if #' the total number of digits (before or after the decimal separator) is -#' greater than 15. See \code{\link[gdata]{write.fwf}} (function used to +#' greater than 15. See [gdata::write.fwf()] (function used to #' writing the asc file) for more details. \cr #' -#' (Le paramètre \code{decimals} indique le nombre minimal de décimales à faire +#' (Le paramètre `decimals` indique le nombre minimal de décimales à faire #' figurer dans le fichier en sortie (quel que soit le nombre de décimales -#' effectivement présent dans \code{microdata}). Il s'applique à toutes les +#' effectivement présent dans `microdata`). Il s'applique à toutes les #' variables réelles (double) mais pas aux variables entières (integer). Pour -#' ajouter des zéros à une variable entière, la convertir avec \code{as.double} +#' ajouter des zéros à une variable entière, la convertir avec `as.double` #' au préalable. #' #' Les chiffres après la virgule peuvent être incorrects dans le fichier asc si #' le nombre total de chiffres (avant ou après le séparateur décimal) est -#' supérieur à 15. Voir \code{\link[gdata]{write.fwf}} (fonction utilisée pour +#' supérieur à 15. Voir [gdata::write.fwf()] (fonction utilisée pour #' écrire le fichier asc) pour plus de détails.) #' #' @section See also: #' -#' The function \code{\link{rtauargus}}, which uses this +#' The function [micro_rtauargus()], which uses this #' function and inherits its parameters. \cr -#' (La fonction \code{\link{rtauargus}}, qui utilise cette +#' (La fonction [micro_rtauargus()], qui utilise cette #' fonction et hérite de ses paramètres.) #' #' @examples diff --git a/R/rtauargus.R b/R/micro_rtauargus.R similarity index 78% rename from R/rtauargus.R rename to R/micro_rtauargus.R index 6e4d1c0..ece835b 100644 --- a/R/rtauargus.R +++ b/R/micro_rtauargus.R @@ -1,5 +1,16 @@ #' Protects tables from microdata #' +#' @description +#' `r lifecycle::badge("superseded")` +#' +#' Development on `micro_rtauargus()` is complete, and for new code we recommend +#' switching to the tabular-wise protection provided by `tab_rtauargus()` +#' or `tab_multi_manager()`, which offer a lot more features for your +#' protection problems. +#' +#' See more details in `vignette("rtauargus")` or in +#' `vignette("protect_multi_tables)`. +#' #' Protects tables built from microdata and specifications of the crossings. #' The function allows to perform the complete process, namely the creation of #' the asc and rda files, the construction of the arb file, the effective @@ -12,13 +23,13 @@ #' #' The function executes sequentially the functions: \itemize{ #' \item{ -#' \code{\link{micro_asc_rda}} \code{->} -#' \code{\link{micro_arb}} \code{->} -#' \code{\link{run_arb}} +#' [micro_asc_rda()] `->` +#' [micro_arb()] `->` +#' [run_arb()] #' } #' } #' -#' Intermediate files without a name entered (\code{asc_filename}...) +#' Intermediate files without a name entered (`asc_filename`...) #' will be created in a temporary folder, with randomly generated names. #' This mechanism allows the user to abstract from the preparation of #' preparation of the data and to maintain the entire chain of @@ -26,29 +37,29 @@ #' #' (La fonction exécute séquentiellement les fonctions : \itemize{ #' \item{ -#' \code{\link{micro_asc_rda}} \code{->} -#' \code{\link{micro_arb}} \code{->} -#' \code{\link{run_arb}} +#' [micro_asc_rda()] `->` +#' [micro_arb()] `->` +#' [run_arb()] #' } #' } #' -#' Les fichiers intermédiaires sans nom renseigné (\code{asc_filename}...) +#' Les fichiers intermédiaires sans nom renseigné (`asc_filename`...) #' seront créés dans un dossier temporaire, avec des noms générés aléatoirement. #' Ce mécanisme permet à l'utilisateur de s'abstraire de la préparation des #' données propre à Tau-Argus et de maintenir l'intégralité de la chaîne de #' traitements dans R.) #' #' @inheritParams micro_arb -#' @param microdata [\strong{required}] data.frame containing the microdata +#' @param microdata data.frame containing the microdata #' (or path to text files already present: see section -#' \emph{Microdata already as text files}). \cr -#' ([\strong{obligatoire}] data.frame contenant les microdonnées +#' *Microdata already as text files*). \cr +#' ( data.frame contenant les microdonnées #' (ou chemin vers des fichiers texte déjà présents : voir section -#' \emph{Microdata already as text files}).) -#' @param ... optional parameters for \code{micro_asc_rda}, \code{micro_arb} -#' and \code{run_arb}. See the help for these functions. \cr -#' (paramètres optionnels pour \code{micro_asc_rda}, \code{micro_arb} -#' et \code{run_arb}. Voir l'aide de ces fonctions.) +#' *Microdata already as text files*).) +#' @param ... optional parameters for `micro_asc_rda`, `micro_arb` +#' and `run_arb`. See the help for these functions. \cr +#' (paramètres optionnels pour `micro_asc_rda`, `micro_arb` +#' et `run_arb`. Voir l'aide de ces fonctions.) #' #' @inheritSection micro_arb Syntax #' @@ -59,8 +70,8 @@ #' the second element the rda file. The rda file can be omitted if it has the #' same name as the asc file (except for the extension). #' Use this option to start the whole process without the generation -#' of the text data. Do not specify \code{asc_filename} or -#' \code{rda_filename} (used to name the text files to be created, which is +#' of the text data. Do not specify `asc_filename` or +#' `rda_filename` (used to name the text files to be created, which is #' irrelevant here). \cr #' #' Pour utiliser des fichiers asc et rda existant déjà, il est possible de @@ -70,21 +81,21 @@ #' porte le même nom que le fichier asc (à l'extension près). #' #' Utiliser cette option pour lancer le processus complet sans la génération -#' des données en texte. Ne pas spécifier \code{asc_filename} ou -#' \code{rda_filename} (sert à nommer les fichiers texte à créer, ce qui est +#' des données en texte. Ne pas spécifier `asc_filename` ou +#' `rda_filename` (sert à nommer les fichiers texte à créer, ce qui est #' sans objet ici). #' #' @return -#' If \code{import = TRUE}, a list of data.frames (protected tables), -#' \code{NULL} otherwise. \cr +#' If `import = TRUE`, a list of data.frames (protected tables), +#' `NULL` otherwise. \cr #' -#'(Si \code{import = TRUE}, une liste de data.frames (tableaux -#' secrétisés), \code{NULL} sinon.) +#'(Si `import = TRUE`, une liste de data.frames (tableaux +#' secrétisés), `NULL` sinon.) #' #' @seealso #' \code{link{rtauargus_plus}}, a version optimized for a large #' number of tables (at the cost of some usage restrictions). \cr -#' (\code{\link{rtauargus_plus}}, une version optimisée pour un grand +#' ([rtauargus_plus()], une version optimisée pour un grand #' nombre de tableaux (au prix de quelques restrictions d'usage).) #' #' @examples @@ -98,7 +109,7 @@ #' )} #' @export -rtauargus <- function(microdata, +micro_rtauargus <- function(microdata, explanatory_vars, safety_rules, suppress, diff --git a/R/multitable.R b/R/multitable.R index da416e0..91af55f 100644 --- a/R/multitable.R +++ b/R/multitable.R @@ -9,7 +9,7 @@ journal_add_line <- function(journal,...){ #' Manages the secondary secret of a list of tables #' @inheritParams tab_rtauargus -#' @param list_tables named list of dataframes representing the tables to protect +#' @param list_tables named list of `data.frame` or `data.table` representing the tables to protect #' @param list_explanatory_vars named list of character vectors of explanatory #' variables of each table mentionned in list_tables. Names of the list are the same as of the list of tables. #' @param alt_hrc named list for alternative hierarchies (useful for non nested-hierarchies) @@ -17,13 +17,13 @@ journal_add_line <- function(journal,...){ #' @param ip_start integer: Interval protection level to apply at first treatment of each table #' @param ip_end integer: Interval protection level to apply at other treatments #' @param num_iter_max integer: Maximum of treatments to do on each table (default to 10) -#' @param ... other arguments of \code{tab_rtauargus2()} +#' @param ... other arguments of `tab_rtauargus2()` #' #' @return original list of tables. Secret Results of each iteration is added to each table. #' For example, the result of first iteration is called 'is_secret_1' in each table. #' It's a boolean variable, whether the cell has to be masked or not. #' -#' @seealso \code{tab_rtauargus2} +#' @seealso `tab_rtauargus2` #' #' @examples #' library(rtauargus) @@ -58,7 +58,7 @@ journal_add_line <- function(journal,...){ #' \dontrun{ #' options( #' rtauargus.tauargus_exe = -#' "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +#' "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" #' ) #' res_1 <- tab_multi_manager( #' list_tables = list_data_2_tabs, @@ -73,11 +73,39 @@ journal_add_line <- function(journal,...){ #' secret_var = "is_secret_prim", #' totcode = "Total" #' ) +#' +#' +#' # With the reduction dimensions feature +#' +#' data("datatest1") +#' data("datatest2") +#' +#' datatest2b <- datatest2 %>% +#' filter(cj == "Total", treff == "Total", type_distrib == "Total") %>% +#' select(-cj, -treff, -type_distrib) +#' +#' str(datatest2b) +#' +#' res <- tab_multi_manager( +#' list_tables = list(d1 = datatest1, d2 = datatest2b), +#' list_explanatory_vars = list( +#' d1 = names(datatest1)[1:4], +#' d2 = names(datatest2b)[1:2] +#' ), +#' dir_name = "tauargus_files", +#' value = "pizzas_tot_abs", +#' freq = "nb_obs_rnd", +#' secret_var = "is_secret_prim", +#' totcode = "Total", +#' split_tab = TRUE +#' ) +#' #' } #' #' @importFrom rlang .data #' #' @export + tab_multi_manager <- function( list_tables, list_explanatory_vars, @@ -94,6 +122,9 @@ tab_multi_manager <- function( ip_start = 10, ip_end = 0, num_iter_max = 10, + split_tab = FALSE, + nb_tab_option = "smart", + limit = 14700, ... ){ start_time <- Sys.time() @@ -109,7 +140,10 @@ tab_multi_manager <- function( params$value = value params$freq = freq params$suppress = suppress - + params$suppress = suppress + params$split_tab = split_tab + params$nb_tab_option = nb_tab_option + params$limit = limit n_tbx = length(list_tables) # nombre de tableaux @@ -214,7 +248,15 @@ tab_multi_manager <- function( }else{ cost_var_tab <- NULL } - tableau <- tableau[, c(list_explanatory_vars[[nom_tab]], value, freq, cost_var_tab, secret_var)] + secret_var_tab <- if(!is.null(params$secret_no_pl)) c(secret_var,params$secret_no_pl) else secret_var + + tableau <- as.data.frame(tableau)[, c(list_explanatory_vars[[nom_tab]], value, freq, cost_var_tab, secret_var_tab)] + + if(!is.null(params$secret_no_pl)){ + names(tableau)[names(tableau) == params$secret_no_pl] = "secret_no_pl" + } else { + tableau$secret_no_pl <- FALSE + } var_a_ajouter <- setdiff(all_expl_vars, names(tableau)) for (nom_col in var_a_ajouter){ @@ -227,7 +269,7 @@ tab_multi_manager <- function( tableau[[noms_col_T[[nom_tab]]]] <- TRUE - return(tableau) + return(as.data.frame(tableau)) } ) @@ -240,6 +282,9 @@ tab_multi_manager <- function( all = TRUE ) + table_majeure$secret_no_pl_iter <- table_majeure$secret_no_pl + secret_no_pl_iter <- "secret_no_pl_iter" + purrr::walk( noms_col_T, function(col_T){ @@ -258,11 +303,13 @@ tab_multi_manager <- function( # hrc_unif <- res_unif$hrc_unif list_hrc <- purrr::map( - list_explanatory_vars, - function(nom_vars){ - purrr::discard(hrc[nom_vars], is.na) %>% unlist() - } - ) + list_explanatory_vars, + function(nom_vars){ + purrr::discard(hrc[nom_vars], is.na) %>% unlist() + } + ) + + list_hrc <- purrr::map(list_hrc, function(l) if(length(l) == 0) NULL else l) purrr::walk( names(alt_hrc), @@ -293,15 +340,17 @@ tab_multi_manager <- function( num_iter_par_tab[!has_primary_secret] <- 1 num_iter_all = 0 - common_cells_modified <- as.data.frame(matrix(ncol = length(all_expl_vars)+1)) - names(common_cells_modified) <- c(all_expl_vars, "iteration") + # common_cells_modified <- as.data.frame(matrix(ncol = length(all_expl_vars)+1)) + # names(common_cells_modified) <- c(all_expl_vars, "iteration") + + n_common_cells_modified <- 0 journal <- file.path(dir_name,"journal.txt") if(file.exists(journal)) invisible(file.remove(journal)) journal_add_line(journal, "Start time:", format(start_time, "%Y-%m-%d %H:%M:%S")) journal_add_break_line(journal) journal_add_line(journal, "Function called to protect the tables:", func_to_call) - journal_add_line(journal, "Interval Protection Level for first iteration:", ip_start) + journal_add_line(journal, "Interval Protection Level for primary secret cells:", ip_start) journal_add_line(journal, "Interval Protection Level for other iterations:", ip_end) journal_add_line(journal, "Nb of tables to treat: ", n_tbx) journal_add_break_line(journal) @@ -323,19 +372,17 @@ tab_multi_manager <- function( nom_col_identifiante <- paste0("T_", num_tableau) tableau_a_traiter <- which(table_majeure[[nom_col_identifiante]]) - var_secret_prim <- secret_var - - var_secret_apriori <- paste0("is_secret_", num_iter_all-1, collapse = "") - - vrai_tableau <- table_majeure[tableau_a_traiter,] - if (num_iter_all == 1){ - vrai_tableau[,var_secret_apriori] <- vrai_tableau[,var_secret_prim] + var_secret_apriori <- secret_var + } else { + var_secret_apriori <- paste0("is_secret_", num_iter_all-1, collapse = "") } + vrai_tableau <- table_majeure[tableau_a_traiter,] + ex_var <- list_explanatory_vars[[num_tableau]] - vrai_tableau <- vrai_tableau[,c(ex_var, value, freq, var_secret_prim, var_secret_apriori, cost_var)] + vrai_tableau <- vrai_tableau[,c(ex_var, value, freq,var_secret_apriori,secret_no_pl_iter, cost_var)] # Other settings of the function to make secret ---- @@ -344,8 +391,8 @@ tab_multi_manager <- function( params$explanatory_vars = ex_var params$totcode = list_totcode[[num_tableau]] params$hrc = list_hrc[[num_tableau]] - params$secret_prim = var_secret_prim params$secret_var = var_secret_apriori + params$secret_no_pl = secret_no_pl_iter params$suppress = if( substr(suppress,1,3) == "MOD" & num_iter_par_tab[num_tableau] != 1 ){ @@ -360,27 +407,27 @@ tab_multi_manager <- function( suppress } params$ip = if(num_iter_par_tab[num_tableau] == 1) ip_start else ip_end + # params$safety_rules <- "MAN(0)" res <- do.call(func_to_call, params) res$is_secret <- res$Status != "V" - prim_stat <- table(res$Status)["B"] - prim_stat <- ifelse(is.na(prim_stat), 0, prim_stat) - sec_stat <- table(res$Status)["D"] - sec_stat <- ifelse(is.na(sec_stat), 0, sec_stat) - valid_stat <- table(res$Status)["V"] - valid_stat <- ifelse(is.na(valid_stat), 0, valid_stat) + + # Statistiques + prim_stat <- sum(res$Status == "B", na.rm = TRUE) + sec_stat <- sum(res$Status == "D", na.rm = TRUE) + valid_stat <- sum(res$Status == "V", na.rm = TRUE) denom_stat <- nrow(res) - res <- subset(res, select = -Status) + res <- subset(res, select = setdiff(names(res), "Status")) var_secret <- paste0("is_secret_", num_iter_all) table_majeure <- merge(table_majeure, res, all = TRUE) table_majeure[[var_secret]] <- table_majeure$is_secret - table_majeure <- subset(table_majeure, select = -is_secret) + table_majeure <- subset( + table_majeure, + select = setdiff(names(table_majeure), "is_secret") + ) - if(num_iter_all == 1) { - var_secret_apriori <- var_secret_prim - } table_majeure[[var_secret]] <- ifelse( is.na(table_majeure[[var_secret]]), @@ -388,6 +435,12 @@ tab_multi_manager <- function( table_majeure[[var_secret]] ) + table_majeure$secret_no_pl_iter <- ifelse( + table_majeure[[secret_var]], + table_majeure$secret_no_pl, + table_majeure[[var_secret]] + ) #TODO A REVOIR PR CORRIGER LES PL + lignes_modifs <- which(table_majeure[[var_secret_apriori]] != table_majeure[[var_secret]]) cur_tab <- paste0("T_", num_tableau) @@ -400,16 +453,13 @@ tab_multi_manager <- function( ) ) + # update of common cells that have been modified modified <- common_cells[common_cells[[var_secret_apriori]] != common_cells[[var_secret]],all_expl_vars] - modified <- if(sum(is.na(modified))>0) modified[1,][-1,] else modified - if(nrow(modified)>0){ - common_cells_modified <- rbind( - common_cells_modified, - cbind( - modified, - iteration = num_iter_all - ) - ) + # modified <- if(sum(is.na(modified))>0) modified[1,][-1,] else modified + if(nrow(modified) > 0){ + modified <- cbind(modified, iteration = num_iter_all) + common_cells_modified <- if(n_common_cells_modified == 0) modified else rbind(common_cells_modified, modified) + n_common_cells_modified <- n_common_cells_modified + nrow(modified) } for(tab in noms_tbx){ @@ -505,7 +555,9 @@ tab_multi_manager <- function( ) journal_add_break_line(journal) journal_add_line(journal, "Common cells hit by the secret:") - suppressWarnings(gdata::write.fwf(common_cells_modified[-1,], file = journal, append = TRUE)) + if(n_common_cells_modified > 0){ + suppressWarnings(gdata::write.fwf(common_cells_modified, file = journal, append = TRUE)) + } journal_add_break_line(journal) journal_add_line(journal, "End time: ", format(Sys.time(), "%Y-%m-%d %H:%M:%S")) journal_add_break_line(journal) diff --git a/R/options.R b/R/options.R index 0dcc2fe..d253aa8 100644 --- a/R/options.R +++ b/R/options.R @@ -85,18 +85,18 @@ df_op.rtauargus <- function(html = FALSE) { #' These options are used if a mandatory argument of a function is not set #' by the user. They let not to systematically repeat the same parameter #' for each call of a function. The name of the option is the same as the -#' name of the function prefixed by \code{rtauargus.} : +#' name of the function prefixed by `rtauargus.` : #' -#' \emph{For example, \code{rtauargus.decimals} will be used if the argument -#' \code{decimals} in the \code{micro_asc_rda} function is not set by the -#' user.} +#' *For example, `rtauargus.decimals` will be used if the argument +#' `decimals` in the `micro_asc_rda` function is not set by the +#' user.* #' #' On loading the package, all the rtauargus options, that are not already #' been set by the user, are set with their default values (see table below). #' The already defined options keep the values set by the user. #' #' The options can be set during a session with the following instruction -#' \code{options(rtauargus.}...\code{ = }...\code{)}, or with a configuration +#' `options(rtauargus.`...` = `...`)`, or with a configuration #' file where the user have written its own options with such instructions, #' but this latter is not a proper way if reproducibility is sought. #' Les options du package définissent les comportements par défaut des @@ -108,18 +108,18 @@ df_op.rtauargus <- function(html = FALSE) { #' (Ces options sont utilisées si un argument obligatoire d’une fonction n’est #' pas renseigné. Elles permettent de ne pas répéter systématiquement le même #' paramètre à chaque appel d'une fonction. Le nom de l’option est le nom de -#' l’argument d’une fonction précédé de \code{rtauargus.} : +#' l’argument d’une fonction précédé de `rtauargus.` : #' -#' \emph{Par exemple, \code{rtauargus.decimals} sera la valeur utilisée si l’argument -#' \code{decimals} de la fonction \code{micro_asc_rda} n’est pas renseigné par -#' l’utilisateur.} +#' *Par exemple, `rtauargus.decimals` sera la valeur utilisée si l’argument +#' `decimals` de la fonction `micro_asc_rda` n’est pas renseigné par +#' l’utilisateur.* #' #' Au chargement, le package attribue une valeur par défaut à toutes les options #' de rtauargus qui ne sont pas encore déclarées (cf. tableau ci-dessous). Les #' options déjà définies par l'utilisateur gardent leurs valeurs. #' #' Elles peuvent être redéfinies pour une session par une instruction -#' \code{options(rtauargus.}...\code{ = }...\code{)}, ou de manière globale si +#' `options(rtauargus.`...` = `...`)`, ou de manière globale si #' de telles instructions sont placées dans un fichier de configuration propre à #' l'utilisateur (fortement déconseillé si le programme a vocation à être #' reproductible). @@ -128,28 +128,28 @@ df_op.rtauargus <- function(html = FALSE) { #' utiliseront les valeurs par défaut du package.) #' #' @param ... names of the options to reset, separated by commas. If no name is -#' specified, all the options will be reset. The prefix \code{"rtauargus."} +#' specified, all the options will be reset. The prefix `"rtauargus."` #' is not required. \cr #' noms des options à réinitialiser, séparés par des virgules. Si #' aucun nom n'est spécifié, toutes les options du package seront -#' réinitialisées. Le préfixe \code{"rtauargus."} est facultatif. +#' réinitialisées. Le préfixe `"rtauargus."` est facultatif. #' #' @section List of options: #' \tabular{lll}{ -#' \strong{Option} \tab \strong{Default Value} \tab \strong{Function} \cr -#' \code{------------------------} \tab \code{---------------------------------} \tab \code{-------------}\cr -#' rtauargus.decimals \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.decimals} \tab \link{micro_asc_rda}\cr +#' **Option** \tab **Default Value** \tab **Function** \cr +#' `------------------------` \tab `---------------------------------` \tab `-------------`\cr +#' rtauargus.decimals \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.decimals} \tab [micro_asc_rda]\cr #' rtauargus.totcode \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.totcode}" \tab \cr #' rtauargus.missing \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.missing}" \tab \cr #' rtauargus.hierleadstring \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.hierleadstring}" \tab \cr -#' \code{------------------------} \tab \code{---------------------------------} \tab \code{-------------}\cr -#' rtauargus.response_var \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.response_var}" \tab \link{micro_arb} \cr +#' `------------------------` \tab `---------------------------------` \tab `-------------`\cr +#' rtauargus.response_var \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.response_var}" \tab [micro_arb] \cr #' rtauargus.weighted \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.weighted} \tab \cr #' rtauargus.linked \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.linked} \tab \cr #' rtauargus.output_type \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.output_type}" \tab \cr #' rtauargus.output_options \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.output_options}" \tab \cr -#' \code{------------------------} \tab \code{---------------------------------} \tab \code{-------------}\cr -#' rtauargus.missing_dir \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.missing_dir}" \tab \link{run_arb} \cr +#' `------------------------` \tab `---------------------------------` \tab `-------------`\cr +#' rtauargus.missing_dir \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.missing_dir}" \tab [run_arb] \cr #' rtauargus.tauargus_exe \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.tauargus_exe}" \tab \cr #' rtauargus.show_batch_console \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.show_batch_console} \tab \cr #' rtauargus.import \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.import} \tab @@ -173,7 +173,7 @@ df_op.rtauargus <- function(html = FALSE) { #' # resets everything #' reset_rtauargus_options() #' str(rtauargus_options()) -#' @seealso \link{options}, R options system \cr +#' @seealso [options], R options system \cr #' le système d'options de R dans lequel s'insèrent les options de ce package. #' @export #' @rdname rtauargus_options diff --git a/R/rtauargus-package.R b/R/rtauargus-package.R new file mode 100644 index 0000000..f60ba48 --- /dev/null +++ b/R/rtauargus-package.R @@ -0,0 +1,8 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +#' @importFrom lifecycle deprecated +#' @importFrom lifecycle badge +## usethis namespace: end +NULL diff --git a/R/rtauargus_plus.R b/R/rtauargus_plus.R index d6558a7..4f9a7b3 100644 --- a/R/rtauargus_plus.R +++ b/R/rtauargus_plus.R @@ -2,7 +2,7 @@ #' #' Optimization of the function \code{link{rtauargus}} for a large number of #' crossovers (all having the same parameters). \cr -#' (Optimisation de la fonction \code{\link{rtauargus}} pour un grand nombre de +#' (Optimisation de la fonction [micro_rtauargus()] pour un grand nombre de #' croisements (ayant tous les mêmes paramètres).) #' #' In interactive mode, Tau-Argus can process up to 10 tabs @@ -11,16 +11,16 @@ #' can be particularly time consuming, as Tau-Argus takes a lot of time #' to read large text files of microdata. #' -#' \code{rtauargus_plus} helps to improve the speed of execution. The function -#' splits the list of tabs into groups of size \code{grp_size} and -#' makes a call to \code{rtauargus} for each group. It writes an +#' `rtauargus_plus` helps to improve the speed of execution. The function +#' splits the list of tabs into groups of size `grp_size` and +#' makes a call to `micro_rtauargus` for each group. It writes an #' asc file restricted to the only variables actually used within a #' of a group. #' #' The results are then aggregated into a single list, as if Tau-Argus #' had been called only once. #' -#' Modifying \code{grp_size} will not change the result, only the time +#' Modifying `grp_size` will not change the result, only the time #' execution time. A value around 5 (default) seems to be a good compromise #' between reading too large asc files and calling Tau-Argus too often. #' Tau-Argus too much. It can be adjusted according to the number of @@ -32,57 +32,57 @@ #' peut être particulièrement long, car Tau-Argus prend beaucoup de temps #' pour lire des fichiers texte volumineux de microdonnées. #' -#' \code{rtauargus_plus} permet d'améliorer la vitesse d'exécution. La fonction -#' découpe la liste des tabulations en groupes de taille \code{grp_size} et -#' effectue un appel à \code{rtauargus} pour chaque groupe. Elle écrit un +#' `rtauargus_plus` permet d'améliorer la vitesse d'exécution. La fonction +#' découpe la liste des tabulations en groupes de taille `grp_size` et +#' effectue un appel à `micro_rtauargus` pour chaque groupe. Elle écrit un #' fichier asc restreint aux seules variables effectivement utilisées au sein #' d'un groupe. #' #' Les résultats sont ensuite agrégés en une unique liste, comme si Tau-Argus #' n'avait été appelé qu'une seule fois. #' -#' Modifier \code{grp_size} ne changera pas le résultat, seulement le temps +#' Modifier `grp_size` ne changera pas le résultat, seulement le temps #' d'exécution. Une valeur autour de 5 (défaut) semble être un bon compromis #' entre une lecture de fichiers asc trop volumineux et un nombre d'appels à #' Tau-Argus trop important. Elle peut être ajustée en fonction du nombre de #' variables communes à l'intérieur de chaque groupe de tabulations.) #' #' -#' @section Limits in relation to the function \code{rtauargus}: +#' @section Limits in relation to the function `micro_rtauargus`: #' #' In return for the speed of execution, the crossings must have the #' same characteristics (same primary secret rules, same secondary secret method, #' same secondary secret, same weighting variable, etc.). The parameters -#' \code{safety_rules}, \code{supress}, ..., must therefore contain a +#' `safety_rules`, `supress`, ..., must therefore contain a #' unique value. #' -#' Moreover, it is not possible to specify \code{asc_filename}, -#' \code{rda_filename}, \code{arb_filename} or \code{output_names} to +#' Moreover, it is not possible to specify `asc_filename`, +#' `rda_filename`, `arb_filename` or `output_names` to #' retrieve the intermediate files. These files will be written to a #' temporary folder (and overwritten with each new group). Therefore, -#' specifying \code{import = FALSE} is irrelevant and will be ignored. +#' specifying `import = FALSE` is irrelevant and will be ignored. #' #' The data must be a data.frame (asc and rda files not allowed). #' -#' If the \code{linked} option is used, the link will only be effective at +#' If the `linked` option is used, the link will only be effective at #' within each group of tabs. \cr #' #' (En contrepartie de la vitesse d'exécution, les croisements doivent avoir les #' mêmes caractéristiques (mêmes règles de secret primaire, même méthode de #' secret secondaire, même variable de pondération, etc.). Les paramètres -#' \code{safety_rules}, \code{supress}, ..., doivent donc contenir une valeur +#' `safety_rules`, `supress`, ..., doivent donc contenir une valeur #' unique. #' -#' De plus, il n'est pas possible de spécifier \code{asc_filename}, -#' \code{rda_filename}, \code{arb_filename} ou \code{output_names} pour +#' De plus, il n'est pas possible de spécifier `asc_filename`, +#' `rda_filename`, `arb_filename` ou `output_names` pour #' récupérer les fichiers intermédiaires. Ces fichiers seront écrits dans un #' dossier temporaire (et écrasés à chaque nouveau groupe). Par conséquent, -#' spécifier \code{import = FALSE} est sans objet et sera ignoré. +#' spécifier `import = FALSE` est sans objet et sera ignoré. #' #' Les données doivent obligatoirement être un data.frame (fichiers asc et rda #' pas autorisés). #' -#' Si l'option \code{linked} est utilisée, la liaison ne sera effective qu'à +#' Si l'option `linked` est utilisée, la liaison ne sera effective qu'à #' l'intérieur de chaque groupe de tabulations.) #' #' @param grp_size number of tables per Tau-Argus call (an integer between @@ -91,23 +91,23 @@ #' entre 1 et 10).) #' @inheritParams micro_asc_rda #' @inheritParams micro_arb -#' @inheritParams rtauargus -#' @param suppress [\strong{required}] secondary secret management method +#' @inheritParams micro_rtauargus +#' @param suppress secondary secret management method #' (Tau-Argus batch syntax). Only one method allowed for -#' all tables. Example : \code{"GH(.,100)"} (the dot playing the role of the +#' all tables. Example : `"GH(.,100)"` (the dot playing the role of the #' tabulation number). \cr -#' [\strong{obligatoire}] méthode de gestion du secret +#' méthode de gestion du secret #' secondaire (syntaxe batch de Tau-Argus). Une seule méthode autorisée pour -#' tous les tableaux. Exemple \code{"GH(.,100)"} (le point jouant le rôle du +#' tous les tableaux. Exemple `"GH(.,100)"` (le point jouant le rôle du #' numéro de tabulation). #' #' @return A list of data.frames (secret arrays). \cr #' (Une liste de data.frames (tableaux secrétisés).) #' -#' @seealso \code{\link{rtauargus}}, a function called repeatedly by -#' \code{rtauargus_plus}. \cr +#' @seealso [micro_rtauargus()], a function called repeatedly by +#' `rtauargus_plus`. \cr #' fonction appelée de manière répétée par -#' \code{rtauargus_plus}. +#' `rtauargus_plus`. #' #' @examples #' \dontrun{ @@ -183,7 +183,7 @@ rtauargus_plus <- function(grp_size = 5, ), .dots ) - do.call(rtauargus, params_rtauargus) + do.call(micro_rtauargus, params_rtauargus) } ) diff --git a/R/run_arb.R b/R/run_arb.R index d71ac97..6c073db 100644 --- a/R/run_arb.R +++ b/R/run_arb.R @@ -176,14 +176,14 @@ unquote <- function(s) sub("^[\"'](.+)[\"']$", "\\1", s) #' Executes the instructions contained in an .arb file for Tau-Argus. \cr #' (Exécute les instructions contenues dans un fichier .arb pour Tau-Argus.) #' -#' Only the argument \code{arb_filename} is required, because all +#' Only the argument `arb_filename` is required, because all #' necessary information is present in this file. #' #' This is the only function in the package that runs Tau-Argus. It #' therefore requires the software to be accessible from the workstation. #' #' The location of the TauArgus.exe program is defined globally when the -#' loading the package. In fact, the argument \code{tauargus_exe} will not +#' loading the package. In fact, the argument `tauargus_exe` will not #' normally not have to be specified (except to override the global option the #' time of the execution of the function). #' @@ -192,13 +192,13 @@ unquote <- function(s) sub("^[\"'](.+)[\"']$", "\\1", s) #' write the results, the variables to be used (crossings, response variable) in the #' response variable) in the metadata (rda file). \cr #' -#' (Seul l’argument \code{arb_filename} est obligatoire, car toutes les +#' (Seul l’argument `arb_filename` est obligatoire, car toutes les #' informations nécessaires sont présentes dans ce fichier. #' Il s'agit de la seule fonction du package qui exécute Tau-Argus. Elle #' nécessite donc que le logiciel soit accessible depuis le poste de travail. #' #' L'emplacement du programme TauArgus.exe est défini de manière globale au -#' chargement du package. De fait, l'argument \code{tauargus_exe} n'aura +#' chargement du package. De fait, l'argument `tauargus_exe` n'aura #' normalement pas à être spécifié (sauf pour surcharger l'option globale le #' temps de l'exécution de la fonction). #' @@ -231,17 +231,17 @@ unquote <- function(s) sub("^[\"'](.+)[\"']$", "\\1", s) #' only error messages if any (if FALSE) \cr #' (booléen, pour afficher l'exécution du batch (si TRUE) ou #' uniquement les messages d'erreurs, s'il y en a (si FALSE)) -#' @param import to import in R the files produced, \code{TRUE} by +#' @param import to import in R the files produced, `TRUE` by #' default. \cr -#' (pour importer dans R les fichiers produits, \code{TRUE} par +#' (pour importer dans R les fichiers produits, `TRUE` par #' défaut.) -#' @param ... additional parameters for \code{system()}. \cr -#' (paramètres supplémentaires pour \code{system()}.) +#' @param ... additional parameters for `system()`. \cr +#' (paramètres supplémentaires pour `system()`.) #' #' @return \itemize{ #' \item{a list of data.frame containing the results if -#' \code{import = TRUE} (via the \code{link{import}} function)} ; -#' \item{\code{NULL} otherwise}. +#' `import = TRUE` (via the \code{link{import}} function)} ; +#' \item{`NULL` otherwise}. #' } #' #' @inheritSection micro_asc_rda See also diff --git a/R/sp_from_4_to_3.R b/R/sp_from_4_to_3.R new file mode 100644 index 0000000..3c814a0 --- /dev/null +++ b/R/sp_from_4_to_3.R @@ -0,0 +1,295 @@ +# Small functions for use in from_4_to_3() + +# Returns the hierarchical variable with the fewest nodes (= subtotals) +smallest_hrc <- function(hrcfiles) { + v <- list() + for (i in 1:length(hrcfiles)) { + v <- append(v, nb_nodes(hrcfiles, names(hrcfiles[i]))) + } + index_smaller_hrc <- which.min(v) + name_smaller_hrc <- names(hrcfiles)[index_smaller_hrc] + return(name_smaller_hrc) +} + +# Returns the variable with the fewest modalities +smallest_mod <- function(dfs) { + v <- list() + for (colonne in dfs) { + v <- append(v,length(unique(colonne))) + } + index_smaller_mod <- which.min(v) + name_smaller_mod <- names(dfs)[index_smaller_mod] + return(name_smaller_mod) +} + +# Choose a categorical variable +# Preferably the non-hierarchical one with the fewest modalities +# If not available, the hierarchical variable with the fewest nodes +choose_var_priority_non_hierarchical <- function(dfs,totcode,hrcfiles){ + # The categorical variables without hierarchy + cat_vars <- names(totcode) + + non_hier_vars <- intersect( + setdiff(names(dfs), names(hrcfiles)), + cat_vars + ) + + nb_non_hier_vars<-length(non_hier_vars) + + # Principle: preferably choose non-hierarchical variables + + # If more than 1, look at the variables with the fewest modalities + # to create fewer dataframes later + if (nb_non_hier_vars > 1){ + dfs_vars_non_hier <- subset(dfs,select = non_hier_vars) + return (smallest_mod(dfs_vars_non_hier)) + } + else if(nb_non_hier_vars == 1){ + return (non_hier_vars[1]) + } + # Otherwise choose the hierarchical variable with the fewest subtotals + else { + return (smallest_hrc(hrcfiles)) + } +} + +# Returns the hierarchical variable with the most nodes +bigger_hrc <- function(hrcfiles) { + v <- list() + for (i in 1:length(hrcfiles)) { + v <- append(v, nb_nodes(hrcfiles, names(hrcfiles[i]))) + } + index_bigger_hrc <- which.max(v) + name_bigger_hrc <- names(hrcfiles)[index_bigger_hrc] + return(name_bigger_hrc) +} + +# Returns the variable with the most modalities +bigger_mod <- function(dfs) { + v <- list() + for (colonne in dfs) { + v <- append(v, length(unique(colonne))) + } + index_bigger_mod <- which.max(v) + name_bigger_mod <- names(dfs)[index_bigger_mod] + return(name_bigger_mod) +} + +# Choose a categorical variable +# Preferably the hierarchical one with the most nodes +# If not available, the non-hierarchical variable with the most modalities +choose_var_priority_hierarchical <- function(dfs, totcode, hrcfiles) { + # Principle: preferably choose hierarchical variables + + # If no hierarchical variable, choose non-hierarchical variable with the most modalities + if (length(hrcfiles) == 0) { + return(bigger_mod(dfs[names(dfs) %in% names(totcode)])) + # Otherwise, choose the hierarchical variable with the most subtotals + } else { + return(bigger_hrc(hrcfiles)) + } +} + +chose_var_to_merge <- function(dfs, totcode, hrcfiles, maximize_nb_tabs = FALSE) { + if(maximize_nb_tabs){ + return(choose_var_priority_hierarchical(dfs, totcode, hrcfiles)) + } else { + return(choose_var_priority_non_hierarchical(dfs, totcode, hrcfiles)) + } +} + +#' Function reducing from 4 to 3 categorical variables +#' +#' @param dfs data.frame with 4 categorical variables (n >= 2 in the general case) +#' @param dfs_name name of the dataframe +#' @param totcode named vector of totals for categorical variables +#' @param hrcfiles named vector indicating the hrc files of hierarchical variables +#' among the categorical variables of dfs +#' @param sep_dir allows forcing the writing of hrc into a separate folder, +#' default is FALSE +#' @param hrc_dir folder to write hrc files if writing to a new folder is forced +#' or if no folder is specified in hrcfiles +#' @param v1 allows forcing the value of the first variable to merge, +#' not specified by default (NULL) +#' @param v2 allows forcing the value of the second variable to merge, +#' not specified by default (NULL) +#' @param sep separator used during concatenation of variables +#' @param maximize_nb_tabs specifies whether to prefer selecting hierarchical variables with +#' the most nodes in priority (TRUE), generating more tables but with smaller sizes, +#' or non-hierarchical variables with the fewest modalities (FALSE) to create fewer tables +#' +#' @return A list containing the following components: +#' \itemize{ +#' \item `tabs`: named list of 3-dimensional dataframes +#' (n-1 dimensions in the general case) with nested hierarchies +#' \item `hrc`: named list of hrc specific to the variable created +#' through merging +#' \item `alt_tot`: named list of totals +#' \item `vars`: named list of vectors representing the merged variables +#' during the two stages of dimension reduction +#' } +#' +#' @examples +#' library(dplyr) +#' data <- expand.grid( +#' ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), +#' GEO = c("Total", "G1", "G2"), +#' SEX = c("Total", "F", "M"), +#' AGE = c("Total", "AGE1", "AGE2"), +#' stringsAsFactors = FALSE +#' ) %>% +#' as.data.frame() +#' +#' data <- data %>% mutate(VALUE = 1) +#' +#' hrc_act <- "hrc_ACT.hrc" +#' +#' sdcHierarchies::hier_create(root = "Total", nodes = c("A","B")) %>% +#' sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2")) %>% +#' sdcHierarchies::hier_add(root = "B", nodes = c("B1","B2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' # Results of the function +#' res1 <- from_4_to_3( +#' dfs = data, +#' dfs_name = "tab", +#' totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"), +#' hrcfiles = c(ACT = hrc_act), +#' sep_dir = TRUE, +#' hrc_dir = "output" +#' ) +#' +#' # Maximize the number of tables +#' res2 <- from_4_to_3( +#' dfs = data, +#' dfs_name = "tab", +#' totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"), +#' hrcfiles = c(ACT = hrc_act), +#' sep_dir = TRUE, +#' hrc_dir = "output", +#' maximize_nb_tabs = TRUE +#' ) +#' @keywords internal +#' @export +from_4_to_3 <- function( + dfs, + dfs_name, + totcode, + hrcfiles = NULL, + sep_dir = FALSE, + hrc_dir = "hrc_alt", + v1 = NULL, + v2 = NULL, + sep = "_", + maximize_nb_tabs = FALSE) +{ + # Update the output directory containing the hierarchies + if( (length(hrcfiles) != 0) & !sep_dir){ + dir_name <- dirname(hrcfiles[[1]]) + } else { + dir_name <- hrc_dir + } + + # Categorical variables without hierarchy + cat_vars <- names(totcode) + + non_hier_vars <- intersect( + setdiff(names(dfs), names(hrcfiles)), + cat_vars + ) + + # Choice of variables and verification of those given as arguments + + nb_non_hier_vars <- 0 # Hierarchical variable selected so far + + # First variable + if (!is.null(v1)){ + if (!(v1 %in% cat_vars)){ + stop(paste("v1 is not a categorical variable, v1 = ", v1, + "Categorical variables are: ",paste(cat_vars, collapse = ", ")), sep = "") + } + } else { + # a variable is chosen, avoiding v2 + v1 <- chose_var_to_merge(dfs = dfs[setdiff(names(dfs),v2)], + totcode = totcode[setdiff(names(totcode),v2)], + hrcfiles = hrcfiles[setdiff(names(hrcfiles),v2)], + maximize_nb_tabs = maximize_nb_tabs) + } + + if (v1 %in% non_hier_vars){ + # Update the number of selected hierarchical variables + nb_non_hier_vars <- nb_non_hier_vars + 1 + } + + # Second variable + if (!is.null(v2)){ + if (!(v2 %in% cat_vars)){ + stop(paste("v2 is not a categorical variable, v2 = ", v2, + "Categorical variables are: ",paste(cat_vars, collapse = ", ")), sep = "") + } + if (v1 == v2){ + stop("Error. You are trying to merge a variable with itself") + } + + } else { + # a variable is chosen, avoiding v1 + v2 <- chose_var_to_merge(dfs = dfs[setdiff(names(dfs),v1)], + totcode = totcode[setdiff(names(totcode),v1)], + hrcfiles = hrcfiles[!(names(hrcfiles) == v1)], + maximize_nb_tabs = maximize_nb_tabs) + } + + if (v2 %in% non_hier_vars){ + # Update the number of selected hierarchical variables + nb_non_hier_vars <- nb_non_hier_vars + 1 + } + + # The corresponding function is called + + # Case 2 non-hierarchical variables + if(nb_non_hier_vars == 2){ + return(from_4_to_3_case_0_hr(dfs = dfs, + dfs_name = dfs_name, + v1 = v1, + v2 = v2, + totcode = totcode, + dir_name = dir_name, + sep = sep) + ) + + # Case 1 non-hierarchical variable + }else if(nb_non_hier_vars == 1){ + # v2 must be hierarchical, v1 non-hierarchical + # So the variables are put in the right order + if (v2 %in% non_hier_vars){ + tmp <- v2 + v2 <- v1 + v1 <- tmp + } + return(from_4_to_3_case_1_hr(dfs = dfs, + dfs_name = dfs_name, + v1 = v1, + v2 = v2, + totcode = totcode, + hrcfiles = hrcfiles, + dir_name = dir_name, + sep = sep) + ) + + # Case 0 non-hierarchical variable + }else{ + return(from_4_to_3_case_2_hr(dfs = dfs, + dfs_name = dfs_name, + v1 = v1, + v2 = v2, + totcode = totcode, + hrcfiles = hrcfiles, + dir_name = dir_name, + sep = sep) + ) + } +} diff --git a/R/sp_from_4_to_3_case_0_hr.R b/R/sp_from_4_to_3_case_0_hr.R new file mode 100644 index 0000000..1041585 --- /dev/null +++ b/R/sp_from_4_to_3_case_0_hr.R @@ -0,0 +1,226 @@ +#' Transition from 4 to 3 variables by merging two non-hierarchical variables +#' +#' @param dfs data.frame with 4 categorical variables (n >= 2 in the general case) +#' @param dfs_name name of the data.frame in the list provided by the user +#' @param v1 non-hierarchical categorical variable +#' @param v2 non-hierarchical categorical variable +#' @param totcode named vector of totals for categorical variables +#' @param dir_name folder where to write the hrc files +#' if no folder is specified in hrcfiles +#' @param sep separator used when concatenating variables +#' +#' @return A list containing: +#' \itemize{ +#' \item `tabs`: named list of 3-dimensional dataframes +#' (n-1 dimensions in the general case) with nested hierarchies +#' \item `hrc`: named list of hrc specific to the variable created via merging +#' \item `alt_tot`: named list of totals +#' \item `vars`: named list of vectors representing the merged variables +#' during the two steps of dimension reduction +#' } +#' +#' @examples +#' library(dplyr) +#' data <- expand.grid( +#' ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), +#' SEX = c("Total", "F", "M","F1","F2","M1","M2"), +#' AGE = c("Total", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), +#' ECO = c("PIB","Households","Companies"), +#' stringsAsFactors = FALSE, +#' KEEP.OUT.ATTRS = FALSE +#' ) %>% +#' as.data.frame() +#' +#' data <- data %>% mutate(VALUE = 1:n()) +#' +#' hrc_act <- "hrc_ACT.hrc" +#' sdcHierarchies::hier_create(root = "Total", nodes = c("A","B")) %>% +#' sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' hrc_sex <- "hrc_SEX.hrc" +#' sdcHierarchies::hier_create(root = "Total", nodes = c("F","M")) %>% +#' sdcHierarchies::hier_add(root = "F", nodes = c("F1","F2")) %>% +#' sdcHierarchies::hier_add(root = "M", nodes = c("M1","M2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_sex, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' res1 <- from_4_to_3_case_0_hr(dfs = data, +#' dfs_name = "dfs_name", +#' v1 = "ECO",v2 = "AGE", +#' totcode = c(ACT = "Total",SEX = "Total", +#' AGE = "Total",ECO = "PIB"), +#' dir_name = "output") +#' @keywords internal +#' @export +from_4_to_3_case_0_hr <- function( + dfs, + dfs_name, + v1, + v2, + totcode, + dir_name, + sep = "_") +{ + # the different totals + var1_total <- totcode[v1] + var2_total <- totcode[v2] + + # the different modalities of the 2 variables + mods1 <- unique(dfs[[v1]]) + mods2 <- unique(dfs[[v2]]) + + var1_mods_except_total <- mods1[mods1 != var1_total] + var2_mods_except_total <- mods2[mods2 != var2_total] + + # Traitement ad hoc des feuilles uniques (pour Julien) + # Add a fake modality if there is only one modality except total + # to avoid error with rtauargus::write_hrc2 + if (length(var1_mods_except_total)==1){ + var1_mods_except_total<-c(var1_mods_except_total,paste(var1_mods_except_total, + "ZZZ", sep = "")) + } + + if (length(var2_mods_except_total)==1){ + var2_mods_except_total<-c(var2_mods_except_total,paste(var2_mods_except_total, + "ZZZ", sep = "")) + } + + # number of modality for each var + var1_mods_n <- length(var1_mods_except_total) + var2_mods_n <- length(var2_mods_except_total) + + # generalization creation of the tables with merged variables + table_and_hierarchy_creator <- function(var_i_total, + var_j_total, + var_i_mods_except_total, + var_j_mods_except_total, + var_j_mods_n, + vi,vj,i) + { + # Introduction of notations: + # let i = 1, j = 2 + # let i = 2, j = 1 + + if (i == 1){ + j <- 2 + } else { + j <- 1 + } + + # Construction of the levels for the correspondence table + tabi_lvl1 <- expand.grid( + v1 = sort(rep(var_i_mods_except_total, var_j_mods_n)), + v2 = var_j_total, + stringsAsFactors = FALSE + ) %>% as.data.frame() + + v_i <- paste("v",i,sep="") + v_j <- paste("v",j,sep="") + + tabi_lvl1$v3 <- paste(tabi_lvl1[[v_i]], tabi_lvl1[[v_j]], sep = sep) + + # Creation of the level 2 hierarchy + tabi_lvl2 <- expand.grid( + v1 = var_i_mods_except_total, + v2 = var_j_mods_except_total, + stringsAsFactors = FALSE + ) %>% as.data.frame() + + tabi_lvl2 <- tabi_lvl2[order(tabi_lvl2$v1, tabi_lvl2$v2), ] + + tabi_lvl2$v3 <- paste(tabi_lvl2[[v_i]], tabi_lvl2[[v_j]], sep = sep) + + # Creation of the correspondence table + tabi_corresp <- data.frame( + Lvl1 = tabi_lvl1$v3, + Lvl2 = tabi_lvl2$v3, + stringsAsFactors = FALSE + ) + + tabi <- dfs[(dfs[[vi]] != var_i_total) | + (dfs[[vi]] == var_i_total & dfs[[vj]] == var_j_total), ] + tabi[[paste(v1, v2, sep = sep)]]<- paste(tabi[[v1]],tabi[[v2]],sep = sep) + + tabi[[v1]]<-NULL + tabi[[v2]]<-NULL + + return(list(tabi,tabi_corresp)) + } + + # We apply the function for "i=1, j=2" then for "i=2,j=1" + res1 <- table_and_hierarchy_creator(var1_total, + var2_total, + var1_mods_except_total, + var2_mods_except_total, + var2_mods_n, + v1,v2,1) + tab1 <- res1[[1]] + tab1_corresp <- res1[[2]] + + res2 <- table_and_hierarchy_creator(var2_total, + var1_total, + var2_mods_except_total, + var1_mods_except_total, + var1_mods_n, + v2,v1,2) + tab2 <- res2[[1]] + tab2_corresp <- res2[[2]] + + # Construction of hierarchies + # to do : + # use file.path()? + # do not write if the file already exists? + + hrc_tab1 <- rtauargus::write_hrc2(tab1_corresp, + file_name = paste(dir_name,"/", + paste("hrc",dfs_name, + v1,sep = "_"), + ".hrc", + sep=""), + adjust_unique_roots = TRUE + ) + + hrc_tab2 <- rtauargus::write_hrc2(tab2_corresp, + file_name = paste(dir_name,"/", + paste("hrc",dfs_name, + v2,sep = "_"), + ".hrc", + sep=""), + adjust_unique_roots = TRUE + ) + + tabs <- list(tab1, tab2) + + names(tabs) <- c(paste(dfs_name,v1, sep="_"), + paste(dfs_name,v2, sep="_")) + + hrcs <- list(hrc_tab1, + hrc_tab2) + + names(hrcs) <- names(tabs) + + total_total = paste(totcode[v1], + totcode[v2], + sep = sep) + + alt_tot=list(total_total, + total_total) + + names(alt_tot)<- names(tabs) + + return( + list( + tabs = tabs, + hrcs = hrcs, + alt_tot = alt_tot, + vars = c(v1, v2)) + ) +} diff --git a/R/sp_from_4_to_3_case_1_hr.R b/R/sp_from_4_to_3_case_1_hr.R new file mode 100644 index 0000000..917ed55 --- /dev/null +++ b/R/sp_from_4_to_3_case_1_hr.R @@ -0,0 +1,146 @@ +#' Transition from 4 to 3 variables by merging a hierarchical +#' and a non-hierarchical variable +#' +#' @param dfs data.frame with 4 categorical variables (n >= 2 in the general case) +#' @param dfs_name name of the data.frame in the list provided by the user +#' @param v1 non-hierarchical categorical variable +#' @param v2 hierarchical categorical variable +#' @param totcode named vector of totals for categorical variables +#' @param hrcfiles named vector indicating the hrc files of hierarchical variables +#' among the categorical variables of dfs +#' @param dir_name directory where to write the hrc files +#' if no folder is specified in hrcfiles +#' @param sep separator used when concatenating variables +#' +#' @return A list containing: +#' \itemize{ +#' \item `tabs`: named list of 3-dimensional dataframes +#' (n-1 dimensions in the general case) with nested hierarchies +#' \item `hrc`: named list of hrc specific to the variable created by fusion +#' \item `alt_tot`: named list of totals +#' \item `vars`: named list of vectors representing the merged variables +#' during the two stages of dimension reduction +#' } +#' +#' @examples +#' library(dplyr) +#' data <- expand.grid( +#' ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), +#' SEX = c("Total", "F", "M","F1","F2","M1","M2"), +#' AGE = c("Total", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), +#' ECO = c("PIB","Ménages","Entreprises"), +#' stringsAsFactors = FALSE, +#' KEEP.OUT.ATTRS = FALSE +#' ) %>% +#' as.data.frame() +#' +#' data <- data %>% mutate(VALUE = 1:n()) +#' +#' hrc_act <- "hrc_ACT.hrc" +#' sdcHierarchies::hier_create(root = "Total", nodes = c("A","B")) %>% +#' sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' hrc_sex <- "hrc_SEX.hrc" +#' sdcHierarchies::hier_create(root = "Total", nodes = c("F","M")) %>% +#' sdcHierarchies::hier_add(root = "F", nodes = c("F1","F2")) %>% +#' sdcHierarchies::hier_add(root = "M", nodes = c("M1","M2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_sex, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' res1 <- from_4_to_3_case_1_hr(dfs = data, +#' dfs_name = "dfs_name", +#' v1 = "ECO",v2 = "SEX", +#' totcode = c(ACT = "Total",SEX = "Total", +#' AGE = "Total",ECO = "PIB"), +#' hrcfiles = c(ACT = hrc_act, SEX = hrc_sex), +#' dir_name = "output") +#' @keywords internal +#' @export +from_4_to_3_case_1_hr <- function( + dfs, + dfs_name, + v1, + v2, + totcode, + hrcfiles, + dir_name, + sep = "_") +{ + ############################# + ## Creation of code_split ## + ############################# + hrc <- hrcfiles[[v2]] + total <- totcode[[v2]] + + res_sdc <- sdcHierarchies::hier_import(inp = hrc, from = "hrc", root = total) %>% + sdcHierarchies::hier_convert(as = "sdc") + + # Code split gives us the hierarchies as well as the hierarchy levels + # Allows to select a node of the tree and its direct branches + codes_split <- lapply( + res_sdc$dims, + names + ) + + ########################### + # Reduction of hierarchy # + ########################### + + liste_df_4_var_0_hr <- lapply( + codes_split, + function(codes){ + res <- dfs %>% + filter(dfs[[v2]] %in% codes) + } + ) + # We now have data.frames with 0 hierarchical variables + # therefore we can apply the dedicated method + + # Updating the arguments then call the function from_4_to_3_case_0_hr + call_4_to_3_0_hr <- function(dfs, i){ + + if (i <= length(codes_split)) { + totcode[v2] <- codes_split[[i]][1] + dfs_name <- paste(dfs_name, totcode[v2], sep = "_") + + from_4_to_3_case_0_hr(dfs = dfs, + dfs_name = dfs_name, + v1 = v1, + v2 = v2, + totcode = totcode, + dir_name = dir_name, + sep = sep) + } + else { + print(paste("Index", i, "is out of bounds for codes_split.")) + return(NULL) + } + } + + # We transform all our 4 var tables into 3 var + res <- lapply(seq_along(liste_df_4_var_0_hr), function(i) { + call_4_to_3_0_hr(liste_df_4_var_0_hr[[i]], i) + }) + + + # We change the object so that it is the same as in the other cases + tabs <- unlist(lapply(res, function(x) x$tabs), recursive = FALSE) + hrcs <- unlist(lapply(res, function(x) x$hrcs), recursive = FALSE) + alt_tot <- unlist(lapply(res, function(x) x$alt_tot), recursive = FALSE) + + return( + list( + tabs = tabs, + hrcs = hrcs, + alt_tot = alt_tot, + vars = c(v1, v2)) + ) +} diff --git a/R/sp_from_4_to_3_case_2_hr.R b/R/sp_from_4_to_3_case_2_hr.R new file mode 100644 index 0000000..0b8ddcf --- /dev/null +++ b/R/sp_from_4_to_3_case_2_hr.R @@ -0,0 +1,154 @@ +#' Transition from 4 to 3 variables via the merging of two hierarchical variables +#' +#' @param dfs data.frame with 4 categorical variables (n >= 2 in the general case) +#' @param dfs_name name of the data.frame in the list provided by the user +#' @param v1 hierarchical categorical variable +#' @param v2 hierarchical categorical variable +#' @param totcode named vector of totals for categorical variables +#' @param hrcfiles named vector indicating the hrc files of hierarchical variables +#' among the categorical variables of dfs +#' @param dir_name folder where to write the hrc files +#' if no folder is specified in hrcfiles +#' @param sep separator used during the concatenation of variables +#' +#' @return A list containing the following components: +#' \itemize{ +#' \item `tabs`: named list of 3-dimensional dataframes +#' (n-1 dimensions in the general case) with nested hierarchies +#' \item `hrcs`: named list of hrc specific to the variable +#' created via the merge +#' \item `alt_tot`: named list of totals +#' \item `vars`: named list of vectors representing the merged variables +#' during the two stages of dimension reduction +#' } +#' +#' @examples +#' library(dplyr) +#' data <- expand.grid( +#' ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), +#' SEX = c("Total", "F", "M","F1","F2","M1","M2"), +#' AGE = c("Total", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), +#' ECO = c("PIB","Ménages","Entreprises"), +#' stringsAsFactors = FALSE, +#' KEEP.OUT.ATTRS = FALSE +#' ) %>% +#' as.data.frame() +#' +#' data <- data %>% mutate(VALUE = 1:n()) +#' +#' hrc_act <- "hrc_ACT.hrc" +#' sdcHierarchies::hier_create(root = "Total", nodes = c("A","B")) %>% +#' sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' hrc_sex <- "hrc_SEX.hrc" +#' sdcHierarchies::hier_create(root = "Total", nodes = c("F","M")) %>% +#' sdcHierarchies::hier_add(root = "F", nodes = c("F1","F2")) %>% +#' sdcHierarchies::hier_add(root = "M", nodes = c("M1","M2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_sex, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' res <- from_4_to_3_case_2_hr(dfs = data, +#' dfs_name = "dfs_name", +#' v1 = "ACT",v2 = "SEX", +#' totcode = c(ACT = "Total",SEX = "Total", +#' AGE = "Total",ECO = "PIB"), +#' hrcfiles = c(ACT = hrc_act, SEX = hrc_sex), +#' dir_name = "output") +#' @keywords internal +#' @export +from_4_to_3_case_2_hr <- function( + dfs, + dfs_name, + v1, + v2, + totcode, + hrcfiles, + dir_name, + sep = "_"){ + ############################# + ## Creating code_split ## + ############################# + hrc1 <- hrcfiles[[v1]] + total1 <- totcode[[v1]] + + # Creating the table providing the hierarchy levels + res_sdc <- sdcHierarchies::hier_import(inp = hrc1, from = "hrc", root = total1) %>% + sdcHierarchies::hier_convert(as = "sdc") + + codes_split_1 <- lapply( + res_sdc$dims, + names + ) + + ########################### + # Hierarchy Reduction # + ########################### + + # fonc_liste_df_4_var_1_non_hr <- function(codes_split,dfs){ + # lapply( + # codes_split_1, + # function(codes){ + # res <- dfs %>% + # filter(dfs[[v1]] %in% codes) + # } + # ) + # } + + liste_df_4_var_1_hr <- lapply( + codes_split_1, + function(codes){ + res <- dfs %>% + filter(dfs[[v1]] %in% codes) + } + ) + + # We now have data.frames with 1 hierarchical variables (v1) + # therefore we can apply the dedicated method + + # Update arguments then call the function from_4_to_3_case_1_hr + call_4_to_3_1_hr <- function(dfs, i){ + + if (i <= length(codes_split_1)) { + totcode[v1] <- codes_split_1[[i]][1] + dfs_name <- paste(dfs_name, totcode[v1], sep = "_") + + from_4_to_3_case_1_hr(dfs = dfs, + dfs_name = dfs_name, + v1 = v1, + v2 = v2, + totcode = totcode, + hrcfiles = hrcfiles, + dir_name = dir_name, + sep = sep) + } + else { + print(paste("Index", i, "is out of bounds for codes_split.")) + return(NULL) + } + } + + # We transform all our 4-var tables into 3-var tables + res <- lapply(seq_along(liste_df_4_var_1_hr), function(i) { + call_4_to_3_1_hr(liste_df_4_var_1_hr[[i]], i) + }) + + tabs <- unlist(lapply(res, function(x) x$tabs), recursive = FALSE) + hrcs <- unlist(lapply(res, function(x) x$hrcs), recursive = FALSE) + alt_tot <- unlist(lapply(res, function(x) x$alt_tot), recursive = FALSE) + + return( + list( + tabs = tabs, + hrcs = hrcs, + alt_tot = alt_tot, + vars = c(v1, v2)) + ) +} diff --git a/R/sp_from_5_to_3.R b/R/sp_from_5_to_3.R new file mode 100644 index 0000000..137f92b --- /dev/null +++ b/R/sp_from_5_to_3.R @@ -0,0 +1,329 @@ +# Count the number of nodes in a hierarchical file +# Expects 2 arguments: +# - Either a named list and a variable, +# - Or an hrc (hierarchical file) and hrc_name = FALSE +nb_nodes <- function(hrcfiles, v = NULL, hrc_name = TRUE) { + # Check if the variable has an associated hrc file or if hrc_name == FALSE + if (hrc_name && !(v %in% names(hrcfiles)) || (!hrc_name && is.null(hrcfiles))) { + # Non-hierarchical variable or hrcfiles == NULL + return(1) + } + + # Take the specified file if hrc_name = TRUE, otherwise take the hrc directly provided + hrc <- ifelse(hrc_name, hrcfiles[[v]], hrcfiles) + + # Unimportant value for the following steps + total <- "This_Is_My_Total" + + # Convert to hierarchy + res_sdc <- sdcHierarchies::hier_import(inp = hrc, from = "hrc", root = total) %>% + sdcHierarchies::hier_convert(as = "sdc") + + # Return the number of nodes + return(length(res_sdc$dims)) +} + +#' Function reducing from 5 to 3 categorical variables +#' +#' @param dfs data.frame with 5 categorical variables (n >= 3 in the general case) +#' @param dfs_name name of the data.frame in the list provided by the user +#' @param totcode named vector of totals for categorical variables +#' @param hrcfiles named vector indicating the hrc files of hierarchical variables +#' among the categorical variables of dfs +#' @param sep_dir allows forcing the writing of hrc files in a separate folder +#' defaulted to FALSE +#' @param hrc_dir folder where to write the hrc files if forcing the writing +#' in a new folder or if no folder is specified in hrcfiles +#' @param v1 allows forcing the value of the first variable to merge +#' when reducing from 5 to 4 dimensions, not specified by default (NULL) +#' @param v2 allows forcing the value of the second variable to merge +#' when reducing from 5 to 4 dimensions, not specified by default (NULL) +#' @param v3 allows forcing the value of the first variable to merge +#' when reducing from 4 to 3 dimensions, not specified by default (NULL) +#' @param v4 allows forcing the value of the second variable to merge +#' when reducing from 4 to 3 dimensions, not specified by default (NULL) +#' @param sep separator used during concatenation of variables +#' @param maximize_nb_tabs specifies whether to prefer selecting hierarchical variables with +#' the most nodes as a priority (TRUE), which generates more tables +#' but of smaller size, or non-hierarchical variables with the least modality (FALSE) +#' to create fewer tables +#' @param verbose prints the different steps of the function to notify +#' the user of the progress, mainly for the general function gen_tabs_5_4_to_3() +#' +#' @return a list containing the following components: +#' \itemize{ +#' \item `tabs`: named list of dataframes with 3 dimensions +#' (n-2 dimensions in the general case) endowed with nested hierarchies +#' \item `hrcs5_4`: named list of hrc specific to the variable created +#' via the merge when reducing from 5 to 4 dimensions +#' \item `hrcs4_3`: named list of hrc specific to the variable created +#' via the merge when reducing from 4 to 3 dimensions +#' \item `alt_tot5_4`: named list of totals when reducing from 5 to 4 dimensions +#' \item `alt_tot4_3`: named list of totals when reducing from 4 to 3 dimensions +#' \item `vars`: named list of vectors representing the merged variables +#' during the two steps of dimension reduction +#' } +#' +#' @examples +#' library(dplyr) +#' data <- expand.grid( +#' ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), +#' GEO = c("Total", "GA", "GB", "GA1", "GA2", "GB1", "GB2"), +#' SEX = c("Total", "F", "M","F1","F2","M1","M2"), +#' AGE = c("Total", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), +#' ECO = c("PIB","Ménages","Entreprises"), +#' stringsAsFactors = FALSE, +#' KEEP.OUT.ATTRS = FALSE +#' ) %>% +#' as.data.frame() +#' +#' data <- data %>% mutate(VALUE = 1:n()) +#' +#' hrc_act <- "hrc_ACT.hrc" +#' sdcHierarchies::hier_create(root = "Total", nodes = c("A","B")) %>% +#' sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' hrc_geo <- "hrc_GEO.hrc" +#' sdcHierarchies::hier_create(root = "Total", nodes = c("GA","GB")) %>% +#' sdcHierarchies::hier_add(root = "GA", nodes = c("GA1","GA2")) %>% +#' sdcHierarchies::hier_add(root = "GB", nodes = c("GB1","GB2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_geo, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' hrc_sex <- "hrc_SEX.hrc" +#' sdcHierarchies::hier_create(root = "Total", nodes = c("F","M")) %>% +#' sdcHierarchies::hier_add(root = "F", nodes = c("F1","F2")) %>% +#' sdcHierarchies::hier_add(root = "M", nodes = c("M1","M2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_sex, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' # Results of the function +#' res1 <- from_5_to_3( +#' dfs = data, +#' dfs_name = "tab", +#' totcode = c(SEX="Total",AGE="Total", GEO="Total", ACT="Total", ECO = "PIB"), +#' hrcfiles = c(ACT = hrc_act, GEO = hrc_geo, SEX = hrc_sex), +#' sep_dir = TRUE, +#' hrc_dir = "output", +#' v1 = "ACT", +#' v2 = "AGE", +#' v3 = "SEX", +#' v4 = "ECO" +#' ) +#' +#' res2 <- from_5_to_3( +#' dfs = data, +#' dfs_name = "tab", +#' totcode = c(SEX="Total",AGE="Total", GEO="Total", ACT="Total", ECO = "PIB"), +#' hrcfiles = c(ACT = hrc_act, GEO = hrc_geo, SEX = hrc_sex), +#' sep_dir = TRUE, +#' hrc_dir = "output", +#' verbose = TRUE +#' ) +#' @keywords internal +#' @export +from_5_to_3 <- function( + dfs, + dfs_name, + totcode, + hrcfiles = NULL, + sep_dir = FALSE, + hrc_dir = "hrc_alt", + v1 = NULL, + v2 = NULL, + v3 = NULL, + v4 = NULL, + sep = "_", + maximize_nb_tabs = FALSE, + verbose = FALSE) +{ + # Update the output folder containing the hierarchies + if( (length(hrcfiles) != 0) & !sep_dir){ + dir_name <- dirname(hrcfiles[[1]]) + } else { + dir_name <- hrc_dir + } + + # We remove a dimension from our starting dataframe + res_5_4 <- from_4_to_3(dfs = dfs, + dfs_name = dfs_name, + totcode = totcode, + hrcfiles = hrcfiles, + sep_dir = TRUE, + hrc_dir = dir_name, + v1 = v1, + v2 = v2, + sep = sep, + maximize_nb_tabs = maximize_nb_tabs) + if (verbose){ + cat(paste(dfs_name,"has generated",length(res_5_4$tabs),"tables in total\n")) + cat("Reducing from 4 to 3...\n") + } + + # Retrieving the merged variables + v1f <- res_5_4$vars[[1]] + v2f <- res_5_4$vars[[2]] + new_var = paste(v1f, v2f, sep=sep) + + # Updating the totals + totcode2 <- totcode + totcode2 <- totcode2[!(names(totcode2) %in% c(v1f, v2f))] + # totcode2[[new_var]] <- 1 + + # Updating hrc files + hrcfiles2 <- hrcfiles + hrcfiles2 <- hrcfiles2[!(names(hrcfiles2) %in% c(v1f, v2f))] + + # Categorical variables without hierarchy in our 4D tables + cat_vars <- c(names(totcode2),new_var) + + non_hier_vars <- intersect( + setdiff(names(dfs), names(hrcfiles2)), + cat_vars + ) + + # Choice of variables for the 4 -> 3 transition and verification of those provided in argument + # We now choose v3 and v4 to be sure that the same variable + # is created within all the sub-tables + + # First variable for the 4 to 3 transition + if (!is.null(v3)){ + if (!(v3 %in% cat_vars)){ + stop(paste("v3 is not a categorical variable, v3 = ", v3, + "The categorical variables are: ",paste(cat_vars, collapse = ", ")), sep = "") + } + } else { + # we choose a variable avoiding v4 + v3 <- chose_var_to_merge(dfs = dfs[setdiff(names(dfs),v4)], + totcode = totcode2[setdiff(names(totcode2),v4)], + hrcfiles = hrcfiles2[setdiff(names(hrcfiles2),v4)], + maximize_nb_tabs = maximize_nb_tabs) + + if (!is.null(v4)){ + # We need to do two different if statements otherwise NULL != new_var crashes! + if (v4 != new_var & maximize_nb_tabs == TRUE){ + v3 <- new_var + } + # If v4 = NULL no need to compare v4 != new_var + } else if (maximize_nb_tabs == TRUE){ + v3 <- new_var + } + } + + # Second variable for the 4 to 3 transition + if (!is.null(v4)){ + if (!(v4 %in% cat_vars)){ + stop(paste("v4 is not a categorical variable, v4 = ", v4, + "The categorical variables are: ",paste(cat_vars, collapse = ", ")), sep = "") + } + if (v3 == v4){ + stop("Error. You are trying to merge a variable with itself") + } + + } else { + # we choose a variable avoiding v3 + v4 <- chose_var_to_merge(dfs = dfs[setdiff(names(dfs),v3)], + totcode = totcode2[setdiff(names(totcode2),v3)], + hrcfiles = hrcfiles2[setdiff(names(hrcfiles2),v3)], + maximize_nb_tabs = maximize_nb_tabs) + + # Rq : v3 can not be NULL + if (v3 != new_var & maximize_nb_tabs == TRUE){ + v4 <- new_var + } + } + + appel_4_3_gen <- function(nom_dfsb){ + # Update the arguments of the function + dfsb <- res_5_4$tabs[[nom_dfsb]] + + hrcfiles2b <- c(hrcfiles2, res_5_4$hrcs[[nom_dfsb]]) + names(hrcfiles2b)[length(hrcfiles2b)] <- new_var + + totcode2[[new_var]] <- res_5_4$alt_tot[[nom_dfsb]] + + from_4_to_3(dfs = dfsb, + dfs_name = nom_dfsb, + totcode = totcode2, + hrcfiles = hrcfiles2b, + sep_dir = TRUE, + hrc_dir = dir_name, + v1 = v3, + v2 = v4, + sep = sep) + } + + # Transform all our 4-var tables into 3-var tables + res_5_3 <- lapply( + names(res_5_4$tabs), + appel_4_3_gen + ) + + tabs <- unlist(lapply(res_5_3, function(x) x$tabs), recursive = FALSE) + hrcs4_3 <- unlist(lapply(res_5_3, function(x) x$hrcs), recursive = FALSE) + alt_tot4_3 <- unlist(lapply(res_5_3, function(x) x$alt_tot), recursive = FALSE) + + vars1 <- res_5_4$vars + vars2 <- res_5_3[[1]]$vars # merged variables are always the same + vars_tot <- list(vars1,vars2) + names(vars_tot) <- c("five_to_three","four_to_three") + + # Memorization of res5_4 + + # Case we merge 4 different variables + if (!(new_var %in% c(v3,v4))){ + # We repeat as many times res5_4[i] as the table will create + # 3-dimensional tables + + # Each 4-dimensional table will create the same number of 3-dimensional tables + # because the selected variables have the same modes in each of them + nb_rep <- length(tabs) / length(res_5_4$tabs) + hrcs5_4 <- as.list(unlist(lapply(res_5_4$hrcs, + function(x) rep(x,nb_rep)))) + + alt_tot5_4 <- as.list(unlist(lapply(res_5_4$alt_tot, + function(x) rep(x,nb_rep)))) + + # If we merge 3 variables into one, the number of tables + # created by each table changes! + } else { + # Store the name of the variable that is not new_var in a new object + non_fused_var <- ifelse(v3 == new_var, v4, v3) + + # Calculate the value of nb_nodes once for each res_5_4$hrcs[[x]] + # to avoid calculating the same quantity twice + results <- lapply(1:length(res_5_4$hrcs), function(x) { + nb_node_value <- 2 * nb_nodes(res_5_4$hrcs[[x]], hrc_name = FALSE) * + nb_nodes(hrcfiles2, non_fused_var) + + # Use the calculated value for hrcs5_4 and alt_tot5_4 + list( + hrcs = rep(res_5_4$hrcs[[x]], nb_node_value), + alt_tot = rep(res_5_4$alt_tot[[x]], nb_node_value) + ) + }) + + # Extract the values for hrcs5_4 and alt_tot5_4 + hrcs5_4 <- as.list(unlist(lapply(results, function(x) x$hrcs))) + alt_tot5_4 <- as.list(unlist(lapply(results, function(x) x$alt_tot))) + } + + return(list(tabs = tabs, + hrcs5_4 = hrcs5_4, + hrcs4_3 = hrcs4_3, + alt_tot5_4 = alt_tot5_4, + alt_tot4_3 = alt_tot4_3, + vars = vars_tot) + ) +} diff --git a/R/sp_reduce_dims.R b/R/sp_reduce_dims.R new file mode 100644 index 0000000..44e8f72 --- /dev/null +++ b/R/sp_reduce_dims.R @@ -0,0 +1,921 @@ +#' General function that selects the appropriate separator and applies dimension reduction. +#' +#' @param dfs data.frame with 4 or 5 categorical variables +#' @param dfs_name name of the data.frame in the list provided by the user +#' @param totcode named vector of totals for categorical variables +#' @param hrcfiles named vector indicating the hrc files of hierarchical variables +#' among the categorical variables of dfs +#' @param sep_dir allows forcing the writing of hrc into a separate folder, +#' default is FALSE +#' @param hrc_dir folder to write hrc files if writing to a new folder is forced +#' or if no folder is specified in hrcfiles +#' @param vars_to_merge NULL or vector of variables to be merged: +#' 2 in dimension 4; 3 or 4 in dimension 5 +#' @param nb_tab_option strategy to follow for choosing variables automatically: +#' \itemize{ +#' \item `'min'`: minimize the number of tables; +#' \item `'max'`: maximize the number of tables; +#' \item `'smart'`: minimize the number of tables under the constraint +#' of their row count. +#' } +#' @param limit maximum allowed number of rows in the smart or over_split = TRUE case +#' @param over_split indicates if we split in several tables the tables bigger than +#' limit at the end of the reduction process ; it decreases the number +#' of hierarchy of these tables +#' @param vec_sep vector of candidate separators to use +#' @param verbose print the different steps of the function to inform the user +#' of progress +#' +#' @return A list containing: +#' \itemize{ +#' \item `tabs`: named list of 3-dimensional dataframes +#' with nested hierarchies +#' \item `alt_hrc`: named list of hrc specific to the variables created +#' during merging to go to dimension 3 +#' \item `alt_totcode`: named list of totals specific to the variables +#' created during merging to go to dimension 3 +#' \item `vars`: categorical variables of the output dataframes +#' \item `sep`: separator used to link the variables +#' \item `totcode`: named vector of totals for all categorical variables +#' \item `hrcfiles`: named vector of hrc for categorical variables +#' (except the merged one) +#' \item `fus_vars`: named vector of vectors representing the merged +#' variables during dimension reduction +#' } +#' +#' @importFrom sdcHierarchies hier_import hier_convert +#' @importFrom stringr str_detect +#' @importFrom dplyr select mutate filter +#' +#' @examples +#' library(dplyr) +#' # Examples for dimension 4 +#' +#' data <- expand.grid( +#' ACT = c("Total", "A", "B", "A1", "A2","A3", "B1", +#' "B2","B3","B4","C","D","E","F","G","B5"), +#' GEO = c("Total", "G1", "G2"), +#' SEX = c("Total", "F", "M"), +#' AGE = c("Total", "AGE1", "AGE2"), +#' stringsAsFactors = FALSE +#' ) %>% +#' as.data.frame() %>% +#' mutate(VALUE = 1) +#' +#' if(!dir.exists("hrc")) dir.create("hrc") +#' hrc_act <- "hrc/hrc_ACT4.hrc" +#' +#' sdcHierarchies::hier_create( +#' root = "Total", +#' nodes = c("A","B","C","D","E","F","G") +#' ) %>% +#' sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2","A3")) %>% +#' sdcHierarchies::hier_add(root = "B", nodes = c("B1","B2","B3","B4","B5")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table( +#' file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE +#' ) +#' +#' # Reduce dim by forcing variables to be merged +#' res1 <- reduce_dims( +#' dfs = data, +#' dfs_name = "tab", +#' totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"), +#' hrcfiles = c(ACT = hrc_act), +#' sep_dir = TRUE, +#' vars_to_merge = c("ACT", "GEO"), +#' hrc_dir = "output", +#' verbose = TRUE +#' ) +#' +#' # Split the output in order to be under the limit & forcing variables to be merged +#' res1b <- reduce_dims( +#' dfs = data, +#' dfs_name = "tab", +#' totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"), +#' hrcfiles = c(ACT = hrc_act), +#' sep_dir = TRUE, +#' hrc_dir = "output", +#' nb_tab_option = 'smart', +#' over_split = TRUE, +#' verbose = TRUE, +#' limit = 100 +#' ) +#' +#' # Result of the function (minimizes the number of created tables by default) +#' res2 <- reduce_dims( +#' dfs = data, +#' dfs_name = "tab", +#' totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"), +#' hrcfiles = c(ACT = hrc_act), +#' sep_dir = TRUE, +#' hrc_dir = "output", +#' verbose = TRUE +#' ) +#' +#' # Result of the function (maximize the number of created tables) +#' res3 <- reduce_dims( +#' dfs = data, +#' dfs_name = "tab", +#' totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"), +#' hrcfiles = c(ACT = hrc_act), +#' sep_dir = TRUE, +#' hrc_dir = "output", +#' nb_tab_option = "max", +#' verbose = TRUE +#' ) +#' +#' # Example for dimension 5 +#' +#' data <- expand.grid( +#' ACT = c("Total_A", paste0("A", seq(1,5),"_"),paste0("A1_", seq(1,7)),paste0("A2_", seq(1,9))), +#' GEO = c("Total_G", "GA", "GB", "GA1", "GA2", "GB1", "GB2","GA3","GB3","GB4"), +#' SEX = c("Total_S", "F", "M","F1","F2","M1","M2"), +#' AGE = c("Ensemble", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), +#' ECO = c("PIB","Ménages","Entreprises"), +#' stringsAsFactors = FALSE, +#' KEEP.OUT.ATTRS = FALSE +#' ) %>% +#' as.data.frame() %>% +#' mutate(VALUE = 1:n()) +#' +#' hrc_act <- "hrc/hrc_ACT5.hrc" +#' sdcHierarchies::hier_create(root = "Total_A", nodes = paste0("A", seq(1,5),"_")) %>% +#' sdcHierarchies::hier_add(root = "A1_", nodes = paste0("A1_", seq(1,7))) %>% +#' sdcHierarchies::hier_add(root = "A2_", nodes = paste0("A2_", seq(1,9))) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' hrc_age <- "hrc/hrc_AGE5.hrc" +#' sdcHierarchies::hier_create(root = "Ensemble", nodes = c("AGE1", "AGE2")) %>% +#' sdcHierarchies::hier_add(root = "AGE1", nodes = c("AGE11", "AGE12")) %>% +#' sdcHierarchies::hier_add(root = "AGE2", nodes = c("AGE21", "AGE22")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_age, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' hrc_geo <- "hrc/hrc_GEO5.hrc" +#' sdcHierarchies::hier_create(root = "Total_G", nodes = c("GA","GB")) %>% +#' sdcHierarchies::hier_add(root = "GA", nodes = c("GA1","GA2","GA3")) %>% +#' sdcHierarchies::hier_add(root = "GB", nodes = c("GB1","GB2","GB3","GB4")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_geo, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' # Results of the function +#' res4 <- reduce_dims( +#' dfs = data, +#' dfs_name = "tab", +#' totcode = c(SEX = "Total_S", AGE = "Ensemble", GEO = "Total_G", ACT = "Total_A", ECO = "PIB"), +#' hrcfiles = c(ACT = hrc_act, GEO = hrc_geo, AGE = hrc_age), +#' sep_dir = TRUE, +#' hrc_dir = "output", +#' verbose = TRUE +#' ) +#' +#' res5 <- reduce_dims( +#' dfs = data, +#' dfs_name = "tab", +#' totcode = c(SEX = "Total_S", AGE = "Ensemble", GEO = "Total_G", ACT = "Total_A", ECO = "PIB"), +#' hrcfiles = c(ACT = hrc_act, GEO = hrc_geo), +#' sep_dir = TRUE, +#' hrc_dir = "output", +#' nb_tab_option = 'smart', +#' limit = 1300, +#' verbose = TRUE +#' ) +#' +#' res6 <- reduce_dims( +#' dfs = data, +#' dfs_name = "tab", +#' totcode = c(SEX = "Total_S", AGE = "Ensemble", GEO = "Total_G", ACT = "Total_A", ECO = "PIB"), +#' hrcfiles = c(ACT = hrc_act, GEO = hrc_geo), +#' sep_dir = TRUE, +#' hrc_dir = "output", +#' nb_tab_option = 'min', +#' verbose = TRUE, +#' limit = 4470, +#' over_split = TRUE +#' ) +#' @keywords internal +#' @export +reduce_dims <- function( + dfs, + dfs_name, + totcode, + hrcfiles = NULL, + sep_dir = FALSE, + hrc_dir = "hrc_alt", + vars_to_merge = NULL, + nb_tab_option = "min", + limit = NULL, + over_split = FALSE, + vec_sep = c("___","_XXX_","_YYY_", "_TTT_", "_UVW_"), + verbose = FALSE +){ + + # TODO OR NOT: + # to save time: parallelize the lapply for variable selection + # lapply for reducing from 4 to 3 dimensions + # in the case of dimension 5 + + dfs <- as.data.frame(dfs) + + + # Check if dfs_name is a character string + if (!is.character(dfs_name)){ + stop("dfs_name must be a character string.") + } + + # Check if all modalities of totcode are present in dfs + if (any(!names(totcode) %in% names(dfs))){ + stop("At least one modality in totcode is not present in dfs!") + } + + # Check if the number of dimensions in totcode is either 4 or 5 + if (!(length(totcode) %in% c(4,5))){ + stop("Please provide a dataframe with 4 or 5 categorical variables!") + } + + # Check if the number of variables to merge is valid for 4-dimensional data + if (length(totcode) == 4 & !length(vars_to_merge) %in% c(0,2)){ + stop("For 4-dimensional data, please specify 2 variables or leave vars_to_merge as NULL!") + } + + # Check if the number of variables to merge is valid for 5-dimensional data + if (length(totcode) == 5 & !length(vars_to_merge) %in% c(0,3,4)){ + stop("For 5-dimensional data, please specify 2 or 3 variables or leave vars_to_merge as NULL!") + } + + # Check if all modalities of hrcfiles are present in dfs + if (any(!names(hrcfiles) %in% names(dfs))){ + stop("At least one modality in hrcfiles is not present in dfs!") + } + + # Check if sep_dir is a logical value + if (!is.logical(sep_dir)){ + stop("sep_dir must be a logical value.") + } + + # Check if hrc_dir is a character string + if (!is.character(hrc_dir)){ + stop("hrc_dir must be a character string.") + } + + # Check if nb_tab_option is one of the valid options + if (!nb_tab_option %in% c('min', 'max', 'smart')){ + stop("nb_tab_option must be 'min', 'max', or 'smart'!") + } + + # If vars_to_merge is specified, check if all variables are present in totcode + if (!is.null(vars_to_merge)){ + if (any(!vars_to_merge %in% names(totcode))){ + stop("vars_to_merge contains at least one variable that is not in totcode!") + } + } + + # Check if verbose is a logical value + if (!is.logical(verbose)){ + stop("verbose must be a logical value.") + } + + # Check if verbose is a logical value + if (!is.logical(over_split)){ + stop("over_split must be a logical value.") + } + + # limit is not used if the user does not use over_split or nb_tab_option + # we consider it to be an error if the users specifies it + if (over_split | nb_tab_option == "smart"){ + if (is.null(limit)){ + stop("You must specify a limit (number) if you use over_split = TRUE or nb_tab_option = \"smart\"") + } + + # Convert limit to numeric + limit <- as.numeric(limit) + + } else { + if (!is.null(limit)){ + stop("You must not specify a limit (number) if you do not use over_split = TRUE or nb_tab_option = \"smart\"") + } + } + + + + # Choose the separator + data_var_cat <- dfs[names(dfs) %in% names(totcode)] + sep <- chose_sep(data_var_cat, vec_sep) + + if (length(totcode) == 5) { + # If the user specified the variables to merge + if (length(vars_to_merge) == 3) { + v1 <- vars_to_merge[[1]] + v2 <- vars_to_merge[[2]] + v3 <- vars_to_merge[[3]] + v4 <- paste(v1, v2, sep = sep) + + } else if (length(vars_to_merge) == 4) { + v1 <- vars_to_merge[[1]] + v2 <- vars_to_merge[[2]] + v3 <- vars_to_merge[[3]] + v4 <- vars_to_merge[[4]] + + } else { + # If the user did not specify the variables to merge, we need to calculate them + + if (nb_tab_option == 'smart') { + + if (verbose) { + cat("Choosing variables...\n") + } + + # Propose combinations of variables to merge + choice_3_var <- var_to_merge(dfs = dfs, + totcode = totcode, + hrcfiles = hrcfiles, + nb_var = 3, + limit = limit, + nb_tab_option = nb_tab_option) + + choice_4_var <- var_to_merge(dfs = dfs, + totcode = totcode, + hrcfiles = hrcfiles, + nb_var = 4, + limit = limit, + nb_tab_option = nb_tab_option) + + # Choose the best combination + # The less nb of tab is the row limit is respected + # or the less nb or row if the limit cannot be respected + if ( + (choice_3_var$nb_tab < choice_4_var$nb_tab & + max(choice_4_var$max_row,choice_3_var$max_row) < limit) | + + (choice_3_var$max_row < choice_4_var$max_row & + choice_4_var$max_row > limit) + ) + { + + v1 <- choice_3_var$vars[[1]] + v2 <- choice_3_var$vars[[2]] + v3 <- choice_3_var$vars[[3]] + v4 <- paste(v1, v2, sep = sep) + + if (choice_3_var$max_row > limit){ + cat(c("Warning when choosing variables: +The limit of ",limit," cannot be achieved. +The largest table has ",choice_3_var$max_row," rows.\n")) + } + + } else { + v1 <- choice_4_var$vars[[1]] + v2 <- choice_4_var$vars[[2]] + v3 <- choice_4_var$vars[[3]] + v4 <- choice_4_var$vars[[4]] + + if (choice_3_var$max_row > limit){ + cat(c("Warning when choosing variables: +The limit of ",limit," cannot be achieved. +The largest table has ",choice_3_var$max_row," rows.\n")) + } + } + + # Return to the primitive implementation to minimize or maximize + # the number of tables since the old implementation is not bad and is + # faster than calculating the size and number of generated tables + } else { + v1 <- NULL + v2 <- NULL + v3 <- NULL + v4 <- NULL + maximize_nb_tabs <- if (nb_tab_option == 'max') TRUE else FALSE + } + } + + if (verbose) { + cat(" +Reducing from 5 to 4...\n") + } + + res <- from_5_to_3(dfs = dfs, + dfs_name = dfs_name, + totcode = totcode, + hrcfiles = hrcfiles, + sep_dir = sep_dir, + hrc_dir = hrc_dir, + v1 = v1, v2 = v2, + v3 = v3, v4 = v4, + sep = sep, + maximize_nb_tabs = maximize_nb_tabs, + verbose = verbose) + + } else if (length(totcode) == 4) { + + # If the user specified the variables to merge + if (length(vars_to_merge) == 2) { + v1 <- vars_to_merge[[1]] + v2 <- vars_to_merge[[2]] + + } else { + # If the user did not specify the variables to merge, we need to calculate them + + if (nb_tab_option == 'smart') { + + if (verbose) { + cat("Choosing variables...\n") + } + + + choice_2_var <- var_to_merge(dfs = dfs, + totcode = totcode, + hrcfiles = hrcfiles, + nb_var = 2, + limit = limit, + nb_tab_option = nb_tab_option) + v1 <- choice_2_var$vars[[1]] + v2 <- choice_2_var$vars[[2]] + + if (choice_2_var$max_row > limit){ + cat(c("Warning when choosing variables: +The limit of ",limit," cannot be achieved. +The largest table has ",choice_2_var$max_row," rows.\n")) + } + + # Return to the primitive implementation to minimize or maximize + # the number of tables since the old implementation is not bad and is + # faster than calculating the size and number of generated tables + } else { + v1 <- NULL + v2 <- NULL + maximize_nb_tabs <- if (nb_tab_option == 'max') TRUE else FALSE + } + } + + if (verbose) { + cat(" +Reducing from 4 to 3...\n") + } + + res <- from_4_to_3(dfs = dfs, + dfs_name = dfs_name, + totcode = totcode, + hrcfiles = hrcfiles, + sep_dir = sep_dir, + hrc_dir = hrc_dir, + v1 = v1, v2 = v2, + sep = sep, + maximize_nb_tabs = maximize_nb_tabs) + } + + if (verbose) { + cat(paste(dfs_name,"has generated",length(res$tabs),"tables in total\n\n")) + } + + # Put a format usable by rtauargus + res <- sp_format(res = res, + dfs_name = dfs_name, + sep = sep, + totcode = totcode, + hrcfiles = hrcfiles) + + # Split too big table + if (over_split) { + + if (verbose) { + cat("Spliting...\n") + } + + # Collect of created vars + if (length(totcode) == 4){ + liste_var_fus <- paste(res$fus_vars[1], + res$fus_vars[2], + sep = res$sep) + } else { + v1 <- res$fus_vars[[1]][1] + v2 <- res$fus_vars[[1]][2] + + v1_v2 <- paste(v1,v2, sep = res$sep) + + v3 <- res$fus_vars[[2]][1] + v4 <- res$fus_vars[[2]][2] + + # 3 variables merged together + if (v1_v2 %in% c(v3,v4)){ + liste_var_fus <- list(paste(v3,v4, sep = res$sep)) + + # 2 couples created + } else { + liste_var_fus <- list(v1_v2, + paste(v3,v4, sep = res$sep)) + } + } + + for (var_fus in liste_var_fus){ + + if (verbose) { + cat(paste("",var_fus,"\n")) + } + + res <- split_tab(res = res, + limit = limit, + var_fus = var_fus) + } + + if (verbose) { + cat(paste(dfs_name,"has generated",length(res$tabs),"tables in total\n\n")) + } + + # The user specified a limit (smart or over_split case) + if (!is.null(limit)){ + max_row <- max(sapply(res$tabs, nrow)) + + if (max_row > limit){ + cat(c("Warning after splitting : +The limit of ",limit," cannot be achieved. +The largest table has ",max_row," rows.\n\n")) + } + } + } + + return(res) +} + +# split tables according to var_fuse if the nb of row exceed limit +# it creates smaller tabs with a hier variable less +#' @importFrom stats setNames +split_tab <- function(res, var_fus, limit) { + # table to split because they are too big + + res$to_split <- sapply(res$tabs, function(x) nrow(x) > limit) + table_to_split <-names(res$to_split[res$to_split == TRUE]) + + # data to stock + + all_tot_stock <- list() + tabs2 <- list() + list_vars <- list() + list_alt_hrcs <- list() + + # loop for table to treat + + for (t in table_to_split) { + + # Create of how to split + + hrc <- res$alt_hrc[[t]][[var_fus]] + total <- res$alt_totcode[[t]][[var_fus]] + other_total <-res$alt_totcode[[t]][names(res$alt_totcode[[t]]) != (var_fus)] + + res_sdc <-sdcHierarchies::hier_import(inp = hrc, from = "hrc",root = total) %>% + sdcHierarchies::hier_convert(as = "sdc") + + codes_split <- lapply(res_sdc$dims,names) + n <- length(codes_split) + + # Names use for tauargus + new_names <- lapply(1:n, function(i) paste(t, i, sep = "_")) + + # Create tabs by filtering + tabs <- lapply(codes_split, + function(codes) { + res <- res$tabs[[t]] %>% + filter(res$tabs[[t]][[var_fus]] %in% codes) + }) + + names(tabs) <- new_names + tabs2 <- append(tabs2, tabs) + + # alt_totcode for tauargus + + liste_alt_tot <- setNames(lapply(1:n, function(i) { + totali <- c(codes_split[[i]][1]) + totali <- setNames(list(totali), var_fus) + totali <- c(totali, other_total) + return(totali) }), new_names) + all_tot_stock <- append(all_tot_stock, liste_alt_tot) + + # list of variables for the created tables + + var <- replicate(n, list(res$vars[[1]])) + list_add <- replicate(n, list(res$vars[[1]])) + names(list_add) <- new_names + list_vars <- append(list_vars, list_add) + + # remove hierarchies from the variable we split and naming it + + res$alt_hrc[[t]][[var_fus]] <- NULL + + if (length(res$alt_hrc[[t]]) != 0) { + + hrc_e <- list(res$alt_hrc[[t]]) + names(hrc_e) <- names(res$alt_hrc[[t]]) + + alt_hrcs <- replicate(n, hrc_e) + names(alt_hrcs) <- new_names + + list_alt_hrcs <- append(list_alt_hrcs, alt_hrcs) + } + } + + # adding the names tables we created to the already existing tables + + table <- names(res$tabs[!(names(res$tabs) %in% table_to_split)]) + tabs_tot <- append(res$tabs[table], tabs2) + alt_totcode <- append(res$alt_totcode[table],all_tot_stock) + vars <- append(res$vars[table], list_vars) + hrcs <- append( res$alt_hrc[table],list_alt_hrcs) + if (length(hrcs) == 0) { hrcs <- NULL } + + + res = list( + tabs = tabs_tot, + vars = vars, + sep = res$sep, + alt_hrc = hrcs, + totcode = res$totcode, + alt_totcode = alt_totcode, + hrc = res$hrc, + fus_vars = res$fus_vars + ) + return(res) +} + +chose_sep <- function( + data, + liste_sep) +{ + + liste_var <- names(data) + liste_mod <- unique(unlist(lapply(data, unique))) + liste_mod <- c(liste_mod, liste_var) + n_sep <- length(liste_sep) + + i = 0 + is_in_mod = TRUE + while (i <= n_sep & is_in_mod) { + i <- i + 1 + sep <- liste_sep[i] + is_in_mod = sum(unlist(lapply(liste_mod, function(x) stringr::str_detect(x, sep)))) > 0 + } + + # We have a working separator! + if (i <= n_sep) { + # Remove the "\" in front of the separator + #sep <- stringr::str_sub(liste_sep[i], start = 2) + sep <- liste_sep[i] + + # Return the concatenated separator thrice + return(paste0(sep, + collapse = "")) + } else { + # Return a default separator (four underscores) + return(paste(rep("_AZERTY_", 2), + collapse = "")) + } +} + +#' Change the result of dimension reduction to be directly usable +#' in rtauargus +#' +#' @param res result of variable merging composed of name_non_changed_vars, a list of lists of tables, +#' a list of hierarchical files, a list of subtotals associated with these files, +#' and a list of vectors of variables or a vector of variables depending on the base size +#' of the dataframes +#' @param dfs_name the name of the entered dataframes +#' @param sep character +#' @param totcode character named vector +#' @param hrcfiles character named vector +#' +#' @return A list containing: +#' \itemize{ +#' \item `tabs`: named list of 3-dimensional dataframes +#' with nested hierarchies +#' \item `alt_hrc`: named list of hrc specific to the variables +#' created during merging to go to dimension 3 +#' \item `alt_totcode`: named list of totals specific to the variables +#' created during merging to go to dimension 3 +#' \item `vars`: categorical variables of the output dataframes +#' \item `sep`: separator used to link the variables +#' \item `totcode`: named vector of totals for all categorical variables +#' \item `hrcfiles`: named vector of hrc for categorical variables +#' (except the merged one) +#' \item `fus_vars`: named vector of vectors representing the merged +#' variables during dimension reduction +#' } +#' @importFrom stats setNames +#' @examples +#' library(dplyr) +#' data <- expand.grid( +#' ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), +#' GEO = c("Total", "G1", "G2"), +#' SEX = c("Total", "F", "M"), +#' AGE = c("Total", "AGE1", "AGE2"), +#' stringsAsFactors = FALSE +#' ) %>% +#' as.data.frame() +#' +#' data <- data %>% mutate(VALUE = 1) +#' +#' hrc_act <- "hrc_ACT.hrc" +#' +#' sdcHierarchies::hier_create(root = "Total", nodes = c("A","B")) %>% +#' sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2")) %>% +#' sdcHierarchies::hier_add(root = "B", nodes = c("B1","B2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' # Results of the function +#' res1 <- from_4_to_3( +#' dfs = data, +#' dfs_name = "tab", +#' totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"), +#' hrcfiles = c(ACT = hrc_act), +#' sep_dir = TRUE, +#' hrc_dir = "output" +#' ) +#' +#' res <- sp_format(res1, +#' dfs_name = "tab", +#' sep = "_", +#' totcode = c(SEX="Total",AGE="Total", +#' GEO="Total", ACT="Total"), +#' hrcfiles = c(ACT = hrc_act) +#' ) +#' @keywords internal +#' @export +sp_format <- function( + res, + dfs_name, + sep, + totcode, + hrcfiles) +{ + if (is.character(res$vars[1])) { + return(format4(res, dfs_name, sep, totcode, hrcfiles)) + } + if (is.list(res$vars)) { + return(format5(res, dfs_name, sep, totcode, hrcfiles)) + } +} + +# Format for tables with 4 variables +format4 <- function(res, dfs_name, sep, totcode, hrcfiles) { + # Data + + v1 <- res$vars[1] + v2 <- res$vars[2] + tabs <- res$tabs + n <- length(tabs) + var_cross <- paste(v1, v2, sep = sep) + + if (v1 %in% names(totcode)) { + tot1 <- totcode[[v1]] + } else + tot1 <- paste(res$fus_vars[1], res$fus_vars[2], sep = sep) + if (v2 %in% names(totcode)) { + tot2 <- totcode[[v2]] + } else + tot2 <- paste(res$fus_vars[1], res$fus_vars[2], sep = sep) + + tot_cross <- paste(tot1, tot2, sep = sep) + + name_non_changed_vars <- intersect(names(res$tabs[[1]]), names(totcode)) + old_totcode <- totcode[names(totcode) %in% name_non_changed_vars] + names(tot_cross) <- var_cross + totcode_2 <- c(old_totcode, tot_cross) + + v <- c(name_non_changed_vars, var_cross) + list_vars <- replicate(n, v, simplify = FALSE) + names(list_vars) <- c(paste0(dfs_name, 1:n, sep = "")) + + names(tabs) <- c(paste0(dfs_name, 1:n, sep = "")) + + + # new_names of alt_hrc + res2 <- setNames( + lapply( + seq_along(res$tabs), + function(i) setNames(list(res$hrcs[[i]]), var_cross) + ), + paste(dfs_name, seq_along(res$tabs), sep = "") + ) + + # new_names of subtotals + res3 <- setNames( + lapply( + seq_along(res$tabs), + function(i) setNames(list(res$alt_tot[[i]]), var_cross) + ), + paste(dfs_name, seq_along(res$tabs), sep = "") + ) + hrcfiles <- hrcfiles[(names(hrcfiles) %in% names(totcode_2))] + if (length(hrcfiles) == 0) {hrcfiles <- NULL} + + return ( + list( + tabs = tabs, + alt_hrc = res2, + alt_totcode = res3, + vars = list_vars, + sep = sep, + totcode = totcode_2, + hrc = hrcfiles, + fus_vars = res$vars + ) + ) +} + +# Format for tables with 5 variables +#' @importFrom stats setNames +format5 <- function(res, dfs_name, sep, totcode, hrcfiles) { + if (is.list(res$vars)) { + # Retrieve the different variables + v1 <- res$vars[[2]][1] + v2 <- res$vars[[2]][2] + v3 <- res$vars[[1]][1] + v4 <- res$vars[[1]][2] + var_cross <- paste(v1, v2, sep = sep) + var_cross2 <- paste(v3, v4, sep = sep) + + # Merging 3 variables into one + # So the information related to two merged variables during 5->4 + # is no longer useful to us since the variable no longer exists in dimension 3 + if (var_cross2 %in% c(v1, v2)) { + res2 <- list( + tabs = res$tabs, + hrcs = res$hrcs4_3, + alt_tot = res$alt_tot4_3, + vars = res$vars[[2]], + sep = sep, + fus_vars = c(v3, v4) + ) + res2 <- sp_format(res2, dfs_name, sep, totcode, hrcfiles) + + # Keep the information of the merged variables at each step + res2$fus_vars <- res$vars + return(res2) + } + + tot_cross <- paste(totcode[[v1]], totcode[[v2]], sep = sep) + tot_cross2 <- paste(totcode[[v3]], totcode[[v4]], sep = sep) + tabs <- res$tabs + name_non_changed_vars <- intersect(names(res$tabs[[1]]), names(totcode)) + old_totcode <- totcode[names(totcode) %in% name_non_changed_vars] + + names(tot_cross) <- var_cross + names(tot_cross2) <- var_cross2 + totcode_2 <- c(old_totcode, tot_cross, tot_cross2) + + n <- length(res$tabs) + v <- c(name_non_changed_vars, var_cross, var_cross2) + list_vars <- replicate(n, v, simplify = FALSE) + names(list_vars) <- c(paste0(dfs_name, 1:n, sep = "")) + names(tabs) <- c(paste0(dfs_name, 1:n, sep = "")) + + # new_names of alt_hrc + + res2 <- setNames(lapply(seq_along(res$tabs), function(i) { + list1 <- setNames(list(res$hrcs4_3[[i]]), var_cross) + list2 <- setNames(list(res$hrcs5_4[[i]]), var_cross2) + c(list1, list2) + }), + paste(dfs_name, seq_along(res$tabs), sep = "")) + + # new_names of subtotals + + res3 <- setNames(lapply(seq_along(res$tabs), function(i) { + list1 <- setNames(list(res$alt_tot4_3[[i]]), var_cross) + list2 <- setNames(list(res$alt_tot5_4[[i]]), var_cross2) + c(list1, list2) + }), + paste(dfs_name, seq_along(res$tabs), sep = "")) + + } + hrcfiles <- hrcfiles[(names(hrcfiles) %in% names(totcode_2))] + if (length(hrcfiles) == 0) {hrcfiles <- NULL} + return ( + list( + tabs = tabs, + alt_hrc = res2, + alt_totcode = res3, + vars = list_vars, + sep = sep, + totcode = totcode_2, + hrc = hrcfiles, + fus_vars = res$vars + ) + ) +} diff --git a/R/sp_restore_format.R b/R/sp_restore_format.R new file mode 100644 index 0000000..fd028ad --- /dev/null +++ b/R/sp_restore_format.R @@ -0,0 +1,192 @@ +#' Function to reverse the process of dimension reduction +#' @param masq a list of data.frames on which the secret has been applied +#' @param res the result of the dimension reduction function (to retrieve +#' the merged variables) and the separator (sep). +#' +#' @return the original dataframe with 4 or 5 dimensions +#' +#' @examples +#' # Examples with dimension 4 +#' library(dplyr) +#' data <- expand.grid( +#' ACT = c("Total", "A", "B", "A1", "A2","A3", "B1", "B2","B3","B4","C", +#' "name_non_changed_vars","E","F","G","B5"), +#' GEO = c("Total", "G1", "G2"), +#' SEX = c("Total", "F", "M"), +#' AGE = c("Total", "AGE1", "AGE2"), +#' stringsAsFactors = FALSE +#' ) %>% +#' as.data.frame() +#' +#' data <- data %>% mutate(VALUE = 1) +#' +#' hrc_act <- "hrc_ACT.hrc" +#' +#' sdcHierarchies::hier_create( +#' root = "Total", +#' nodes = c("A","B","C","name_non_changed_vars","E","F","G") +#' ) %>% +#' sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2","A3")) %>% +#' sdcHierarchies::hier_add(root = "B", nodes = c("B1","B2","B3","B4","B5")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' # Result of the function by forcing some variables to be merged +#' res_red_dim <- reduce_dims( +#' dfs = data, +#' dfs_name = "tab", +#' totcode = c(SEX="Total",AGE="Total", GEO="Total", ACT="Total"), +#' hrcfiles = c(ACT = hrc_act), +#' sep_dir = TRUE, +#' hrc_dir = "output", +#' vars_to_merge = c("ACT","GEO") +#' ) +#' +#' res1 <- restore_format(masq = res_red_dim$tabs, res = res_red_dim) +#' dim(setdiff(res1,data))[1] == 0 +#' +#' # return TRUE +#' # We have exactly the sames cases in the datatable after splitting and unsplitting data +#' +#' # Exemple dimension 5 +#' +#' data <- expand.grid( +#' ACT = c("Total_A", paste0("A", seq(1,5),"_"),paste0("A1_", seq(1,7)), +#' paste0("A2_", seq(1,9))), +#' GEO = c("Total_G", "GA", "GB", "GA1", "GA2", "GB1", "GB2","GA3","GB3","GB4"), +#' SEX = c("Total_S", "F", "M","F1","F2","M1","M2"), +#' AGE = c("Ensemble", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), +#' ECO = c("PIB","Ménages","Entreprises"), +#' stringsAsFactors = FALSE, +#' KEEP.OUT.ATTRS = FALSE +#' ) %>% +#' as.data.frame() +#' +#' data <- data %>% mutate(VALUE = 1:n()) +#' +#' hrc_act <- "hrc_ACT.hrc" +#' sdcHierarchies::hier_create(root = "Total_A", nodes = paste0("A", seq(1,5),"_")) %>% +#' sdcHierarchies::hier_add(root = "A1_", nodes = paste0("A1_", seq(1,7))) %>% +#' sdcHierarchies::hier_add(root = "A2_", nodes = paste0("A2_", seq(1,9))) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' hrc_geo <- "hrc_GEO.hrc" +#' sdcHierarchies::hier_create(root = "Total_G", nodes = c("GA","GB")) %>% +#' sdcHierarchies::hier_add(root = "GA", nodes = c("GA1","GA2","GA3")) %>% +#' sdcHierarchies::hier_add(root = "GB", nodes = c("GB1","GB2","GB3","GB4")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_geo, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' # function's result +#' +#' res_red_dim <- reduce_dims( +#' dfs = data, +#' dfs_name = "tab", +#' totcode = c(SEX="Total_S",AGE="Ensemble", GEO="Total_G", ACT="Total_A", ECO = "PIB"), +#' hrcfiles = c(ACT = hrc_act, GEO = hrc_geo), +#' sep_dir = TRUE, +#' hrc_dir = "output" +#' ) +#' +#' res2 <- restore_format(masq = res_red_dim$tabs, res = res_red_dim) +#' @keywords internal +#' @export +restore_format <- function(masq, res) { + + sep <- res$sep + sep_regex <- gsub("([+])", "\\\\\\1", sep) + + + + # Unique values from 'masq' (a list) are concatenated into a data frame + + masq_liste_empilee <- unique(do.call("rbind", unname(masq))) + + if (is.character(res$fus_vars)) { + # Case with 4 categorical variables + # variable + + v1 <- res$fus_vars[1] + v2 <- res$fus_vars[2] + + v1_v2 <- paste(v1, v2, sep = sep) + + result <- separer4_3(masq_liste_empilee, v1, v2,v1_v2, sep_regex) + return(result) + } + + # Case with 5 dimensions + # variable + + v1<-res$fus_vars$five_to_three[1] + v2<-res$fus_vars$five_to_three[2] + v3<-res$fus_vars$four_to_three[1] + v4<-res$fus_vars$four_to_three[2] + v1_v2 <- paste(v1, v2, sep = sep) + + if (!(v1_v2 == v3 | v1_v2 == v4)) { + # Case of fusion between 3 different variables + v3_v4 <- paste(v3, v4, sep = sep) + # Split based on 'v1', 'v2', and 'v1_v2' using 'separer4_3' function + split1 <- separer4_3(masq_liste_empilee, v1, v2, v1_v2, sep_regex) + # Further split based on 'v3', 'v4', and 'v3_v4' + result <- separer4_3(split1, v3, v4, v3_v4, sep_regex) + + } else { + # Case of fusion with an already fused variable + v3_v4 <- paste(v3, v4, sep = sep) + + if(v1_v2 == v3){ + # Split based on 'v1', 'v2', and 'v4' using 'separer5_3' function + result<-separer5_3(masq_liste_empilee, v1,v2, v4, v3_v4, sep_regex) + }else{ + # Split based on 'v1', 'v2', and 'v3' using 'separer5_3' function + result<-separer5_3(masq_liste_empilee, v1,v2,v3, v3_v4, sep_regex) + + } + + } + + return(result) +} + + + +# Function for splitting the merged variable v1_v2_v3 into v1, v2, and v3 +separer5_3 <- function(df, v1, v2, v3,v3_v4, sep_regex) { + splits <- strsplit(df[[v3_v4]], split = sep_regex) + df[[v3]] <- sapply(splits, `[`, 1) + df[[v1]] <- sapply(splits, `[`, 2) + df[[v2]] <- sapply(splits, `[`, 3) + df[[v3_v4]] <- NULL + + # Réorganiser les colonnes + new_order <- c(v3, v1, v2, setdiff(names(df), c(v3, v1, v2))) + df <- df[, new_order] + + df +} + + +# Function for splitting the merged variable v1_v2 into v1 and v2 +separer4_3 <- function(df, v1, v2, v1_v2, sep_regex) { + splits <- strsplit(df[[v1_v2]], split = sep_regex) + df[[v1]] <- sapply(splits, `[`, 1) + df[[v2]] <- sapply(splits, `[`, 2) + df[[v1_v2]] <- NULL + + # Réorganiser les colonnes + new_order <- c(v1, v2, setdiff(names(df), c(v1, v2))) + df <- df[, new_order] + df +} diff --git a/R/sp_tab_rtauargus.R b/R/sp_tab_rtauargus.R new file mode 100644 index 0000000..0ba3c78 --- /dev/null +++ b/R/sp_tab_rtauargus.R @@ -0,0 +1,147 @@ +#' Call Tau-Argus to protect a 4 or 5 dimensions table by splitting it +#' in several 3 dimensions table. +#' +#' @description +#' `r lifecycle::badge("experimental")` +#' +#' @inheritParams tab_rtauargus +#' +#' @param limit numeric, used to choose which variable to merge (if nb_tab_option = 'smart') +#' and split table with a number of row above this limit in order to avoid +#' tauargus failures +#' @param nb_tab_option strategy to follow for choosing variables automatically: +#' \itemize{ +#' \item `'min'`: minimize the number of tables; +#' \item `'max'`: maximize the number of tables; +#' \item `'smart'`: minimize the number of tables under the constraint +#' of their row count. +#' } +#' @param dfs_name name used to write hrc files when reducing dims +#' @param ... additional parameters#' +#' +#' @return The original tabular is returned with additional variables indicating +#' whether or not the cell has to be masked according to Tau-Argus +#' +#' @examples +#'\dontrun{ +#' #Please don't forget to specify the localisation of Tau-Argus in your computer +#' options( +#' rtauargus.tauargus_exe = +#' "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" +#' ) +#' +#' data(datatest1) +#' expl_vars <- c("A10", "treff","type_distrib","cj") +#' +#' res_dim4 <- tab_rtauargus4( +#' tabular = datatest1, +#' files_name = "datatest1", +#' dir_name = "tauargus_files", +#' explanatory_vars = expl_vars, +#' totcode = setNames(rep("Total", 4), expl_vars), +#' secret_var = "is_secret_prim", +#' value = "pizzas_tot_abs", +#' freq = "nb_obs_rnd", +#' verbose = TRUE, +#' nb_tab_option = "min", +#' verbose = TRUE +#' ) +#' +#' # With a data of 5 variables +#' +#' data(datatest2) +#' expl_vars <- c("A10", "treff","type_distrib","cj","nuts1") +#' +#' res_dim5 <- tab_rtauargus4( +#' tabular = datatest2, +#' files_name = "datatest2", +#' dir_name = "tauargus_files", +#' explanatory_vars = expl_vars, +#' totcode = setNames(rep("Total", 5), expl_vars), +#' secret_var = "is_secret_prim", +#' value = "pizzas_tot_abs", +#' freq = "nb_obs_rnd", +#' verbose = TRUE, +#' nb_tab_option = "min", # split into the minimum of tables. +#' verbose = TRUE, +#' suppress = "GH(1,100)" # We use hypercube to save time. +#' ) +#' } +#' @importFrom stats setNames +#' @export +tab_rtauargus4 <- function( + tabular, + explanatory_vars, + dir_name, + secret_var, + totcode, + files_name = NULL, + hrc = NULL, + secret_no_pl = NULL, + cost_var = NULL, + value = "value", + freq = "freq", + ip = 10, + suppress = "MOD(1,5,1,0,0)", + safety_rules = paste0("MAN(",ip,")"), + nb_tab_option = "smart", + limit = 14700L, + dfs_name = 'tab', + ... +){ + + .dots = list(...) + + hrc_path <- file.path(dir_name, "hrc") + if (!dir.exists(hrc_path)){ + dir.create(hrc_path, recursive = TRUE) + } + + # TODO: + # deleting created hrc files at the end of the function ? + + # Reduce dims for 4 or 5 dimensions table + if (length(explanatory_vars) %in% c(4, 5)) { + + cat("\nReducing dims...\n",dfs_name,"\n\n") + + list_tables <- reduce_dims( + dfs = tabular, + dfs_name = dfs_name, + totcode = totcode, + hrcfiles = hrc, + hrc_dir = hrc_path, + nb_tab_option = nb_tab_option, + limit = limit, + over_split = TRUE, + verbose = TRUE, # to generalize later + sep_dir = TRUE + ) + + params_multi <- formals(fun = "tab_multi_manager") + params_multi <- params_multi[1:(length(params_multi)-1)] + call <- sys.call(); call[[1]] <- as.name('list') + new_params <- eval.parent(call) + + for(param in intersect(names(params_multi), names(new_params))){ + params_multi[[param]] <- new_params[[param]] + } + + params_multi$list_tables = list_tables$tabs + params_multi$list_explanatory_vars = list_tables$vars + params_multi$hrc = list_tables$hrc + params_multi$totcode = list_tables$totcode + params_multi$alt_hrc = list_tables$alt_hrc + params_multi$alt_totcode = list_tables$alt_totcode + + masq_list <- do.call("tab_multi_manager", params_multi) + + + result <- restore_format(masq_list, list_tables) + + return(result) + } else { + stop("Do not use table with more than 5 dimensions. + Split_tab = TRUE is not compatible with these large tables.") + } +} diff --git a/R/sp_var_to_merge.R b/R/sp_var_to_merge.R new file mode 100644 index 0000000..62daf78 --- /dev/null +++ b/R/sp_var_to_merge.R @@ -0,0 +1,900 @@ +#' General function to choose variables to merge, +#' limiting the number of generated tables while ensuring not to generate +#' tables that are too large. +#' +#' @param dfs data.frame +#' @param totcode named vector of totals for categorical variables +#' @param hrcfiles named vector of hrc files for categorical variables +#' @param nb_var number of variables to merge +#' @param nb_tab_option strategy to follow for choosing variables automatically: +#' \itemize{ +#' \item `'min'`: minimize the number of tables; +#' \item `'max'`: maximize the number of tables; +#' \item `'smart'`: minimize the number of tables under the constraint of their row count. +#' } +#' @param limit maximum allowed row count in the 'smart' case +#' +#' @return A list of vectors representing the chosen variables to merge +#' +#' @examples +#' library(dplyr) +#' data <- expand.grid( +#' ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), +#' GEO = c("Total", "GA", "GB", "GA1", "GA2"), +#' SEX = c("Total", "F", "M"), +#' AGE = c("Total", "AGE1", "AGE2"), +#' stringsAsFactors = FALSE, +#' KEEP.OUT.ATTRS = FALSE +#' ) %>% +#' as.data.frame() +#' +#' data <- data %>% mutate(VALUE = 1:n()) +#' +#' hrc_act <- "hrc_ACT.hrc" +#' sdcHierarchies::hier_create(root = "Total", nodes = c("A","B")) %>% +#' sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2")) %>% +#' sdcHierarchies::hier_add(root = "B", nodes = c("B1","B2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' hrc_geo <- "hrc_GEO.hrc" +#' sdcHierarchies::hier_create(root = "Total", nodes = c("GA","GB")) %>% +#' sdcHierarchies::hier_add(root = "GA", nodes = c("GA1","GA2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_geo, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' totcode <- c(SEX="Total",AGE="Total", GEO="Total", ACT="Total") +#' +#' hrcfiles <- c(ACT = hrc_act, GEO = hrc_geo) +#' +#' # Consistent: choose two hierarchical variables +#' res1 <- var_to_merge(dfs = data, +#' totcode = totcode, +#' hrcfiles = hrcfiles, +#' nb_var = 2, +#' nb_tab_option = 'max') +#' res1 +#' max(unlist(length_tabs(dfs = data, +#' hrcfiles = hrcfiles, +#' totcode = totcode, +#' v1 = res1$vars[1], v2 = res1$vars[2]))) +#' +#' # Consistent: choose two non-hierarchical variables +#' res2 <- var_to_merge(dfs = data, +#' totcode = totcode, +#' hrcfiles = hrcfiles, +#' nb_var = 2, +#' nb_tab_option = 'min') +#' res2 +#' max(unlist(length_tabs(dfs = data, +#' hrcfiles = hrcfiles, +#' totcode = totcode, +#' v1 = res2$vars[1], v2 = res2$vars[2]))) +#' +#' res3 <- var_to_merge(dfs = data, +#' totcode = totcode, +#' hrcfiles = hrcfiles, +#' limit = 200, +#' nb_var = 2, +#' nb_tab_option = 'smart') +#' res3 +#' max(unlist(length_tabs(dfs = data, +#' hrcfiles = hrcfiles, +#' totcode = totcode, +#' v1 = res3$vars[1], v2 = res3$vars[2]))) +#' +#' # Obtains 147, which is well below 200 +#' +#' res4 <- var_to_merge(dfs = data, +#' totcode = totcode, +#' hrcfiles = hrcfiles, +#' limit = 5, +#' nb_var = 2, +#' nb_tab_option = 'smart') +#' res4 +#' max(unlist(length_tabs(dfs = data, +#' hrcfiles = hrcfiles, +#' totcode = totcode, +#' v1 = res4$vars[1], v2 = res4$vars[2]))) +#' +#' # Receives a warning: unable to reach the announced value +#' # There are 63 rows (equivalent to the max +#' # -> this is what reduces the table size) +#' # And the warning announces 63 rows, which is consistent with the output +#' +#' @keywords internal +#' @export +var_to_merge <- function( + dfs, + totcode, + hrcfiles = NULL, + nb_var = 4, + nb_tab_option = "min", + limit = 150) +{ + # Case of 2 pairs in dimension 5 + if (nb_var == 4){ + result_comb <- generate_two_pairs(totcode) + + # Case of a triplet in dimension 5 + } else if (nb_var == 3){ + result_comb <- generate_a_triplet(totcode) + + # Case of dimension 4 + } else { + result_comb <- generate_a_pair(totcode) + } + + return(var_to_merge_fragment(dfs = dfs, + result_comb = result_comb, + totcode = totcode, + hrcfiles = hrcfiles, + limit = limit, + nb_tab_option = nb_tab_option)) +} + +var_to_merge_fragment <- function( + dfs, + result_comb, + totcode, + hrcfiles = NULL, + limit = 150, + nb_tab_option = "smart") +{ + # Calculate the number of tables and maximum rows for each combination of variables + res_func <- lapply(result_comb, function(x) length_tabs( + dfs = dfs, + v1 = x[1], + v2 = x[2], + v3 = x[3], + v4 = x[4], + totcode = totcode, + hrcfiles = hrcfiles)) + + # Get the maximum rows and number of created tables + res_max <- sapply(res_func, function(x) max(unlist(x))) + res_len <- sapply(res_func, function(x) length(unlist(x))) + + # Create a dataframe for better filtering + df <- data.frame(res_max = res_max, res_len = res_len) + + # Save the row number by adding a column + df$original_index <- seq(nrow(df)) + + # Case: minimize the number of tables + if (nb_tab_option == "min"){ + min_nb_tab <- min(df$res_len) + filtered_df <- df[df$res_len == min_nb_tab, ] + + # Get the index of the filtered table + min_index <- which.min(filtered_df$res_max) + # Print the original index + i <- filtered_df$original_index[min_index] + + return(list(vars = result_comb[[i]], + max_row = filtered_df$res_max[min_index], + nb_tab = filtered_df$res_len[min_index]) + ) + + # Case: maximize the number of tables + } else if (nb_tab_option == "max"){ + max_nb_tab <- max(df$res_len) + filtered_df <- df[df$res_len == max_nb_tab, ] + + # Get the index of the filtered table + min_index <- which.min(filtered_df$res_max) + # Print the original index + i <- filtered_df$original_index[min_index] + + return(list(vars = result_comb[[i]], + max_row = filtered_df$res_max[min_index], + nb_tab = filtered_df$res_len[min_index]) + ) + + # Case: 'smart' - maximize under the constraint of the size limit + } else { + # Filter based on the maximum rows condition + filtered_df <- df[df$res_max < limit, ] + + # If at least one case satisfies this condition + if (nrow(filtered_df) > 0){ + # Get the index of the filtered table + min_index <- which.min(filtered_df$res_len) + + # Print the original index + i <- filtered_df$original_index[min_index] + + return(list(vars = result_comb[[i]], + max_row = filtered_df$res_max[min_index], + nb_tab = filtered_df$res_len[min_index]) + ) + + } else { + # Return the result with the fewest tables among those + # with the shortest tables + min_res_max <- min(df$res_max) + + # Silence warning since it is only display at the end... + # warning(c(" + # The limit of ",limit," cannot be achieved. + # The largest table has ",min_res_max," rows.")) + + filtered_df <- df[df$res_max == min_res_max, ] + + # Get the index of the filtered table + min_index <- which.min(filtered_df$res_len) + + # Print the original index + i <- filtered_df$original_index[min_index] + + return(list(vars = result_comb[[i]], + max_row = filtered_df$res_max[min_index], + nb_tab = filtered_df$res_len[min_index]) + ) + } + } +} + +#' @importFrom utils combn +generate_a_pair <- function(totcode) { + # Retrieve the categorical variables from the dataframe + cat_vars <- names(totcode) + + # Use combn to get all combinations of two elements + comb <- combn(cat_vars, 2) + + # Transform the results into a list of vectors + result <- split(t(comb), seq(ncol(comb))) + + return(result) +} + +#' @importFrom utils combn +generate_two_pairs <- function(totcode) { + # Retrieve the categorical variables from the dataframe + cat_vars <- names(totcode) + + # Get all combinations of four elements + comb <- combn(cat_vars, 4) + + # For each combination, obtain two disjoint pairs + result <- lapply(seq(ncol(comb)), function(i) { + quad <- comb[, i] + pair_comb <- t(combn(quad, 2)) + + # Create two disjoint pairs for each combination + pairs <- lapply(seq(nrow(pair_comb)), function(j) { + pair1 <- pair_comb[j, ] + pair2 <- setdiff(quad, pair1) + + # Convert the pairs to strings + pair1_str <- paste(sort(pair1), collapse = ",") + pair2_str <- paste(sort(pair2), collapse = ",") + + # Create a string representing both pairs + both_pairs_str <- paste(sort(c(pair1_str, pair2_str)), collapse = ",") + return(both_pairs_str) + }) + return(pairs) + }) + + # Flatten the result + result <- unlist(result, recursive = FALSE) + + # Remove duplicates + unique_pairs <- unique(result) + + # Convert the strings back to vectors + result <- lapply(unique_pairs, function(pair_str) { + pairs <- strsplit(pair_str, ",")[[1]] + return(pairs) + }) + + return(result) +} + +#' @importFrom utils combn +generate_a_triplet <- function(totcode) { + # Retrieve the categorical variables from the dataframe + cat_vars <- names(totcode) + + # Get all combinations of three elements + comb <- combn(cat_vars, 3) + + # Transform the result into a list of vectors + result <- split(t(comb), seq(ncol(comb))) + + return(result) +} + +#' Calculation of the table sizes generated a priori during the reduction of dimension +#' from 4 or 5 dimensions to 3 dimensions +#' +#' @param dfs a data.frame +#' +#' Variable in the 5->4 or 4->3 step +#' @param v1 the first merged variable +#' @param v2 the second merged variable +#' +#' Variable in the case of 4->3 passage in the 4->3 process +#' do not specify v1_v2 if three variables are merged into one +#' @param v3 the third original variable to be merged +#' @param v4 the fourth original variable to be merged +#' @param totcode character named vector +#' @param hrcfiles named vector of hrc files related to the variables +#' +#' @return a list of the lengths of the tables created during the dimension reduction +#' +#' @examples +#' # Dimension 4 +#' library(dplyr) +#' data <- expand.grid( +#' ACT = c("Total", "A", "B", "A1", "A2","A3", "B1", "B2","B3","B4","C", +#' "name_non_changed_vars","E","F","G","B5"), +#' GEO = c("Total", "G1", "G2"), +#' SEX = c("Total", "F", "M"), +#' AGE = c("Total", "AGE1", "AGE2"), +#' stringsAsFactors = FALSE +#' ) %>% +#' as.data.frame() +#' +#' data <- data %>% mutate(VALUE = 1) +#' +#' +#' hrc_act <- "hrc_ACT.hrc" +#' +#' sdcHierarchies::hier_create( +#' root = "Total", +#' nodes = c("A","B","C","name_non_changed_vars","E","F","G") +#' ) %>% +#' sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2","A3")) %>% +#' sdcHierarchies::hier_add(root = "B", nodes = c("B1","B2","B3","B4","B5")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' # Function results +#' +#' res1 <- length_tabs(dfs = data, +#' hrcfiles = c(ACT = hrc_act), +#' totcode = c(SEX="Total",AGE="Total", GEO="Total", ACT="Total"), +#' v1 = "ACT", +#' v2 = "GEO") +#' +#' # Dimension 5 +#' data <- expand.grid( +#' ACT = c("Total_A", paste0("A", seq(1,5),"_"), +#' paste0("A1_", seq(1,7)),paste0("A2_", seq(1,9))), +#' GEO = c("Total_G", "GA", "GB", "GA1", "GA2", "GB1", "GB2","GA3","GB3","GB4"), +#' SEX = c("Total_S", "F", "M","F1","F2","M1","M2"), +#' AGE = c("Ensemble", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), +#' ECO = c("PIB","Ménages","Entreprises"), +#' stringsAsFactors = FALSE, +#' KEEP.OUT.ATTRS = FALSE +#' ) %>% +#' as.data.frame() +#' +#' data <- data %>% mutate(VALUE = 1:n()) +#' +#' hrc_act <- "hrc_ACT.hrc" +#' sdcHierarchies::hier_create(root = "Total_A", nodes = paste0("A", seq(1,5),"_")) %>% +#' sdcHierarchies::hier_add(root = "A1_", nodes = paste0("A1_", seq(1,7))) %>% +#' sdcHierarchies::hier_add(root = "A2_", nodes = paste0("A2_", seq(1,9))) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' hrc_geo <- "hrc_GEO.hrc" +#' sdcHierarchies::hier_create(root = "Total_G", nodes = c("GA","GB")) %>% +#' sdcHierarchies::hier_add(root = "GA", nodes = c("GA1","GA2","GA3")) %>% +#' sdcHierarchies::hier_add(root = "GB", nodes = c("GB1","GB2","GB3","GB4")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level,name),3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_geo, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' res2 <- length_tabs(dfs = data, +#' hrcfiles = c(ACT = hrc_act, GEO = hrc_geo), +#' totcode = c(SEX="Total_S",AGE="Ensemble", GEO="Total_G", +#' ACT="Total_A", ECO = "PIB"), +#' v1 = "ACT",v2 = "AGE", +#' v3 = "GEO",v4 = "SEX") +#' +#' res3 <- length_tabs(dfs = data, +#' hrcfiles = c(ACT = hrc_act, GEO = hrc_geo), +#' totcode = c(SEX="Total_S",AGE="Ensemble", GEO="Total_G", +#' ACT="Total_A", ECO = "PIB"), +#' v1 = "ACT",v2 = "AGE",v3 = "GEO") +#' @keywords internal +#' @export +length_tabs <- function( + dfs, + v1, + v2, + v3 = NULL, + v4 = NULL, + totcode, + hrcfiles = NULL) +{ + + # To generalize the function to handle NA for an external function + v3 <- if (!is.null(v3) && is.na(v3)) NULL else v3 + v4 <- if (!is.null(v4) && is.na(v4)) NULL else v4 + + # If 4 variables are specified -> 5 dimensions case, 2 couples are created + if (!is.null(v4)) { + return(length_tabs_5_4_var(dfs = dfs, + hrcfiles = hrcfiles, + v1 = v1, v2 = v2, + v3 = v3, v4 = v4, + totcode = totcode)) + + # If 3 variables are specified -> 5 dimensions case, a trio is merged + } else if (!is.null(v3)) { + return(length_tabs_5_3_var(dfs = dfs, + hrcfiles = hrcfiles, + v1 = v1, v2 = v2, v3 = v3, + totcode = totcode)) + + # If 2 variables are specified -> 4 dimensions case + } else { + return(length_tabs_4(dfs = dfs, + hrcfiles = hrcfiles, + v1 = v1, v2 = v2, + totcode = totcode)) + } +} + +# case : 4 dimensions +length_tabs_4 <- function(dfs,v1,v2,totcode,hrcfiles=NULL){ + + # Retrieval of groupings {nodes + branch} + # based on whether the variable is hierarchical or not + + # We need to list and then unlist + # otherwise the ifelse returns the first element of import_hierarchy (big total) + # instead of returning all the nodes + level_v1 <- unlist(ifelse(v1 %in% names(hrcfiles), + list(import_hierarchy(hrcfiles[[v1]])), + list(list(unique(dfs[[v1]])))), + recursive = FALSE) + + level_v2 <- unlist(ifelse(v2 %in% names(hrcfiles), + list(import_hierarchy(hrcfiles[[v2]])), + list(list(unique(dfs[[v2]])))), + recursive = FALSE) + + # If case 1 non hrc (not hierarchical) and v2 in hrcfiles, then we need to reorder + if (!(v2 %in% names(hrcfiles)) & (v1 %in% names(hrcfiles))) { + tmp <- level_v1 + level_v1 <- level_v2 + level_v2 <- tmp + } + + # We do all possible combinations between v1 and v2 + # which represents the tables created during the creation of v1_v2 in the 5->4 step + + # For each of these tables, there are two possible hierarchies + # one with the totals of v1, and the other with the totals of v2 + # thus, for one of the modalities, we do not make any combination with its total + # hence the -1 + # and finally, we add the grand total, hence the +1 + nb_rows <- lapply(1:length(level_v1), function(i) { + lapply(1:length(level_v2), function(j) { + c((length(level_v1[[i]]) - 1) * length(level_v2[[j]]) + 1, + length(level_v1[[i]]) * (length(level_v2[[j]]) - 1) + 1) + }) + }) + + # Now we need to multiply by the modalities of the non-merged variables + + list_non_merged_vars <- names(totcode[!(names(totcode) %in% c(v1, v2))]) + + mod_non_merged_vars <- lapply(list_non_merged_vars, + function(x) length(unique(dfs[[x]]))) + + prod_numbers <- prod(unlist(mod_non_merged_vars)) + + nb_rows_tot <- lapply(unlist(nb_rows), function(x) x * prod_numbers) + + return(nb_rows_tot) +} + +# case : 5 dimensions, two pairs of merged variables +length_tabs_5_4_var <- function(dfs, v1, v2, v3, v4, totcode, hrcfiles = NULL) { + + # Retrieve groupings {nodes + branches} based on whether the variable is hierarchical or not, transitioning from 5 dimensions to 4 dimensions. + + # List and then unlist the results; ifelse returns all nodes instead of just the first one. + level_v1 <- unlist(ifelse(v1 %in% names(hrcfiles), + list(import_hierarchy(hrcfiles[[v1]])), + list(list(unique(dfs[[v1]])))), + recursive = FALSE) + + level_v2 <- unlist(ifelse(v2 %in% names(hrcfiles), + list(import_hierarchy(hrcfiles[[v2]])), + list(list(unique(dfs[[v2]])))), + recursive = FALSE) + + # Swap level_v1 and level_v2 in case v2 is not hierarchical but v1 is (to maintain order). + if (!(v2 %in% names(hrcfiles)) & (v1 %in% names(hrcfiles))) { + tmp <- level_v1 + level_v1 <- level_v2 + level_v2 <- tmp + } + + level_v3 <- unlist(ifelse(v3 %in% names(hrcfiles), + list(import_hierarchy(hrcfiles[[v3]])), + list(list(unique(dfs[[v3]])))), + recursive = FALSE) + + level_v4 <- unlist(ifelse(v4 %in% names(hrcfiles), + list(import_hierarchy(hrcfiles[[v4]])), + list(list(unique(dfs[[v4]])))), + recursive = FALSE) + + # Swap level_v3 and level_v4 in case v4 is not hierarchical but v3 is (to maintain order). + if (!(v4 %in% names(hrcfiles)) & (v3 %in% names(hrcfiles))) { + tmp <- level_v3 + level_v3 <- level_v4 + level_v4 <- tmp + + tmp <- v3 + v3 <- v4 + v4 <- tmp + } + + # Calculate the length of resulting 4-dimensional datasets for each combination of variables. + + nb_rows <- lapply(1:length(level_v1), function(i) { + lapply(1:length(level_v2), function(j) { + + c( + lapply(1:length(level_v3), function(k) { + lapply(1:length(level_v4), function(l) { + + # A formula to calculate the length of the arrays. + c( ((length(level_v1[[i]]) - 1) * length(level_v2[[j]]) + 1) * + ((length(level_v3[[k]]) - 1) * length(level_v4[[l]]) + 1), + + ((length(level_v1[[i]]) - 1) * length(level_v2[[j]]) + 1) * + (length(level_v3[[k]]) * (length(level_v4[[l]]) - 1) + 1) + ) + }) + }), + + lapply(1:length(level_v3), function(k) { + lapply(1:length(level_v4), function(l) { + + c( (length(level_v1[[i]]) * (length(level_v2[[j]]) - 1) + 1) * + ((length(level_v3[[k]]) - 1) * length(level_v4[[l]]) + 1), + + (length(level_v1[[i]]) * (length(level_v2[[j]]) - 1) + 1) * + (length(level_v3[[k]]) * (length(level_v4[[l]]) - 1) + 1) + ) + }) + }) + ) + + }) + }) + + # Calculate the total number of rows by multiplying with the unique modalities of non-merged variables. + + list_non_fused_vars <- names(totcode[!(names(totcode) %in% c(v1, v2, v3, v4))]) + + non_fused_vars_mod <- lapply(list_non_fused_vars, + function(x) length(unique(dfs[[x]]))) + + prod_numbers <- prod(unlist(non_fused_vars_mod)) + + nb_rows_tot <- lapply(unlist(nb_rows), function(x) x * prod_numbers) + + return(nb_rows_tot) +} + +# case : 5 dimensions, three variables merged into one +length_tabs_5_3_var <- function(dfs, v1, v2, v3, totcode, hrcfiles = NULL) { + + # Case of at least one hierarchical variable + if (length(setdiff(names(hrcfiles), c(v1, v2, v3))) != length(hrcfiles)) { + + # WARNING + # This case is a work in progress (WIP) + # Only the different lengths of modalities are calculated + # But we do not know specifically the length of table i, for example + # However, this is not currently critical + # All modalities appear the correct number of times, but not in the correct order + + # Transition from 5 dimensions to 4 dimensions + + # List and then unlist the results; ifelse returns all nodes instead of just the first one. + level_v1 <- unlist(ifelse(v1 %in% names(hrcfiles), + list(import_hierarchy(hrcfiles[[v1]])), + list(list(unique(dfs[[v1]])))), + recursive = FALSE) + + level_v2 <- unlist(ifelse(v2 %in% names(hrcfiles), + list(import_hierarchy(hrcfiles[[v2]])), + list(list(unique(dfs[[v2]])))), + recursive = FALSE) + + # Swap level_v1 and level_v2 if v2 is not hierarchical but v1 is (to maintain order). + if (!(v2 %in% names(hrcfiles)) & (v1 %in% names(hrcfiles))) { + tmp <- level_v1 + level_v1 <- level_v2 + level_v2 <- tmp + } + + # Transition from 4 dimensions to 3 dimensions + + # List and then unlist the results; ifelse returns all nodes instead of just the first one. + level_v3 <- unlist(ifelse(v3 %in% names(hrcfiles), + list(import_hierarchy(hrcfiles[[v3]])), + list(list(unique(dfs[[v3]])))), + recursive = FALSE) + + + nb_rows <- lapply(1:length(level_v1), function(i) { + + lapply(1:length(level_v3), function(k) { + + c( (length(level_v1[[i]]) - 1) * length(level_v3[[k]]) + 1, + length(level_v1[[i]]) * (length(level_v3[[k]]) - 1) + 1 + ) + }) + + lapply(1:length(level_v2), function(j) { + lapply(1:length(level_v3), function(k) { + + c( + rep(c((length(level_v2[[j]]) - 1) * length(level_v3[[k]]) + 1, + length(level_v2[[j]]) * (length(level_v3[[k]]) - 1) + 1 + ), + times = length(level_v1[[i]]) + ), + + rep(c((length(level_v1[[i]]) - 1) * length(level_v3[[k]]) + 1, + length(level_v1[[i]]) * (length(level_v3[[k]]) - 1) + 1 + ), + times = length(level_v2[[j]]) + ) + ) + }) + }) + }) + + # Case of 3 non-hierarchical variables: exact result (the length of table i is known) + } else { + + n_mod_v1 <- length(unique(dfs[[v1]])) + n_mod_v2 <- length(unique(dfs[[v2]])) + n_mod_v3 <- length(unique(dfs[[v3]])) + + nb_rows <- c( + 1 + (n_mod_v3 - 1) * n_mod_v1, + 1 + n_mod_v3 * (n_mod_v1 - 1), + + rep(c(1 + (n_mod_v3 - 1) * n_mod_v2, + 1 + n_mod_v3 * (n_mod_v2 - 1)) + , n_mod_v1), + + rep(c(1 + (n_mod_v3 - 1) * n_mod_v1, + 1 + n_mod_v3 * (n_mod_v1 - 1)) + , n_mod_v2 - 1) + ) + } + + # Calculate the total number of rows by multiplying with the unique modalities of non-merged variables. + + list_non_fused_vars <- names(totcode[!(names(totcode) %in% c(v1, v2, v3))]) + + non_fused_vars_mod <- lapply(list_non_fused_vars, + function(x) length(unique(dfs[[x]]))) + + prod_numbers <- prod(unlist(non_fused_vars_mod)) + + nb_rows_tot <- lapply(unlist(nb_rows), function(x) x * prod_numbers) + + return(nb_rows_tot) +} + +# Function to manage the import of the hierarchy +import_hierarchy <- function(hrcfile) { + total <- "BIG_Total" + res_sdc <- sdcHierarchies::hier_import(inp = hrcfile, from = "hrc", root = total) %>% + sdcHierarchies::hier_convert(as = "sdc") + # Store all sets of parent + direct child + levels <- lapply(res_sdc$dims, names) + return(levels) +} + +#' Calculate the number of tables generated when merging 3 variables +#' in the transition from 5 to 3 dimensions +#' +#' @param v1 first variable to be merged +#' @param v2 second variable to be merged +#' @param v3 third variable to be merged ( +#' variable that will be merged with v1 and v2 if v4 is not specified) +#' @param v4 fourth variable to be merged (with v3) +#' @param hrcfiles named list of hrc files +#' @param data data.frame (used only in the case where a trio is formed) +#' +#' @return an integer representing the number of tables generated +#' +#' @examples +#' # Dimension 4 +#' library(dplyr) +#' data <- expand.grid( +#' ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), +#' GEO = c("Total", "G1", "G2"), +#' SEX = c("Total", "F", "M"), +#' AGE = c("Total", "AGE1", "AGE2"), +#' stringsAsFactors = FALSE +#' ) %>% +#' as.data.frame() +#' +#' data <- data %>% mutate(VALUE = 1) +#' +#' hrc_act <- "hrc_ACT.hrc" +#' +#' sdcHierarchies::hier_create(root = "Total", nodes = c("A", "B")) %>% +#' sdcHierarchies::hier_add(root = "A", nodes = c("A1", "A2")) %>% +#' sdcHierarchies::hier_add(root = "B", nodes = c("B1", "B2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level, name), 3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' # 1 pair created +#' nb_tab_generated(v1 = "ACT", v2 = "GEO", +#' hrcfiles = c(ACT = hrc_act)) +#' +#' # Dimension 5 +#' data <- expand.grid( +#' ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), +#' GEO = c("Total", "GA", "GB", "GA1", "GA2", "GB1", "GB2"), +#' SEX = c("Total", "F", "M", "F1", "F2", "M1", "M2"), +#' AGE = c("Total", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), +#' ECO = c("PIB", "Ménages", "Entreprises"), +#' stringsAsFactors = FALSE, +#' KEEP.OUT.ATTRS = FALSE +#' ) %>% +#' as.data.frame() +#' +#' data <- data %>% mutate(VALUE = 1:n()) +#' +#' hrc_act <- "hrc_ACT.hrc" +#' sdcHierarchies::hier_create(root = "Total", nodes = c("A", "B")) %>% +#' sdcHierarchies::hier_add(root = "A", nodes = c("A1", "A2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level, name), 3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' hrc_geo <- "hrc_GEO.hrc" +#' sdcHierarchies::hier_create(root = "Total", nodes = c("GA", "GB")) %>% +#' sdcHierarchies::hier_add(root = "GA", nodes = c("GA1", "GA2")) %>% +#' sdcHierarchies::hier_add(root = "GB", nodes = c("GB1", "GB2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level, name), 3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_geo, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' hrc_sex <- "hrc_SEX.hrc" +#' sdcHierarchies::hier_create(root = "Total", nodes = c("F", "M")) %>% +#' sdcHierarchies::hier_add(root = "F", nodes = c("F1", "F2")) %>% +#' sdcHierarchies::hier_add(root = "M", nodes = c("M1", "M2")) %>% +#' sdcHierarchies::hier_convert(as = "argus") %>% +#' slice(-1) %>% +#' mutate(levels = substring(paste0(level, name), 3)) %>% +#' select(levels) %>% +#' write.table(file = hrc_sex, row.names = FALSE, col.names = FALSE, quote = FALSE) +#' +#' # Trio merged +#' nb_tab_generated(data = data, +#' v1 = "ACT", v2 = "GEO", v3 = "SEX", +#' hrcfiles = c(ACT = hrc_act, GEO = hrc_geo, SEX = hrc_sex)) +#' +#' # 2 pairs created +#' nb_tab_generated(v1 = "ACT", v2 = "GEO", +#' v3 = "SEX", v4 = "EXO", +#' hrcfiles = c(ACT = hrc_act, GEO = hrc_geo, SEX = hrc_sex)) +#' @keywords internal +#' @export +nb_tab_generated <- function( + v1, + v2, + v3 = NULL, + v4 = NULL, + hrcfiles = NULL, + data = NULL) +{ + + # Case dimension 5: 2 couples created + if (!is.null(v4)) { + return(4 * nb_nodes(hrcfiles = hrcfiles, v = v1) * + nb_nodes(hrcfiles = hrcfiles, v = v2) * + nb_nodes(hrcfiles = hrcfiles, v = v3) * + nb_nodes(hrcfiles = hrcfiles, v = v4)) + + # Case dimension 5: one triplet merged + } else if (!is.null(v3)) { + + # 2 hierarchical variables merged + if (!is.null(hrcfiles) & v1 %in% names(hrcfiles) & v2 %in% names(hrcfiles)) { + + # The hierarchy of each variable + level_v1 <- import_hierarchy(hrcfiles[[v1]]) + level_v2 <- import_hierarchy(hrcfiles[[v2]]) + + # Store the sum of nodes of v1_v2 for each table + # We consider all possible combinations between v1 and v2 + # => represents the tables created during the creation of v1_v2 in the 5->4 step + + # For each of these tables, there are two possible hierarchies + # one with the totals of v1, and the other with the totals of v2 + # the number of nodes is equal to their number of modalities + nb_noeuds_var <- sum(sapply(1:length(level_v1), function(i) { + sum(sapply(1:length(level_v2), function(j) { + length(level_v1[[i]]) + length(level_v2[[j]]) + })) + })) + + # 2 non-hierarchical variables merged + } else if (is.null(hrcfiles) | !(v1 %in% names(hrcfiles)) & !(v2 %in% names(hrcfiles))) { + # There is only one table in the end + # which can have two hierarchies + # totals on v1, or totals on v2 + # the number of nodes is equivalent to the number of modalities + nb_noeuds_var <- length(unique(data[[v1]])) + length(unique(data[[v2]])) + + # 1 hierarchical variable and 1 non-hierarchical variable merged + } else { + var_hier <- ifelse(v1 %in% names(hrcfiles), v1, v2) + mod_var_non_hier <- ifelse(var_hier == v1, + length(unique(data[[v2]])), + length(unique(data[[v1]]))) + + # Analysis of the hierarchy of var_hier + level_var_hier <- import_hierarchy(hrcfiles[[var_hier]]) + + # We consider all possible combinations between v1 and v2 + # => represents the tables created during the creation of v1_v2 in the 5->4 step + + # For each of these tables, there are two possible hierarchies + # one with the totals of v1, and the other with the totals of v2 + # the number of nodes is equal to their number of modalities + nb_noeuds_var <- sum(sapply(1:length(level_var_hier), function(i) { + length(level_var_hier[[i]]) + mod_var_non_hier + })) + } + + # nb_nodes corresponds to the number of tables that need to be created + # to make v1_v2 non-hierarchical + # for each of these tables, v3 needs to be made non-hierarchical + # and we create as many tables as its hierarchy has nodes + # finally, for each created table, two hierarchies are possible + # totals on v1_v2 and totals on v3 + return(2 * nb_noeuds_var * nb_nodes(hrcfiles, v = v3)) + + # Case dimension 4 + } else { + return(2 * nb_nodes(hrcfiles = hrcfiles, v = v1) * + nb_nodes(hrcfiles = hrcfiles, v = v2)) + } +} diff --git a/R/tab_arb.R b/R/tab_arb.R index a4b3328..0928e23 100644 --- a/R/tab_arb.R +++ b/R/tab_arb.R @@ -62,7 +62,7 @@ norm_apriori_params_tab <- function(params) { # Génère s -tab_apriori_batch <- function(hst_names, sep = separator, ignore_err = 0 , exp_triv = 0) { +tab_apriori_batch <- function(hst_names, sep, ignore_err = 0 , exp_triv = 0) { paste0( ' "', @@ -92,23 +92,23 @@ tab_apriori_batch <- function(hst_names, sep = separator, ignore_err = 0 , exp_t #' Sauf mention contraire, utiliser la syntaxe mentionnée dans la documentation #' de Tau-Argus. #' -#' Syntax pour \code{suppress} : +#' Syntax pour `suppress` : #' First parameter is the table number, in this case it will always be 1 \cr #' #' (Pour les données tabulées la fonction traite un tableau à la fois -#' Syntaxe spéciale pour \code{suppress} : +#' Syntaxe spéciale pour `suppress` : #' le premier paramètre dans la syntaxe Tau-Argus est le numéro du tableau. #' Dans le cas de cette fonction ce sera toujours 1) #' #' -#' @section Informations \emph{hst_filename}: +#' @section Informations *hst_filename*: #' It's possible to add an apriori file (.hst) for a tabular, it can be generated #' by the table_rda() function. #' #' Other options are not mandatory. To modify default value use a vector with the #' .hst file path as first element and then complete with those parameters : -#' \code{sep} for the separator, -#' \code{ignore_err} for IgnoreError and \code{exp_triv} for ExpandTrivial. \cr +#' `sep` for the separator, +#' `ignore_err` for IgnoreError and `exp_triv` for ExpandTrivial. \cr #' #' (Il est possible de fournir un fichier apriori (.hst) pour un tableau, #' il peut être fourni par la fonction table_rda() @@ -116,60 +116,60 @@ tab_apriori_batch <- function(hst_names, sep = separator, ignore_err = 0 , exp_t #' #' Les options supplémentaires sont facultatives. Pour modifier les valeurs par #' défaut, passer une liste ayant comme premier élément le(s) fichier(s) hst et -#' compléter avec les éléments portant les noms \code{sep} pour le séparateur, -#' \code{ignore_err} pour IgnoreError et \code{exp_triv} pour ExpandTrivial. +#' compléter avec les éléments portant les noms `sep` pour le séparateur, +#' `ignore_err` pour IgnoreError et `exp_triv` pour ExpandTrivial. #' Comme pour les noms de fichiers, spécifier une seule valeur par paramètre ou #' autant de valeurs que de tabulations.) #' #' @param arb_filename path of the arb filename, if unspecified, creates a temporary #' file \cr #' (nom du fichier arb généré (avec extension). Si non renseigné, un fichier temporaire.) -#' @param tab_filename [\strong{mandatory}] path of the tab_filename.\cr -#' ([\strong{obligatoire}]nom du fichier .tab (avec extension).) +#' @param tab_filename path of the tab_filename.\cr +#' (nom du fichier .tab (avec extension).) #' @inheritParams tab_rda #' @param hst_filename Apriori file name, syntax detailed below, #' Example : hst_filename = "path_to_file/apriori.hst"\cr -#' (fichier(s) d'informations \emph{a priori}. Voir ci-dessous +#' (fichier(s) d'informations *a priori*. Voir ci-dessous #' pour la syntaxe. Exemple : hst_filename = "path_to_file/apriori.hst") -#' @param explanatory_vars [\strong{mandatory}] +#' @param explanatory_vars #' Explanatory vars in a vector -#' Example : \code{c("CJ", "A21")} for the tabular \code{CJ} x \code{A21} \cr -#' ([\strong{obligatoire}] variables catégorielles, sous forme de vecteur. -#' Exemple : \code{c("CJ", "A21")} pour le premier -#' Pour un tableau croisant \code{CJ} x \code{A21}) +#' Example : `c("CJ", "A21")` for the tabular `CJ` x `A21` \cr +#' ( variables catégorielles, sous forme de vecteur. +#' Exemple : `c("CJ", "A21")` pour le premier +#' Pour un tableau croisant `CJ` x `A21`) #' @param value Colname for response variable in the tabular -#' \code{""}. For frequency table \cr +#' `""`. For frequency table \cr #' (Nom de la variable de réponse dans le tableau -#' \code{""}. Permet de tariter les tableaux de fréquence) -#' @param safety_rules [\strong{mandatory}] +#' `""`. Permet de tariter les tableaux de fréquence) +#' @param safety_rules #' Rules for primary suppression with Argus syntax, if the primary suppression #' has been dealt with an apriori file specify manual safety range :"MAN(10)" #' for example.\cr -#' ([\strong{obligatoire}] Règle(s) de secret primaire. +#' ( Règle(s) de secret primaire. #' Chaîne de caractères en syntaxe batch Tau-Argus. Si le secret primaire #' a été traité dans un fichier d'apriori : utiliser "MAN(10)") -#' @param suppress [\strong{mandatory}] +#' @param suppress #' Algortihm for secondary suppression (Tau-Argus batch syntax), and the #' parameters for it.\cr -#' ([\strong{obligatoire}] Algorithme de gestion du secret secondaire +#' ( Algorithme de gestion du secret secondaire #' (syntaxe batch de Tau-Argus), ainsi que les potentiels paramètres associés) #' @param output_names output file name\cr #' (nom du fichier en sortie.) #' @param output_type Type of the output file (Argus codification) -#' By default \code{"2"} (csv for pivot-table). -#' For SBS files use \code{"4"}\cr +#' By default `"2"` (csv for pivot-table). +#' For SBS files use `"4"`\cr #' (Format des fichiers en sortie (codification Tau-Argus). -#' Valeur par défaut du package : \code{"2"} (csv for pivot-table). -#' Pour le format SBS utiliser \code{"4"}) +#' Valeur par défaut du package : `"2"` (csv for pivot-table). +#' Pour le format SBS utiliser `"4"`) #' @param output_options Additionnal parameter for the output, -#' by default : code{"AS+"} (print Status). To specify no options : \code{""}.\cr +#' by default : code{"AS+"} (print Status). To specify no options : `""`.\cr #' (Options supplémentaires des fichiers en sortie. Valeur -#' par défaut du package : \code{"AS+"} (affichage du statut). Pour ne -#' spécifier aucune option, \code{""}.) +#' par défaut du package : `"AS+"` (affichage du statut). Pour ne +#' spécifier aucune option, `""`.) #' @param gointeractive Boolean, if TRUE will open a Tau-Argus window and launch -#' the batch in it (\code{FALSE} by default). \cr +#' the batch in it (`FALSE` by default). \cr #' (Possibilité de lancer le batch depuis le menu de Tau-Argus -#' (\code{FALSE} par défaut).) +#' (`FALSE` par défaut).) #' #' @return A list containing two elements : #' the arb file name and the output name (usefull if the name is generated randomly) \cr diff --git a/R/tab_rda.R b/R/tab_rda.R index 375b899..93aab5d 100644 --- a/R/tab_rda.R +++ b/R/tab_rda.R @@ -2,22 +2,19 @@ creer_hst <- function(tabular, explanatory_vars, value, secret_var, - secret_prim, + secret_no_pl, cost_var, ip, separator) { - if(any(!is.null(c(secret_var,secret_prim)))){ - if(is.null(secret_prim)){ - secret_prim <- secret_var - } - - if(is.null(secret_var)){ - secret_var <- secret_prim + if(!is.null(secret_var)){ + if(is.null(secret_no_pl)){ + tabular$secret_no_pl <- FALSE + secret_no_pl <- "secret_no_pl" } } - if((!is.null(secret_var))) { + if(!is.null(secret_var)) { tabular$label_apriori <-ifelse(tabular[[secret_var]],"u","s") tab_hst_secret = tabular[ @@ -28,13 +25,17 @@ creer_hst <- function(tabular, #Genere le fichier hst lié au coût - if ((!is.null(cost_var)) ){ + if (!is.null(cost_var)){ tabular$label_apriori <-paste0("c",separator,tabular[[cost_var]]) - if(any(!is.null(c(secret_var,secret_prim)))){ + if(!is.null(secret_var)){ + + is_cost_value_identical <- tabular[[cost_var]] == tabular[[value]] + is_secret_val <- tabular[[secret_var]] + tab_hst_cost = tabular[ - tabular[[secret_var]], + !(is_secret_val | is_cost_value_identical), c(explanatory_vars[(explanatory_vars %in% colnames(tabular))],"label_apriori") ] } else { @@ -45,10 +46,10 @@ creer_hst <- function(tabular, } else {tab_hst_cost <- data.frame()} - if ((!is.null(ip)) & (is.numeric(ip)) & any(!is.null(c(secret_var,secret_prim)))) { - tabular$val_ip <- ifelse(tabular[[secret_prim]] & (tabular[[value]] != 0), - round((ip/100)*tabular[[value]],1), - format(0.00001,scientific = F)) + if ((!is.null(ip)) & (is.numeric(ip)) & !is.null(secret_var)) { + tabular$val_ip <- ifelse(tabular[[secret_no_pl]] | (tabular[[value]] == 0), + format(0.00001,scientific = F), + round((ip/100)*tabular[[value]],1)) tabular$label_apriori <- paste0("pl",separator,tabular[["val_ip"]],separator,tabular[["val_ip"]]) @@ -98,7 +99,7 @@ write_rda_1var_tab <- function(info_var) { } -#' @importFrom dplyr %>% + write_rda_tab <- function(info_vars) { @@ -111,9 +112,9 @@ write_rda_tab <- function(info_vars) { } info_vars <- lapply(info_vars, chemin_complet) - vapply(info_vars, write_rda_1var_tab, character(1)) %>% - gsub("(\n)+", "\n", .) %>% # plusieurs sauts de lignes par un seul - sub("\n$", "", .) # supprime dernier saut de ligne + res <- vapply(info_vars, write_rda_1var_tab, character(1)) + res <- gsub("(\n)+", "\n", res) # plusieurs sauts de lignes par un seul + res <- sub("\n$", "", res) # supprime dernier saut de ligne } @@ -127,10 +128,10 @@ write_rda_tab <- function(info_vars) { #' un fichier tabular (tab) et un fichier de métadonnées #' (rda) à partir de données tabulées et d'informations additionnelles. #' -#' @param tabular [\strong{mandatory}] +#' @param tabular #' data.frame which contains the tabulated data and #' an additional boolean variable that indicates the primary secret of type boolean \cr -#' ([\strong{obligatoire}] data.frame contenant les données tabulées et +#' ( data.frame contenant les données tabulées et #' une variable supplémentaire indiquant le secret primaire de type booléen.) #' @param tab_filename tab file name (with .tab extension) \cr #' nom du fichier tab (avec extension .tab) @@ -138,14 +139,14 @@ write_rda_tab <- function(info_vars) { #' nom du fichier rda (avec extension) #' @param hst_filename hst file name (with .hst extension) \cr #' nom du fichier hst (avec extension) -#' @param explanatory_vars [\strong{mandatory}] Vector of explanatory variables \cr -#' [\strong{obligatoire}] Variables catégorielles, sous forme de vecteurs \cr -#' Example : \code{c("A21", "TREFF", "REG")} for a table crossing -#' \code{A21} x \code{TREFF} x \code{REG} +#' @param explanatory_vars Vector of explanatory variables \cr +#' Variables catégorielles, sous forme de vecteurs \cr +#' Example : `c("A21", "TREFF", "REG")` for a table crossing +#' `A21` x `TREFF` x `REG` #' (Variable indiquant le secret primaire de type booléen: #' prend la valeur "TRUE" quand les cellules du tableau doivent être masquées #' par le secret primaire, "FALSE" sinon. Permet de créer un fichier d'apriori) -#' @param secret_var Boolean variable which specifies the secret, primary or not : +#' @param secret_var Nae of the boolean variable which specifies the secret, primary or not : #' equal to "TRUE" if a cell is concerned by the secret,"FALSE" otherwise. #' will be exported in the apriori file. \cr #' (Variable indiquant le secret de type booléen: @@ -169,17 +170,17 @@ write_rda_tab <- function(info_vars) { #' @param totcode Code(s) which represent the total of a categorical variable #' (see section 'Specific parameters' for this parameter's syntax). #' If unspecified for a variable(neither by default nor explicitly) -#' it will be set to \code{rtauargus.totcode}. \cr +#' it will be set to `rtauargus.totcode`. \cr #' (Code(s) pour le total d'une variable catégorielle (voir #' section 'Specific parameters' pour la syntaxe de ce paramètre). Les #' variables non spécifiées (ni par défaut, ni explicitement) se verront -#' attribuer la valeur de \code{rtauargus.totcode}.) +#' attribuer la valeur de `rtauargus.totcode`.) #' @param value Name of the column containing the value of the cells. \cr #' (Nom de la colonne contenant la valeur des cellules) #' @param freq Name of the column containing the cell frequency. \cr #' (Nom de la colonne contenant les effectifs pour une cellule) -#' @param ip Value of the safety margin in \% (must be an integer). -#' (Valeur pour les intervalles de protection en \%, doit être entier ) +#' @param ip Value of the safety margin in % (must be an integer). +#' (Valeur pour les intervalles de protection en %, doit être entier ) #' @param maxscore Name of the column containing, the value of the largest #' contributor of a cell. \cr #' (Nom de la colonne contenant la valeur du plus gros contributeur @@ -202,9 +203,10 @@ write_rda_tab <- function(info_vars) { #' (see section 'Specific parameters' for the syntax of this parameter). \cr #' (Fichier(s) contenant les libellés des variables catégorielles #' (voir section 'Specific parameters' pour la syntaxe de ce paramètre).) -#' @param secret_prim Boolean variable which gives the primary secret : equal to -#' "TRUE" if a cell is concerned by the primary secret,"FALSE" otherwise. -#' will be exported in the apriori file \cr +#' @param secret_no_pl name of a boolean variable which indicates the cells +#' on which the protection levels won't be applied. If `secret_no_pl = NULL` +#' (default), the protection levels are applied on each cell which gets a `TRUE` +#' status for the `secret_var`.\cr #' #' @return Return the rda file name as a list (invisible).\cr #' (Renvoie le nom du fichier rda sous forme de liste (de @@ -216,7 +218,7 @@ write_rda_tab <- function(info_vars) { #' The apriori file (.hst) summarizes for each value of the table #' if they are concerned by the primary secret or not. #' With this file tau-argus will not need to set the primary secret itself. -#' The parameter \code{secret_var} indicates the name of the primary secret variable. +#' The parameter `secret_var` indicates the name of the primary secret variable. #' If there is the additional boolean variable which indicates the primary secret #' in the table (of tabulated data), the function tab_rda will create #' an apriori file in a format conforming to tauargus. \cr @@ -226,7 +228,7 @@ write_rda_tab <- function(info_vars) { #' du tableau si elles sont concernées par le secret primaire ou non. #' Avec ce fichier tau-argus n'aura plus besoin de poser le secret primaire lui même, #' il se basera sur le fichier d'apriori pour le faire. -#' Le paramètre \code{secret_var} indique le nom de la variable du secret primaire. +#' Le paramètre `secret_var` indique le nom de la variable du secret primaire. #' Si l'on rajoute cette variable supplémentaire indiquant #' le secret primaire (de type booléen) au tableau de données tabulées, la fonction #' tab_rda permet de créer un fichier d'apriori au format conforme pour tauargus. @@ -234,13 +236,13 @@ write_rda_tab <- function(info_vars) { #' #' @section Specific parameters: #' -#' The parameters \code{totcode}, and \code{codelist} +#' The parameters `totcode`, and `codelist` #' must be given in the form of a vector indicating the value to take for each variable. #' The names of the elements of the vector give the variable concerned and #' the elements of the vector give the value of the parameter for Tau-Argus. #' An unnamed element will set the default value for each variable. \cr #' -#' (Les paramètres \code{totcode}, et \code{codelist} +#' (Les paramètres `totcode`, et `codelist` #' sont à renseigner sous la forme d'un vecteur indiquant la valeur à prendre #' pour chaque variable. #' @@ -251,18 +253,18 @@ write_rda_tab <- function(info_vars) { #' #' For example : #' \itemize{ -#' \item{\code{totcode = "global"} : writes \code{ "global"} for each +#' \item{`totcode = "global"` : writes ` "global"` for each #' explanatory vars} -#' \item{\code{totcode = c("global", size="total", income="total")} : -#' \code{ "global"} for each variable except for \code{size} and -#' \code{income}} assigned with \code{ "total"} +#' \item{`totcode = c("global", size="total", income="total")` : +#' ` "global"` for each variable except for `size` and +#' `income`} assigned with ` "total"` #' by default : { "Total"} -#' \item{\code{totcode = "global"} : écrit \code{ "global"} pour +#' \item{`totcode = "global"` : écrit ` "global"` pour #' toutes les variables catégorielles} -#' \item{\code{totcode = c("global", size="total", income="total")} : -#' \code{ "global"} pour toutes les variables catégorielles -#' sauf \code{size} and \code{income}} qui se verront affecter -#' le total : \code{ "total"} +#' \item{`totcode = c("global", size="total", income="total")` : +#' ` "global"` pour toutes les variables catégorielles +#' sauf `size` and `income`} qui se verront affecter +#' le total : ` "total"` #' Par defaut : { "Total"} #' } #' @@ -270,46 +272,46 @@ write_rda_tab <- function(info_vars) { #' #' @section Hierarchical variables: #' -#' Parameter \code{hrc} has the same syntax as \code{totcode} and -#' \code{codelist} (named vector containing as many elements as variables to describe). -#' Hierarchy is defined in an separate hrc file (\strong{hiercodelist}). +#' Parameter `hrc` has the same syntax as `totcode` and +#' `codelist` (named vector containing as many elements as variables to describe). +#' Hierarchy is defined in an separate hrc file (**hiercodelist**). #' which can be written with the function \code{link{write_hrc2}}. -#' The function expects the location of this file (and a possible \code{hierleadstring} +#' The function expects the location of this file (and a possible `hierleadstring` #' if it differs from the default option of the package : @. #' The path to the existing file is explicitly given. #' The elements of the vector in parameter must be named (with the name of the variable), #' even if there is only one element. #' -#' emph{Example :}\code{c(category="category.hrc")} \cr +#' emph{Example :}`c(category="category.hrc")` \cr #' -#' (Le paramètre \code{hrc} obéit aux mêmes règles de syntaxe que \code{totcode} -#' et \code{codelist} (vecteur nommé contenant autant d'éléments +#' (Le paramètre `hrc` obéit aux mêmes règles de syntaxe que `totcode` +#' et `codelist` (vecteur nommé contenant autant d'éléments #' que de variables à décrire). #' -#' La hiérarchie est définie dans un fichier hrc à part (\strong{hiercodelist}) -#' qui peut être écrit à l'aide de la fonction \code{\link{write_hrc2}}. +#' La hiérarchie est définie dans un fichier hrc à part (**hiercodelist**) +#' qui peut être écrit à l'aide de la fonction [write_hrc2()]. #' -#' La fonction attend l'emplacement de ce fichier (et un éventuel \code{hierleadstring} +#' La fonction attend l'emplacement de ce fichier (et un éventuel `hierleadstring` #' s'il diffère de l'option par défaut du package). #' Le chemin vers le fichier existant est explicitement donné. #' Les éléments du vecteur en paramètre doivent nommés (avec le nom de la variable), #' même s'il n'y a qu'un seul élément. #' -#'\emph{Exemple :}\code{c(category="category.hrc")}) +#'*Exemple :*`c(category="category.hrc")`) #' #' #' @section Number of decimals: -#' Parameter \code{decimals} indicates the minimum number of decimal places to +#' Parameter `decimals` indicates the minimum number of decimal places to #' include in the output file -#' (whatever the number of decimals actually present in \code{tabular}). +#' (whatever the number of decimals actually present in `tabular`). #' It applies to all real variables (double) but not to integer variables. -#' To add zeros to an integer variable, convert it with \code{as.double} beforehand.\cr +#' To add zeros to an integer variable, convert it with `as.double` beforehand.\cr #' -#' (Le paramètre \code{decimals} indique le nombre minimal de décimales à faire +#' (Le paramètre `decimals` indique le nombre minimal de décimales à faire #' figurer dans le fichier en sortie (quel que soit le nombre de décimales -#' effectivement présent dans \code{tabular}). Il s'applique à toutes les +#' effectivement présent dans `tabular`). Il s'applique à toutes les #' variables réelles (double) mais pas aux variables entières (integer). Pour -#' ajouter des zéros à une variable entière, la convertir avec \code{as.double} +#' ajouter des zéros à une variable entière, la convertir avec `as.double` #' au préalable.) #' #' @examples @@ -383,7 +385,7 @@ tab_rda <- function( hierleadstring = getOption("rtauargus.hierleadstring"), codelist = NULL, separator = getOption("rtauargus.separator"), - secret_prim = NULL + secret_no_pl = NULL ){ @@ -442,7 +444,7 @@ tab_rda <- function( col_tabular <- c( explanatory_vars, secret_var, - secret_prim, + secret_no_pl, cost_var, value, freq, @@ -476,17 +478,21 @@ tab_rda <- function( if((!is.null(secret_var)) && any(is.na(tabular[[secret_var]]))){ stop("NAs in secret_var are not allowed") } - #Controles sur secret_prim, identiques à secret_var - if ((!is.null(secret_prim)) && (!secret_prim %in% colnames(tabular))){ - stop("secret_prim does not exist in tabular") + if(is.null(secret_var) && !is.null(secret_no_pl)){ + stop("protection levels needs to be applied to primary secret, specify + secret_var") + } + #Controles sur secret_no_pl, identiques à secret_var + if ((!is.null(secret_no_pl)) && (!secret_no_pl %in% colnames(tabular))){ + stop("secret_no_pl does not exist in tabular") } - if((!is.null(secret_prim)) && (any(!is.na(tabular[[secret_prim]]))) && (!is.logical(tabular[[secret_prim]]))){ - stop("unexpected type : secret_prim must be a boolean variable") + if((!is.null(secret_no_pl)) && (!is.logical(tabular[[secret_no_pl]]))){ + stop("unexpected type : secret_no_pl must be a boolean variable") } - if((!is.null(secret_prim)) && any(is.na(tabular[[secret_prim]]))){ - stop("NAs in secret_prim are not allowed") + if((!is.null(secret_no_pl)) && any(is.na(tabular[[secret_no_pl]]))){ + stop("NAs in secret_no_pl are not allowed") } # Controles sur cost_var @@ -516,18 +522,18 @@ tab_rda <- function( #Genere le fichier hst lié au secret primaire - if(any(!is.null(c(ip,secret_var,secret_prim,cost_var)))){ + if(any(!is.null(c(ip,secret_var,cost_var)))){ hst <- creer_hst (tabular, explanatory_vars, value, secret_var, - secret_prim, + secret_no_pl, cost_var, ip, separator) - if( !is.null(secret_var) | !is.null(cost_var)| !is.null(secret_prim)) { + if( !is.null(secret_var) | !is.null(cost_var)| !is.null(secret_no_pl)) { if (nrow(hst)==0) message("no cells are unsafe : hst file is empty") utils::write.table( @@ -544,7 +550,7 @@ tab_rda <- function( # genere fichier longueur fixe (le fichier .tab) dans le dossier indiqué et infos associees ..................... if (!is.null(secret_var)) tabular<-tabular[,!names(tabular)==secret_var] - if (!is.null(secret_prim)) tabular<-tabular[,!names(tabular)==secret_prim] + if (!is.null(secret_no_pl)) tabular<-tabular[,!names(tabular)==secret_no_pl] if (!is.null(cost_var)) tabular<-tabular[,!names(tabular)==cost_var] tabular <- tabular[,c(explanatory_vars,value,freq,maxscore,maxscore_2,maxscore_3)] diff --git a/R/tab_rtauargus.R b/R/tab_rtauargus.R index 8b01304..89ffaa4 100644 --- a/R/tab_rtauargus.R +++ b/R/tab_rtauargus.R @@ -1,4 +1,7 @@ -#' All in once for tabular +#' Protect one table by suppressing cells with Tau-Argus +#' +#' The function prepares all the files needed by Tau-Argus and launches the +#' software with the good settings and gets back the result. #' #' @inheritParams tab_rda #' @inheritParams tab_arb @@ -9,23 +12,47 @@ #' @param dir_name string indicated the path of the directory in which to save #' all the files (.rda, .hst, .txt, .arb, .csv) generated by the function. #' @param unif_labels boolean, if explanatory variables have to be standardized +#' @param split_tab `r lifecycle::badge("experimental")` boolean, +#' whether to reduce dimension to 3 while treating a table of dimension 4 or 5 +#' (default to `FALSE`) +#' @param limit `r lifecycle::badge("experimental")` numeric, used to choose +#' which variable to merge (if nb_tab_option = 'smart') +#' and split table with a number of row above this limit in order to avoid +#' tauargus failures +#' @param nb_tab_option `r lifecycle::badge("experimental")` strategy to follow +#' to choose variables automatically while splitting: +#' \itemize{ +#' \item{`"min"`: minimize the number of tables;} +#' \item{`"max"`: maximize the number of tables;} +#' \item{`"smart"`: minimize the number of tables under the constraint +#' of their row count.} +#' } #' @param ... any parameter of the tab_rda, tab_arb or run_arb functions, relevant #' for the treatment of tabular. #' #' @return -#' If output_type equals to 4, then the original tabular is returned with a new +#' If output_type equals to 4 and split_tab = FALSE, +#' then the original tabular is returned with a new #' column called Status, indicating the status of the cell coming from Tau-Argus : #' "A" for a primary secret due to frequency rule, "B" for a primary secret due #' to dominance rule, "D" for secondary secret and "V" for no secret cell. #' -#' If output_type doesn't equal to 4, then the raw result from tau-argus is returned. +#' If split_tab = TRUE, +#' then the original tabular is returned with some new columns which are boolean +#' variables indicating the status of a cell at each iteration of the protection +#' process as we get with `tab_multi_manager()` function. `TRUE` +#' denotes a cell that have to be suppressed. The last column is then the +#' final status of the suppression process of the original table. +#' +#' If `split_tab = FALSE` and `output_type` doesn't equal to `4`, +#' then the raw result from tau-argus is returned. #' #' @section Standardization of explanatory variables and hierarchies: #' -#' The boolean argument \code{unif_labels} is useful to +#' The boolean argument `unif_labels` is useful to #' prevent some common errors in using Tau-Argus. Indeed, Tau-Argus needs that, #' within a same level of a hierarchy, the labels have the same number of -#' characters. When the argument is set to TRUE, \code{tab_rtauargus} +#' characters. When the argument is set to TRUE, `tab_rtauargus` #' standardizes the explanatory variables to prevent this issue. #' Hierarchical explanatory variables (explanatory variables associated to #' a hrc file) are then modified in the tabular data and an another hrc file is @@ -53,7 +80,7 @@ #' # Compute the secondary secret ---- #' options( #' rtauargus.tauargus_exe = -#' "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +#' "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" #' ) #' #' res <- tab_rtauargus( @@ -68,17 +95,31 @@ #' freq = "N_OBS", #' verbose = FALSE #' ) +#' +#' # Reduce dims feature +#' +#' data(datatest1) +#' res_dim4 <- tab_rtauargus( +#' tabular = datatest1, +#' dir_name = "tauargus_files", +#' explanatory_vars = c("A10", "treff","type_distrib","cj"), +#' totcode = rep("Total", 4), +#' secret_var = "is_secret_prim", +#' value = "pizzas_tot_abs", +#' freq = "nb_obs_rnd", +#' split_tab = TRUE +#' ) #' } #' @export tab_rtauargus <- function( tabular, + explanatory_vars, files_name = NULL, dir_name = NULL, - explanatory_vars, totcode = getOption("rtauargus.totcode"), hrc = NULL, secret_var = NULL, - secret_prim = NULL, + secret_no_pl = NULL, cost_var = NULL, value = "value", freq = "freq", @@ -90,6 +131,9 @@ tab_rtauargus <- function( output_type = 4, output_options = "", unif_labels = TRUE, + split_tab = FALSE, + nb_tab_option = "smart", + limit = 14700, ... ){ @@ -129,9 +173,57 @@ tab_rtauargus <- function( names(totcode) <- explanatory_vars } - if(is.null(files_name)) files_name <- paste0("tau_argus_file_", format.Date(Sys.time(), format = '%Y_%m_%d_%H:%M:%S')) + if(is.null(files_name)) files_name <- "targus_file" if(is.null(dir_name)) dir_name <- getwd() + if (split_tab){ + # detect secret_var = NULL + # We want to split the table but the primary secret have not been posed + if ( !grepl("MAN", safety_rules) ){ + stop("While using split_tab = TRUE, you can't use tauargus to put primary secret") + } + if ( is.null(secret_var) ){ + stop("While using split_tab = TRUE, a secret_var has to be provided") + } + # split_tab strategy only work with dimension 4 or 5 tables + if ( ! length(explanatory_vars) %in% c(4,5) ){ + stop( + "You use split_tab = TRUE. However it only works with 4 or 5 dimensions + tables." + ) + } + } + + if (length(explanatory_vars) %in% c(4,5)){ + if (split_tab){ + + params_rt4 <- formals(fun = "tab_rtauargus4") + params_rt4 <- params_rt4[1:(length(params_rt4)-1)] + call <- sys.call(); call[[1]] <- as.name('list') + new_params <- eval.parent(call) + + for(param in intersect(names(params_rt4), names(new_params))){ + params_rt4[[param]] <- new_params[[param]] + } + + params_rt4$tabular <- tabular + params_rt4$totcode <- totcode + params_rt4$dir_name <- dir_name + params_rt4$files_name <- files_name + + return(do.call("tab_rtauargus4", params_rt4)) + + } else { + message("Warning : +It is highly recommended to use split_tab = TRUE when using rtauargus with 4 or 5 dimensions tables. +It allows to split the table in several tables with 3 dimensions. + +With split_tab = FALSE, tauargus treats the table in 4 or 5 dimensions. +In this case, the secondary secret may not being optimal according to tauargus itself +and the process may take longer.") + } + } + ## 1. TAB_RDA ..................... tabular_original <- tabular @@ -148,13 +240,13 @@ tab_rtauargus <- function( param_tab_rda$tabular <- tabular param_tab_rda$tab_filename <- file.path(dir_name, paste0(files_name, ".tab")) param_tab_rda$rda_filename <- file.path(dir_name, paste0(files_name, ".rda")) - param_tab_rda$hst_filename <- if(is.null(secret_var) & is.null(cost_var) & is.null(secret_prim)) NULL else file.path(dir_name, paste0(files_name, ".hst")) + param_tab_rda$hst_filename <- if(is.null(secret_var) & is.null(cost_var)) NULL else file.path(dir_name, paste0(files_name, ".hst")) param_tab_rda$explanatory_vars <- explanatory_vars param_tab_rda$hrc <- hrc param_tab_rda$totcode <- totcode param_tab_rda$secret_var <- secret_var - param_tab_rda$secret_prim <- secret_prim + param_tab_rda$secret_no_pl <- secret_no_pl param_tab_rda$cost_var <- cost_var param_tab_rda$value <- value param_tab_rda$freq <- freq @@ -238,11 +330,11 @@ tab_rtauargus <- function( } -#' Wrapper of tab_rtauargus adapted for \code{tab_multi_manager} function. +#' Wrapper of tab_rtauargus adapted for `tab_multi_manager` function. #' #' @inheritParams tab_rtauargus #' @param ip Interval Protection Level (10 by default) -#' @param ... Other arguments of \code{tab_rtauargus} function +#' @param ... Other arguments of `tab_rtauargus` function #' #' @return #' The original tabular is returned with a new @@ -250,7 +342,7 @@ tab_rtauargus <- function( #' "A" for a primary secret due to frequency rule, "B" for a primary secret due #' to dominance rule, "D" for secondary secret and "V" for no secret cell. #' -#' @seealso \code{tab_rtauargus} +#' @seealso `tab_rtauargus` #' #' @export #' @@ -275,7 +367,7 @@ tab_rtauargus <- function( #' # Compute the secondary secret ---- #' options( #' rtauargus.tauargus_exe = -#' "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +#' "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" #' ) #' #' res <- tab_rtauargus2( @@ -289,6 +381,19 @@ tab_rtauargus <- function( #' value = "TOT", #' freq = "N_OBS" #' ) +#' +#' # reduce dimensions feature +#' data(datatest1) +#' res_dim4 <- tab_rtauargus2( +#' tabular = datatest1, +#' dir_name = "tauargus_files", +#' explanatory_vars = c("A10", "treff","type_distrib","cj"), +#' totcode = rep("Total", 4), +#' secret_var = "is_secret_prim", +#' value = "pizzas_tot_abs", +#' freq = "nb_obs_rnd", +#' split_tab = TRUE +#' ) #' } tab_rtauargus2 <- function( tabular, @@ -298,14 +403,18 @@ tab_rtauargus2 <- function( totcode, hrc = NULL, secret_var = NULL, - secret_prim = NULL, + secret_no_pl = NULL, cost_var = NULL, value = "value", freq = "freq", ip = 10, suppress = "MOD(1,5,1,0,0)", + split_tab = TRUE, + nb_tab_option = "smart", + limit = 14700, ... ){ + .dots = list(...) params <- param_function(tab_rtauargus, .dots) @@ -316,7 +425,7 @@ tab_rtauargus2 <- function( params$totcode = totcode params$hrc = hrc params$secret_var = secret_var - params$secret_prim = secret_prim + params$secret_no_pl = secret_no_pl params$cost_var = cost_var params$value = value params$freq = freq @@ -325,10 +434,14 @@ tab_rtauargus2 <- function( stop("Either ip or safety_rules has to be set.") } }else{ - params$safety_rules = paste0("MAN(",ip,")") + params$safety_rules = "MAN(0)" #paste0("MAN(",ip,")") } params$ip = ip params$suppress = suppress + params$split_tab = if(length(explanatory_vars) > 3) split_tab else FALSE + params$dir_name = if(params$split_tab) file.path(dir_name, files_name) else dir_name + params$nb_tab_option = nb_tab_option + params$limit = limit params$show_batch_console = FALSE params$output_type = 4 params$output_options = "" @@ -336,6 +449,18 @@ tab_rtauargus2 <- function( params$separator = "," params$verbose = FALSE - do.call("tab_rtauargus", params) + res <- do.call("tab_rtauargus", params) + + if(params$split_tab & length(explanatory_vars) > 3){ + vars_secret_iteration <- + names(res)[grepl("^is_secret_[1-9].*$", names(res), perl=TRUE)] + last_iteration <- names(res)[length(names(res))] + res$Status <- ifelse(res[[secret_var]], "B", + ifelse(res[[last_iteration]], "D", "V") + ) + res <- res[,c(setdiff(names(res), vars_secret_iteration))] + } + + return(res) } diff --git a/R/tabul_group_fun.R b/R/tabul_group_fun.R new file mode 100644 index 0000000..8df7375 --- /dev/null +++ b/R/tabul_group_fun.R @@ -0,0 +1,181 @@ +################################################################################ +# Le programme ci-dessous propose une fonction pour construire un tableau +# croisant plusieurs variables catégorielles avec l'ensemble des marges +# nécessaire. Ce code devra être adapté aux cas d'usages réels +################################################################################ + +tabul_group_dt <- function( + df, + group_var = NULL, + pond_var = NULL, + exten_var, + resp_var, + marge_label = "Ensemble" +){ + + ponderation = nb_obs = nb_obs.tot = NULL # due to NSE notes in R CMD check + + data_dt <- as.data.table(df) + data_dt$ponderation <- if(is.null(pond_var)) 1 else data_dt[[pond_var]] + data_dt$nb_obs <- 1 + + summary_spec <- function(x, ponderation) list(tot = sum(x*ponderation, na.rm=TRUE), max = max(x, na.rm=TRUE)) + + res <- data_dt[ + , + as.list(unlist(lapply(.SD, function(x) summary_spec(x, ponderation)))), + by = group_var, + .SDcols = c(resp_var, "nb_obs") + ] + res[,nb_obs := nb_obs.tot][,`:=`(nb_obs.max = NULL, nb_obs.tot = NULL)] + + names(res) <- sapply(names(res), function(x) gsub("[.]","_",x)) + + var_marges <- base::setdiff(exten_var, group_var) + for(var_cat in var_marges){ + res[[var_cat]] <- marge_label + } + + if(is.null(resp_var)){ + res <- res[, .SD, .SDcols = c(exten_var, "nb_obs")] + }else{ + res <- res[, .SD, .SDcols = c(exten_var, "nb_obs", paste0(resp_var,"_tot"), paste0(resp_var,"_max"))] + } + + return(res) +} + +all_croisements <- function(vec){ + l <- do.call("c", lapply(seq_along(vec), function(i) combn(vec, i, FUN = list))) + l[[length(l)+1]] <- "" + return(l) +} + +tabul_group_all_margins <- function( + df, + cat_var = NULL, + pond_var = NULL, + resp_var = NULL, + marge_label = "Total" +){ + + croisements <- all_croisements(cat_var) + + res <- rbindlist( + lapply( + croisements, + function(groups){ + tabul_group_dt( + df = df, + group_var = if(groups[1] == "") NULL else groups, + pond_var = pond_var, + exten_var = cat_var, + resp_var = resp_var, + marge_label = marge_label + ) + }), + use.names=TRUE + ) + + if(is.null(resp_var)){ + res[, .SD, .SDcols = c(cat_var, "nb_obs")] + }else{ + res[, .SD, .SDcols = c(cat_var, "nb_obs", paste0(resp_var,"_tot"), paste0(resp_var,"_max"))] + } + + return(res) +} + +#' tabulate grouped data with all margins, handling hierarchical variables +#' +#' @param df data.frame or data.table +#' @param cat_vars vector of categorical variables but not hierarchical +#' @param hrc_vars named list (name = VAR final name, value = VAR current names) +#' @param pond_var weight (NULL if no weight is used) +#' @param resp_var vector of response variables (NULL to only compute frequency table) +#' @param marge_label label of margins (applied to all cat and hrc variables) +#' +#' @return a tibble +#' @export +#' +#' @examples +#' library(data.table) +#' +#' data("indiv_dt") +#' +#' #Non hierarchical variables +#' res_all_dtp <- tabulate_micro_data( +#' df = indiv_dt, +#' #categorical but not hierarchical variables +#' cat_vars = c("A10", "SIZE","CJ"), +#' #weight var +#' pond_var = "WEIGHT", +#' #response variable +#' resp_var = "TURNOVER", +#' # Labels of the margins +#' marge_label = "Total" +#' ) +#' str(res_all_dtp) +#' +#' #With one hierarchical variable +#' res_all_dtph <- tabulate_micro_data( +#' df = indiv_dt, +#' #categorical but not hierarchical variables +#' cat_vars = c("SIZE","CJ"), +#' #categorical nested variables +#' hrc_vars = list(ACTIVITY = c("A10","A21")), +#' pond_var = "WEIGHT", +#' resp_var = c("TURNOVER","PRODUCTION"), +#' marge_label = "Total" +#' ) +#' str(res_all_dtph) +#' +#' @rawNamespace import(data.table, except = transpose) +tabulate_micro_data <- function( + df, + cat_vars = NULL, + hrc_vars = NULL, + pond_var = NULL, + resp_var = NULL, + marge_label = "Total" +){ + setDT(df) + + if(is.null(hrc_vars)){ + result <- tabul_group_all_margins( + df = df, + cat_var = cat_vars, + pond_var, + resp_var, + marge_label + ) + }else{ + + hrc_vars_l <- do.call( + "expand.grid", + args = list(hrc_vars, stringsAsFactors = FALSE) + ) %>% + as.list() %>% + purrr::transpose() %>% + purrr::map(unlist) + + result_l <- purrr::map( + hrc_vars_l, + function(hrc){ + res <- tabul_group_all_margins( + df = df, + cat_var = unname(c(hrc, cat_vars)), + pond_var, + resp_var, + marge_label + ) + names(res)[names(res) %in% hrc] <- names(hrc) + return(res[,.SD, .SDcols = ]) + } + ) + result <- rbindlist(result_l, use.names = TRUE) + } + + return(unique(result)) +} + diff --git a/R/util.R b/R/util.R index 2f6f03d..5d37791 100644 --- a/R/util.R +++ b/R/util.R @@ -107,7 +107,7 @@ param_function <- function(f, list_param) { list_param[f_param_names] } -# vecteur des variables nécessaires pour rtauargus() +# vecteur des variables nécessaires pour micro_rtauargus() used_var <- function(explanatory_vars, weight_var = NULL, diff --git a/R/writehrc.R b/R/writehrc.R index 72fdeea..4c4b9da 100644 --- a/R/writehrc.R +++ b/R/writehrc.R @@ -70,7 +70,7 @@ vect_aro <- Vectorize(arobase, vectorize.args = c("string", "number")) #' value mentionned in the package options (i.e. "@" at the package startup). #' \cr #' Caractère unique repérant le niveau de profondeur dans le .hrc -#' @param adjust_unique_roots boolean. If TRUE will add fictional roots to the +#' @param adjust_unique_roots boolean. If TRUE (default) will add fictional roots to the #' correspondence table, by doing so there will be no unique roots in the hrc file. #' With tabular function, unique roots are not handled by Tau-Argus. \cr #' Si TRUE la fonction va ajouter des feuilles fictives au fichier .hrc afin @@ -98,13 +98,13 @@ vect_aro <- Vectorize(arobase, vectorize.args = c("string", "number")) #' This function mimicks some of its rigidities. #' \cr #' -#' 1 \strong{Ideal case} +#' 1 **Ideal case** #' #' Here is how a correspondence table is assumed to look like: #' #' \tabular{lll}{ -#' \strong{type} \tab \strong{details} \cr -#' \code{-------} \tab \code{------} \cr +#' **type** \tab **details** \cr +#' `-------` \tab `------` \cr #' planet \tab telluric \cr #' planet \tab gasgiant \cr #' star \tab bluestar \cr @@ -124,10 +124,10 @@ vect_aro <- Vectorize(arobase, vectorize.args = c("string", "number")) #' are correctly protected (seek further documentation or help if needed). #' \cr #' -#' 2 \strong{Dealing with NAs} +#' 2 **Dealing with NAs** #' #' The write_hrc2 function has to be preferably used without any NAs in your -#' correspondence table. In presence of NAs, the \strong{sort} argument +#' correspondence table. In presence of NAs, the **sort** argument #' has to be to FALSE. Indeed, NAs would be sorted together and, thus, #' be separated from their expected place in the hierarchy. #' @@ -137,13 +137,13 @@ vect_aro <- Vectorize(arobase, vectorize.args = c("string", "number")) #' Please be careful when dealing with NAs and check thoroughly the #' resulting .hrc file, or consider filling in NAs beforehand. #' -#' 2.1 \emph{Sparse hierarchies} \cr +#' 2.1 *Sparse hierarchies* \cr #' Hierarchy is sparse when NAs are inserted instead of repeating under a given #' level. #' #' \tabular{lll}{ -#' \strong{type} \tab \strong{details} \cr -#' \code{-------} \tab \code{------} \cr +#' **type** \tab **details** \cr +#' `-------` \tab `------` \cr #' planet \tab telluric \cr #' \tab gasgiant \cr #' star \tab bluestar \cr @@ -154,15 +154,15 @@ vect_aro <- Vectorize(arobase, vectorize.args = c("string", "number")) #' } #' #' Such cases still issue a warning for the presence of NAs, but do not pose -#' any problem, if \strong{sort=FALSE} is set. +#' any problem, if **sort=FALSE** is set. #' -#' 2.2 \emph{Non-uniform hierarchies}\cr +#' 2.2 *Non-uniform hierarchies*\cr #' Hierarchies with non-uniform depth happen when some levels are not detailed #' to the lowest detail, creating NAs. #' #' \tabular{lll}{ -#' \strong{type} \tab \strong{details} \cr -#' \code{-------} \tab \code{------} \cr +#' **type** \tab **details** \cr +#' `-------` \tab `------` \cr #' planet \tab telluric \cr #' planet \tab gasgiant \cr #' star \tab \cr @@ -171,21 +171,21 @@ vect_aro <- Vectorize(arobase, vectorize.args = c("string", "number")) #' } #' #' Processing such a file will generate an error with the following messages: -#' \emph{Missing values on the last column of the correspondence table is not allowed. -#' If relevant, you could fill in with the value of the previous column} +#' *Missing values on the last column of the correspondence table is not allowed. +#' If relevant, you could fill in with the value of the previous column* #' #' @section Détails sur les tables de correspondance et le .hrc: #' Tau-Argus attend des fichiers écrits avec précision. Certaines de ses #' rigidités sont reproduites par cette fonction. #' \cr #' -#' 1 \strong{Cas idéal} +#' 1 **Cas idéal** #' #' Voici l'aspect général que devrait avoir une table de correspondance : #' #' \tabular{lll}{ -#' \strong{type} \tab \strong{details} \cr -#' \code{-------} \tab \code{------} \cr +#' **type** \tab **details** \cr +#' `-------` \tab `------` \cr #' planet \tab telluric \cr #' planet \tab gasgiant \cr #' star \tab bluestar \cr @@ -208,10 +208,10 @@ vect_aro <- Vectorize(arobase, vectorize.args = c("string", "number")) #' ou chercher de l'aide). #' \cr #' -#' 2 \strong{Valeurs manquantes} +#' 2 **Valeurs manquantes** #' #' La fonction write_hrc2 doit être utilisée de préférence sans aucun NA dans votre -#' table de correspondance. En présence de NAs, l'argument \strong{sort} +#' table de correspondance. En présence de NAs, l'argument **sort** #' doit être à FALSE. En effet, les NAs seraient triés ensemble et, donc, #' être séparées de leur place attendue dans la hiérarchie. #' @@ -222,13 +222,13 @@ vect_aro <- Vectorize(arobase, vectorize.args = c("string", "number")) #' Soyez prudent lorsque vous manipulez des NA et vérifiez soigneusement #' le fichier .hrc résultant ou envisagez de remplir les NAs à l'avance. #' -#' 2.1 \emph{Hiérarchies creuses} \cr +#' 2.1 *Hiérarchies creuses* \cr #' Une hiérarchie est creuse si des NAs sont insérées au lieu de répéter un #' niveau donné verticalement. #' #' \tabular{lll}{ -#' \strong{type} \tab \strong{details} \cr -#' \code{-------} \tab \code{------} \cr +#' **type** \tab **details** \cr +#' `-------` \tab `------` \cr #' planet \tab telluric \cr #' \tab gasgiant \cr #' star \tab bluestar \cr @@ -239,16 +239,16 @@ vect_aro <- Vectorize(arobase, vectorize.args = c("string", "number")) #' } #' #' De tels cas émettent toujours un avertissement du fait de la présence de NA, -#' mais ne posent aucun problème, si on utilise \strong{sort=FALSE}. +#' mais ne posent aucun problème, si on utilise **sort=FALSE**. #' -#' 2.2 \emph{Hiérarchies non-uniformes}\cr +#' 2.2 *Hiérarchies non-uniformes*\cr #' Les hiérarchies à profondeur non-uniforme correspondent aux cas où certains #' niveaux ne sont pas détaillés jusqu'au bout, la fin de certaines lignes étant #' manquante. #' #' \tabular{lll}{ -#' \strong{type} \tab \strong{details} \cr -#' \code{-------} \tab \code{------} \cr +#' **type** \tab **details** \cr +#' `-------` \tab `------` \cr #' planet \tab telluric \cr #' planet \tab gasgiant \cr #' star \tab \cr @@ -257,8 +257,8 @@ vect_aro <- Vectorize(arobase, vectorize.args = c("string", "number")) #' } #' #' Le traitement d'un tel fichier générera une erreur avec les messages suivants : -#' \emph{Missing values on the last column of the correspondence table is not allowed. -#' If relevant, you could fill in with the value of the previous column} +#' *Missing values on the last column of the correspondence table is not allowed. +#' If relevant, you could fill in with the value of the previous column* #' #' @return Invisible. Path to the written .hrc file. #' \cr @@ -343,7 +343,7 @@ write_hrc2 <- function( sort_table = FALSE, rev = FALSE, hier_lead_string = getOption("rtauargus.hierleadstring"), - adjust_unique_roots = FALSE, + adjust_unique_roots = TRUE, add_char = "ZZZ" ){ @@ -357,7 +357,7 @@ write_hrc2 <- function( file_name <- givenfilename }else{ dir <- dirname(path = file_name) - if(!dir.exists(dir)) dir.create(dir, recursive = FALSE) + if(!dir.exists(dir)) dir.create(dir, recursive = TRUE) } # if(is.null(dir_name)){ @@ -407,7 +407,7 @@ write_hrc2 <- function( corr_table <- na.locf(corr_table) } - if(adjust_unique_roots==TRUE){ + if(adjust_unique_roots==TRUE & ncol(corr_table) > 1){ # warning(paste0("If there is unique roots in the table, the function will create # fictional roots to adjust the hrc file for Tau-Argus, they will be created # by copying the unique roots and adding ",add_char," at the beginning @@ -433,91 +433,110 @@ write_hrc2 <- function( if(length(suspects) > 0) message("Note : the following columns are not of character type : ", colnames(corr_table)[suspects], ". There may be an issue reading the table.") #### Creating the hrc file + loc_file <- ifelse(length(grep(".hrc$", file_name)) == 0, paste0(file_name,".hrc"), file_name) + + # 00. Case of a one column table + if(ncol(corr_table) == 1){ + + utils::write.table( + x = if(sort_table) corr_table[order(corr_table[,1]),, drop=FALSE] else corr_table, + file = loc_file, + quote = FALSE, + row.names = FALSE, + col.names = FALSE, + sep = "", + eol = "\n" + ) + invisible(loc_file) + }else{ - # 0. Sort the correspondence table - if (sort_table){ - for (j in 1:d[2]){ - corr_table <- corr_table[ - order(corr_table[,d[2]-j+1]) - ,] - # CORR JJ à vérifier - # sort the table is not efficient if there are NA values ! - # corr_table <- corr_table[ - # order(corr_table[,1]) - # ,] + # 0. Sort the correspondence table + if (sort_table){ + for (j in 1:d[2]){ + corr_table <- corr_table[ + order(corr_table[,d[2]-j+1]) + ,] + # CORR JJ à vérifier + # sort the table is not efficient if there are NA values ! + # corr_table <- corr_table[ + # order(corr_table[,1]) + # ,] + } } - } - # 0.b Remove total if needed - if(length(unique(as.character(corr_table[,1]))) == 1){ - corr_table <- corr_table[,-1] - } + # 0.b Remove total if needed + if(length(unique(as.character(corr_table[,1]))) == 1){ + corr_table <- corr_table[,-1] + } - # 1. Compare cell values in order to erase duplicates (vertically / horizontally) + # 1. Compare cell values in order to erase duplicates (vertically / horizontally) - corr_table_decale <- rbind( - rep("line1"), - corr_table[1:(d[1]-1),] - ) - corr_table_dec_left <- cbind( - w = rep("col1"), - corr_table[,1:d[2]-1] - ) + corr_table_decale <- rbind( + rep("line1"), + corr_table[1:(d[1]-1),] + ) + corr_table_dec_left <- cbind( + w = rep("col1"), + corr_table[,1:d[2]-1] + ) - compare <- corr_table == corr_table_decale #<-- cells identical to their upper - # neighbour - compare_left <- corr_table == corr_table_dec_left - missing <- is.na(corr_table) + compare <- corr_table == corr_table_decale #<-- cells identical to their upper + # neighbour + compare_left <- corr_table == corr_table_dec_left + missing <- is.na(corr_table) - # 2. Add a fitting number of hier_lead_string to all + # 2. Add a fitting number of hier_lead_string to all - depth_table <- as.data.frame( - matrix(0:(d[2]-1),nrow = d[1], ncol = d[2], byrow = TRUE) - ) - - # the numeric values (from 0 to d2 -1) correspond to the depth in the - # hierarchy, which will govern how many hier_lead_string are added when - # writing the hrc. - # One adjustment has to be done for cases when a same level is repeated - # in a line : - - compare_col <- t(apply( - compare_left, - MARGIN = 1, - cumsum - )) - depth_table <- depth_table - compare_col - - for(col in 1:d[2]){ - corr_table[,col] <- vect_aro( - string = corr_table[,col], - number = depth_table[,col], - hier_lead_string + depth_table <- as.data.frame( + matrix(0:(d[2]-1),nrow = d[1], ncol = d[2], byrow = TRUE) ) - } - corr_table[compare] <- "" - corr_table[compare_left] <- "" - corr_table[missing] <- "" + # the numeric values (from 0 to d2 -1) correspond to the depth in the + # hierarchy, which will govern how many hier_lead_string are added when + # writing the hrc. + # One adjustment has to be done for cases when a same level is repeated + # in a line : + + compare_col <- t(apply( + compare_left, + MARGIN = 1, + cumsum + )) + depth_table <- depth_table - compare_col + + for(col in 1:d[2]){ + corr_table[,col] <- vect_aro( + string = corr_table[,col], + number = depth_table[,col], + hier_lead_string + ) + } + + corr_table[compare] <- "" + corr_table[compare_left] <- "" + corr_table[missing] <- "" - # 3. Write corresponding table - # Note that columns & cells are not separated by anything, but cells that have - # not been erased still hold a line break ("\n") so that there will be line - # breaks only after non-void characters. + # 3. Write corresponding table + # Note that columns & cells are not separated by anything, but cells that have + # not been erased still hold a line break ("\n") so that there will be line + # breaks only after non-void characters. - loc_file <- ifelse(length(grep(".hrc$", file_name)) == 0, paste0(file_name,".hrc"), file_name) - utils::write.table( - x = corr_table, - file = loc_file, - quote = FALSE, - row.names = FALSE, - col.names = FALSE, - sep = "", - eol = "" - ) - invisible(loc_file) + utils::write.table( + x = corr_table, + file = loc_file, + quote = FALSE, + row.names = FALSE, + col.names = FALSE, + sep = "", + eol = "" + ) + + invisible(loc_file) + } + + } diff --git a/README.Rmd b/README.Rmd index 04e63f2..903c49c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -8,14 +8,14 @@ knitr::opts_chunk$set( collapse = TRUE, comment = "#>", - fig.path = "man/figures/" + fig.path = "man/figures/README-" ) ``` # rtauargus -[![pipeline status](https://gitlab.insee.fr/outilsconfidentialite/rtauargus/badges/master/pipeline.svg)](https://gitlab.insee.fr/outilsconfidentialite/rtauargus/-/pipelines) + @@ -163,5 +163,5 @@ same directory, too. To go further, you can consult the latest version of the The functions of *rtauargus* calling τ-Argus require that this software be accessible from the workstation. The download of τ-Argus is done on the [dedicated page](https://github.com/sdcTools/tauargus/releases) of the *sdcTools* git repository. _The package was developed on the basis of open source versions of τ-Argus -(versions 4.2 and above), in particular the latest version available at the -time of development (4.2.2b1). It is not compatible with version 3.5.**_ +(versions 4.2 and above), in particular the version used for this version is +τ-Argus 4.2.3. It is not compatible with version 3.5.**_ diff --git a/README.html b/README.html new file mode 100644 index 0000000..ce1b1a1 --- /dev/null +++ b/README.html @@ -0,0 +1,761 @@ + + + + + + + + + + + + + + + + + + + + + +

rtauargus +

+ + + + + + + + +

Run τ-Argus from R

+

The rtauargus package provides an R +interface for τ-Argus.

+

It allows to:

+
    +
  • create inputs (rda, arb, hst and tab files) from data in R format +;
  • +
  • generate the sequence of instructions to be executed in batch mode +(arb file);
  • +
  • launch a τ-Argus batch in command line;
  • +
  • retrieve the results in R.
  • +
+

These different operations can be executed in one go, but also in a +modular way. They allow to integrate the tasks performed by τ-Argus in a +processing chain written in R.

+

The package presents other additional +functionalities, such as:

+
    +
  • managing the protection of several tables at once;
  • +
  • creating a hierarchical variable from correspondence table.
  • +
+

It’s possible to choose a tabular or microdata approach, but the +tabular one is, from now on, encouraged.

+

Installation

+
    +
  • most recent stable version (recommended)

    +
      +
    • For Insee agents:

      +
      install.packages(
      +  "rtauargus",
      +  repos = "https://nexus.insee.fr/repository/r-public",
      +  type = "source"
      +)
    • +
    • Elsewhere:

      +
      install.packages("remotes")
      +remotes::install_github(
      +  "InseeFrLab/rtauargus",
      +  build_vignettes = FALSE,
      +  upgrade = "never"
      +)
    • +
  • +
  • version in development

  • +
+

To install a specific version, add to the directory a reference (commit +or tag), for +example "inseefrlab/rtauargus@v-0.4.1".

+

Simple example

+

When loading the package, the console displays some information:

+
library(rtauargus)
+

In particular, a plausible location for the τ-Argus software is +predefined. This can be changed for the duration of the R session, as +follows:

+
loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe"
+options(rtauargus.tauargus_exe = loc_tauargus)
+

With this small adjustment done, the package is ready to be used.

+

For the following demonstration, a fictitious table will be used:

+
act_size <-
+  data.frame(
+    ACTIVITY = c("01","01","01","02","02","02","06","06","06","Total","Total","Total"),
+    SIZE = c("tr1","tr2","Total","tr1","tr2","Total","tr1","tr2","Total","tr1","tr2","Total"),
+    VAL = c(100,50,150,30,20,50,60,40,100,190,110,300),
+    N_OBS = c(10,5,15,2,5,7,8,6,14,20,16,36),
+    MAX = c(20,15,20,20,10,20,16,38,38,20,38,38)
+  )
+

As primary rules, we use the two following ones:

+
    +
  • The n-k dominance rule with n=1 and k = 85
  • +
  • The minimum frequency rule with n = 3 and a safety range of 10.
  • +
+

To get the results for the dominance rule, we need to specify the +largest contributor to each cell, corresponding to the MAX +variable in the tabular data.

+
ex1 <- tab_rtauargus(
+  act_size,
+  dir_name = "tauargus_files",
+  files_name = "ex1",
+  explanatory_vars = c("ACTIVITY","SIZE"),
+  safety_rules = "FREQ(3,10)|NK(1,85)",
+  value = "VAL",
+  freq = "N_OBS",
+  maxscore = "MAX",
+  totcode = c(ACTIVITY="Total",SIZE="Total")
+)
+#> Start of batch procedure; file: Z:\SDC\OutilsConfidentialite\rtauargus\tauargus_files\ex1.arb
+#> <OPENTABLEDATA> "Z:\SDC\OutilsConfidentialite\rtauargus\tauargus_files\ex1.tab"
+#> <OPENMETADATA> "Z:\SDC\OutilsConfidentialite\rtauargus\tauargus_files\ex1.rda"
+#> <SPECIFYTABLE> "ACTIVITY""SIZE"|"VAL"||
+#> <SAFETYRULE> FREQ(3,10)|NK(1,85)
+#> <READTABLE> 1
+#> Tables have been read
+#> <SUPPRESS> MOD(1,5,1,0,0)
+#> Start of the modular protection for table ACTIVITY x SIZE | VAL
+#> End of modular protection. Time used 0 seconds
+#>                    Number of suppressions: 2
+#> <WRITETABLE> (1,4,,"Z:\SDC\OutilsConfidentialite\rtauargus\tauargus_files\ex1.csv")
+#> Table: ACTIVITY x SIZE | VAL has been written
+#>                    Output file name: Z:\SDC\OutilsConfidentialite\rtauargus\tauargus_files\ex1.csv
+#> End of TauArgus run
+

By default, the function displays in the console the logbook content +in which user can read all steps run by τ-Argus. This can be retrieved +in the logbook.txt file. With verbose = FALSE, the function +can be silenced.

+

By default, the function returns the original dataset with one +variable more, called Status, directly resulting from +τ-Argus and describing the status of each cell as follows:

+

-A: primary secret cell because of frequency rule;
+-B: primary secret cell because of dominance rule (1st +contributor);
+-C: primary secret cell because of frequency rule (more +contributors in case when n>1);
+-D: secondary secret cell;
+-V: valid cells - no need to mask.

+
ex1
+#>    ACTIVITY  SIZE VAL N_OBS MAX Status
+#> 1        01 Total 150    15  20      V
+#> 2        01   tr1 100    10  20      V
+#> 3        01   tr2  50     5  15      V
+#> 4        02 Total  50     7  20      V
+#> 5        02   tr1  30     2  20      A
+#> 6        02   tr2  20     5  10      D
+#> 7        06 Total 100    14  38      V
+#> 8        06   tr1  60     8  16      D
+#> 9        06   tr2  40     6  38      B
+#> 10    Total Total 300    36  38      V
+#> 11    Total   tr1 190    20  20      V
+#> 12    Total   tr2 110    16  38      V
+

All the files generated by the function are written in the specified +directory (dir_name argument). The default format for the +protected table is csv but it can be changed. All the τ-Argus files +(.tab, .rda, .arb and .txt) are written in the same directory, too. To +go further, you can consult the latest version of the τ-Argus manual is +downloadable here: https://research.cbs.nl/casc/Software/TauManualV4.1.pdf.

+

A detailed overview is available via +vignette("rtauargus").

+

Important notes

+

The functions of rtauargus calling τ-Argus require that this +software be accessible from the workstation. The download of τ-Argus is +done on the dedicated page +of the sdcTools git repository.

+

_The package was developed on the basis of open source versions of +τ-Argus (versions 4.2 and above), in particular the version used for +this version is τ-Argus 4.2.3. It is not compatible with version +3.5.**_

+ + + diff --git a/README.md b/README.md index 9e202e2..47bc70a 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,8 @@ # rtauargus - -[![pipeline -status](https://gitlab.insee.fr/outilsconfidentialite/rtauargus/badges/master/pipeline.svg)](https://gitlab.insee.fr/outilsconfidentialite/rtauargus/-/pipelines) + - ## Run τ-Argus from R @@ -17,11 +14,11 @@ The *rtauargus* package provides an **R** interface for **τ-Argus**. It allows to: -- create inputs (rda, arb, hst and tab files) from data in R format ; -- generate the sequence of instructions to be executed in batch mode - (arb file); -- launch a τ-Argus batch in command line; -- retrieve the results in R. +- create inputs (rda, arb, hst and tab files) from data in R format ; +- generate the sequence of instructions to be executed in batch mode + (arb file); +- launch a τ-Argus batch in command line; +- retrieve the results in R. These different operations can be executed in one go, but also in a modular way. They allow to integrate the tasks performed by τ-Argus in a @@ -29,38 +26,38 @@ processing chain written in R. The package presents other **additional functionalities**, such as: -- managing the protection of several tables at once; -- creating a hierarchical variable from correspondence table. +- managing the protection of several tables at once; +- creating a hierarchical variable from correspondence table. It’s possible to choose a tabular or microdata approach, but the tabular one is, from now on, encouraged. ## Installation -- **most recent stable version** (recommended) +- **most recent stable version** (recommended) - - For Insee agents: + - For Insee agents: - ``` r - install.packages( - "rtauargus", - repos = "https://nexus.insee.fr/repository/r-public", - type = "source" - ) - ``` + ``` r + install.packages( + "rtauargus", + repos = "https://nexus.insee.fr/repository/r-public", + type = "source" + ) + ``` - - Elsewhere: + - Elsewhere: - ``` r - install.packages("remotes") - remotes::install_github( - "InseeFrLab/rtauargus", - build_vignettes = FALSE, - upgrade = "never" - ) - ``` + ``` r + install.packages("remotes") + remotes::install_github( + "InseeFrLab/rtauargus", + build_vignettes = FALSE, + upgrade = "never" + ) + ``` -- **version in development** +- **version in development** To install a specific version, add to the directory a reference ([commit](https://github.com/inseefrlab/rtauargus/commits/master) or @@ -73,18 +70,6 @@ When loading the package, the console displays some information: ``` r library(rtauargus) -#> -#> Tau-Argus : "Y:/Logiciels/TauArgus/TauArgus.exe" -#> (note: unknown location) -#> -#> To change this directory : -#> options(rtauargus.tauargus_exe = "chemin/vers/TauArgus.exe") -#> -#> To return to the default location, -#> reset_rtauargus_options("tauargus_exe") -#> -#> To display all the options of the package : -#> rtauargus_options() ``` In particular, a plausible location for the τ-Argus software is @@ -113,8 +98,8 @@ act_size <- As primary rules, we use the two following ones: -- The n-k dominance rule with n=1 and k = 85 -- The minimum frequency rule with n = 3 and a safety range of 10. +- The n-k dominance rule with n=1 and k = 85 +- The minimum frequency rule with n = 3 and a safety range of 10. To get the results for the dominance rule, we need to specify the largest contributor to each cell, corresponding to the `MAX` variable in @@ -199,6 +184,6 @@ the [dedicated page](https://github.com/sdcTools/tauargus/releases) of the *sdcTools* git repository. \_The package was developed on the basis of open source versions of -τ-Argus (versions 4.2 and above), in particular the latest version -available at the time of development (4.2.2b1). It is not compatible -with version 3.5.\*\*\_ +τ-Argus (versions 4.2 and above), in particular the version used for +this version is τ-Argus 4.2.3. It is not compatible with version +3.5.\*\*\_ diff --git a/_pkgdown.yml b/_pkgdown.yml index 09946a5..60f2d71 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -22,6 +22,22 @@ reference: - write_hrc2 - tab_rtauargus - tab_rtauargus2 + - title: Functions ensuring the dimensions reduction in the background + desc: > + Functions called by tab_rtauargus or tab_multi_manager while setting + split_tab = TRUE, in order to protect 4/5 dimensions tables by + reducing the dimension first. + contents: + - tab_rtauargus4 + - reduce_dims + - var_to_merge + - nb_tab_generated + - length_tabs + - from_4_to_3 + - from_5_to_3 + - title: Data + desc: > + Data to run examples - activity_corr_table - nuts23_fr_corr_table - turnover_act_cj @@ -29,12 +45,14 @@ reference: - turnover_nuts_cj - turnover_nuts_size - turnover_act_nuts_size + - datatest1 + - datatest2 - title: Protect tables from microdata desc: > Functions for protecting magnitude and frequency tables, i.e. from a microdataset. Original way to proceed but no longer the favored one. contents: - - rtauargus + - micro_rtauargus - rtauargus_plus - title: Step by step functions desc: "Mostly internal functions" @@ -60,6 +78,8 @@ articles: - protect_multi_tables_fr - options_safety_rules - options_safety_rules_fr + - split_tab + - split_tab_fr - title: Work with microdata data (old version) navbar: micro-wise contents: diff --git a/data/datatest1.rda b/data/datatest1.rda new file mode 100644 index 0000000..d2640f2 Binary files /dev/null and b/data/datatest1.rda differ diff --git a/data/datatest2.rda b/data/datatest2.rda new file mode 100644 index 0000000..968d941 Binary files /dev/null and b/data/datatest2.rda differ diff --git a/data/indiv_dt.rda b/data/indiv_dt.rda new file mode 100644 index 0000000..e504d1e Binary files /dev/null and b/data/indiv_dt.rda differ diff --git a/man/activity_corr_table.Rd b/man/activity_corr_table.Rd index df0a754..84aafdb 100644 --- a/man/activity_corr_table.Rd +++ b/man/activity_corr_table.Rd @@ -7,9 +7,9 @@ \format{ A data frame with 92 rows and 3 variables: \describe{ - \item{A10}{business sectors in 10 categories} - \item{A21}{business sectors in 21 categories} - \item{A88}{business sectors in 88 categories} +\item{A10}{business sectors in 10 categories} +\item{A21}{business sectors in 21 categories} +\item{A88}{business sectors in 88 categories} } } \usage{ diff --git a/man/datatest1.Rd b/man/datatest1.Rd new file mode 100644 index 0000000..2ce673d --- /dev/null +++ b/man/datatest1.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{datatest1} +\alias{datatest1} +\title{data crossing 4 categorical variables, none are hierarchical.} +\format{ +A tibble/data frame with 689 rows and 12 variables: +\describe{ +\item{A10}{business sector, not hierarchical} +\item{cj}{legal category, not hierarchical} +\item{type_distrib}{type of distribution, not hierarchical} +\item{treff}{Number of employees (categorical), not hierarchical} +\item{nb_obs}{Frequency, number of companies} +\item{nb_obs_rnd}{Frequency rounded, number of companies} +\item{pizzas_tot}{turnover value in euros} +\item{pizzas_tot_abs}{turnover absolute value in euros} +\item{pizzas_max}{turnover max value in euros} +\item{is_secret_freq}{Boolean, TRUE if primary secret for frequency rule} +\item{is_secret_dom}{Boolean, TRUE if primary secret for dominance rule} +\item{is_secret_prim}{Boolean, TRUE if primary secret for any rule} + +} +} +\usage{ +datatest1 +} +\description{ +data crossing 4 categorical variables, none are hierarchical. +} +\keyword{datasets} diff --git a/man/datatest2.Rd b/man/datatest2.Rd new file mode 100644 index 0000000..be9802f --- /dev/null +++ b/man/datatest2.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{datatest2} +\alias{datatest2} +\title{data crossing 5 categorical variables, none are hierarchical.} +\format{ +A tibble/data frame with 5 612 rows and 15 variables: +\describe{ +\item{A10}{business sector, not hierarchical} +\item{cj}{legal category, not hierarchical} +\item{type_distrib}{type of distribution, not hierarchical} +\item{treff}{Number of employees (categorical), not hierarchical} +\item{nuts1}{NUTS region, no hierarchical} +\item{nb_obs}{Frequency, number of companies} +\item{nb_obs_rnd}{Frequency rounded, number of companies} +\item{pizzas_tot}{turnover value in euros} +\item{pizzas_tot_abs}{turnover absolute value in euros} +\item{pizzas_max}{turnover max value in euros} +\item{is_secret_freq}{Boolean, TRUE if primary secret for frequency rule} +\item{is_secret_dom}{Boolean, TRUE if primary secret for dominance rule} +\item{is_secret_prim}{Boolean, TRUE if primary secret for any rule} + +} +} +\usage{ +datatest2 +} +\description{ +data crossing 5 categorical variables, none are hierarchical. +} +\keyword{datasets} diff --git a/man/figures/lifecycle-archived.svg b/man/figures/lifecycle-archived.svg new file mode 100644 index 0000000..48f72a6 --- /dev/null +++ b/man/figures/lifecycle-archived.svg @@ -0,0 +1 @@ + lifecyclelifecyclearchivedarchived \ No newline at end of file diff --git a/man/figures/lifecycle-defunct.svg b/man/figures/lifecycle-defunct.svg new file mode 100644 index 0000000..01452e5 --- /dev/null +++ b/man/figures/lifecycle-defunct.svg @@ -0,0 +1 @@ +lifecyclelifecycledefunctdefunct \ No newline at end of file diff --git a/man/figures/lifecycle-deprecated.svg b/man/figures/lifecycle-deprecated.svg new file mode 100644 index 0000000..4baaee0 --- /dev/null +++ b/man/figures/lifecycle-deprecated.svg @@ -0,0 +1 @@ +lifecyclelifecycledeprecateddeprecated \ No newline at end of file diff --git a/man/figures/lifecycle-experimental.svg b/man/figures/lifecycle-experimental.svg new file mode 100644 index 0000000..d1d060e --- /dev/null +++ b/man/figures/lifecycle-experimental.svg @@ -0,0 +1 @@ +lifecyclelifecycleexperimentalexperimental \ No newline at end of file diff --git a/man/figures/lifecycle-maturing.svg b/man/figures/lifecycle-maturing.svg new file mode 100644 index 0000000..df71310 --- /dev/null +++ b/man/figures/lifecycle-maturing.svg @@ -0,0 +1 @@ +lifecyclelifecyclematuringmaturing \ No newline at end of file diff --git a/man/figures/lifecycle-questioning.svg b/man/figures/lifecycle-questioning.svg new file mode 100644 index 0000000..08ee0c9 --- /dev/null +++ b/man/figures/lifecycle-questioning.svg @@ -0,0 +1 @@ +lifecyclelifecyclequestioningquestioning \ No newline at end of file diff --git a/man/figures/lifecycle-stable.svg b/man/figures/lifecycle-stable.svg new file mode 100644 index 0000000..e015dc8 --- /dev/null +++ b/man/figures/lifecycle-stable.svg @@ -0,0 +1 @@ +lifecyclelifecyclestablestable \ No newline at end of file diff --git a/man/figures/lifecycle-superseded.svg b/man/figures/lifecycle-superseded.svg new file mode 100644 index 0000000..75f24f5 --- /dev/null +++ b/man/figures/lifecycle-superseded.svg @@ -0,0 +1 @@ + lifecyclelifecyclesupersededsuperseded \ No newline at end of file diff --git a/man/from_4_to_3.Rd b/man/from_4_to_3.Rd new file mode 100644 index 0000000..3acd48f --- /dev/null +++ b/man/from_4_to_3.Rd @@ -0,0 +1,108 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sp_from_4_to_3.R +\name{from_4_to_3} +\alias{from_4_to_3} +\title{Function reducing from 4 to 3 categorical variables} +\usage{ +from_4_to_3( + dfs, + dfs_name, + totcode, + hrcfiles = NULL, + sep_dir = FALSE, + hrc_dir = "hrc_alt", + v1 = NULL, + v2 = NULL, + sep = "_", + maximize_nb_tabs = FALSE +) +} +\arguments{ +\item{dfs}{data.frame with 4 categorical variables (n >= 2 in the general case)} + +\item{dfs_name}{name of the dataframe} + +\item{totcode}{named vector of totals for categorical variables} + +\item{hrcfiles}{named vector indicating the hrc files of hierarchical variables +among the categorical variables of dfs} + +\item{sep_dir}{allows forcing the writing of hrc into a separate folder, +default is FALSE} + +\item{hrc_dir}{folder to write hrc files if writing to a new folder is forced +or if no folder is specified in hrcfiles} + +\item{v1}{allows forcing the value of the first variable to merge, +not specified by default (NULL)} + +\item{v2}{allows forcing the value of the second variable to merge, +not specified by default (NULL)} + +\item{sep}{separator used during concatenation of variables} + +\item{maximize_nb_tabs}{specifies whether to prefer selecting hierarchical variables with +the most nodes in priority (TRUE), generating more tables but with smaller sizes, +or non-hierarchical variables with the fewest modalities (FALSE) to create fewer tables} +} +\value{ +A list containing the following components: +\itemize{ +\item \code{tabs}: named list of 3-dimensional dataframes +(n-1 dimensions in the general case) with nested hierarchies +\item \code{hrc}: named list of hrc specific to the variable created +through merging +\item \code{alt_tot}: named list of totals +\item \code{vars}: named list of vectors representing the merged variables +during the two stages of dimension reduction +} +} +\description{ +Function reducing from 4 to 3 categorical variables +} +\examples{ +library(dplyr) +data <- expand.grid( + ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), + GEO = c("Total", "G1", "G2"), + SEX = c("Total", "F", "M"), + AGE = c("Total", "AGE1", "AGE2"), + stringsAsFactors = FALSE +) \%>\% + as.data.frame() + +data <- data \%>\% mutate(VALUE = 1) + +hrc_act <- "hrc_ACT.hrc" + +sdcHierarchies::hier_create(root = "Total", nodes = c("A","B")) \%>\% + sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2")) \%>\% + sdcHierarchies::hier_add(root = "B", nodes = c("B1","B2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) + +# Results of the function +res1 <- from_4_to_3( + dfs = data, + dfs_name = "tab", + totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"), + hrcfiles = c(ACT = hrc_act), + sep_dir = TRUE, + hrc_dir = "output" +) + +# Maximize the number of tables +res2 <- from_4_to_3( + dfs = data, + dfs_name = "tab", + totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"), + hrcfiles = c(ACT = hrc_act), + sep_dir = TRUE, + hrc_dir = "output", + maximize_nb_tabs = TRUE +) +} +\keyword{internal} diff --git a/man/from_4_to_3_case_0_hr.Rd b/man/from_4_to_3_case_0_hr.Rd new file mode 100644 index 0000000..9f6bf51 --- /dev/null +++ b/man/from_4_to_3_case_0_hr.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sp_from_4_to_3_case_0_hr.R +\name{from_4_to_3_case_0_hr} +\alias{from_4_to_3_case_0_hr} +\title{Transition from 4 to 3 variables by merging two non-hierarchical variables} +\usage{ +from_4_to_3_case_0_hr(dfs, dfs_name, v1, v2, totcode, dir_name, sep = "_") +} +\arguments{ +\item{dfs}{data.frame with 4 categorical variables (n >= 2 in the general case)} + +\item{dfs_name}{name of the data.frame in the list provided by the user} + +\item{v1}{non-hierarchical categorical variable} + +\item{v2}{non-hierarchical categorical variable} + +\item{totcode}{named vector of totals for categorical variables} + +\item{dir_name}{folder where to write the hrc files +if no folder is specified in hrcfiles} + +\item{sep}{separator used when concatenating variables} +} +\value{ +A list containing: +\itemize{ +\item \code{tabs}: named list of 3-dimensional dataframes +(n-1 dimensions in the general case) with nested hierarchies +\item \code{hrc}: named list of hrc specific to the variable created via merging +\item \code{alt_tot}: named list of totals +\item \code{vars}: named list of vectors representing the merged variables +during the two steps of dimension reduction +} +} +\description{ +Transition from 4 to 3 variables by merging two non-hierarchical variables +} +\examples{ +library(dplyr) +data <- expand.grid( + ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), + SEX = c("Total", "F", "M","F1","F2","M1","M2"), + AGE = c("Total", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), + ECO = c("PIB","Households","Companies"), + stringsAsFactors = FALSE, + KEEP.OUT.ATTRS = FALSE +) \%>\% + as.data.frame() + +data <- data \%>\% mutate(VALUE = 1:n()) + +hrc_act <- "hrc_ACT.hrc" +sdcHierarchies::hier_create(root = "Total", nodes = c("A","B")) \%>\% + sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) + +hrc_sex <- "hrc_SEX.hrc" +sdcHierarchies::hier_create(root = "Total", nodes = c("F","M")) \%>\% + sdcHierarchies::hier_add(root = "F", nodes = c("F1","F2")) \%>\% + sdcHierarchies::hier_add(root = "M", nodes = c("M1","M2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_sex, row.names = FALSE, col.names = FALSE, quote = FALSE) + +res1 <- from_4_to_3_case_0_hr(dfs = data, + dfs_name = "dfs_name", + v1 = "ECO",v2 = "AGE", + totcode = c(ACT = "Total",SEX = "Total", + AGE = "Total",ECO = "PIB"), + dir_name = "output") +} +\keyword{internal} diff --git a/man/from_4_to_3_case_1_hr.Rd b/man/from_4_to_3_case_1_hr.Rd new file mode 100644 index 0000000..a74b9a4 --- /dev/null +++ b/man/from_4_to_3_case_1_hr.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sp_from_4_to_3_case_1_hr.R +\name{from_4_to_3_case_1_hr} +\alias{from_4_to_3_case_1_hr} +\title{Transition from 4 to 3 variables by merging a hierarchical +and a non-hierarchical variable} +\usage{ +from_4_to_3_case_1_hr( + dfs, + dfs_name, + v1, + v2, + totcode, + hrcfiles, + dir_name, + sep = "_" +) +} +\arguments{ +\item{dfs}{data.frame with 4 categorical variables (n >= 2 in the general case)} + +\item{dfs_name}{name of the data.frame in the list provided by the user} + +\item{v1}{non-hierarchical categorical variable} + +\item{v2}{hierarchical categorical variable} + +\item{totcode}{named vector of totals for categorical variables} + +\item{hrcfiles}{named vector indicating the hrc files of hierarchical variables +among the categorical variables of dfs} + +\item{dir_name}{directory where to write the hrc files +if no folder is specified in hrcfiles} + +\item{sep}{separator used when concatenating variables} +} +\value{ +A list containing: +\itemize{ +\item \code{tabs}: named list of 3-dimensional dataframes +(n-1 dimensions in the general case) with nested hierarchies +\item \code{hrc}: named list of hrc specific to the variable created by fusion +\item \code{alt_tot}: named list of totals +\item \code{vars}: named list of vectors representing the merged variables +during the two stages of dimension reduction +} +} +\description{ +Transition from 4 to 3 variables by merging a hierarchical +and a non-hierarchical variable +} +\examples{ +library(dplyr) +data <- expand.grid( + ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), + SEX = c("Total", "F", "M","F1","F2","M1","M2"), + AGE = c("Total", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), + ECO = c("PIB","Ménages","Entreprises"), + stringsAsFactors = FALSE, + KEEP.OUT.ATTRS = FALSE +) \%>\% + as.data.frame() + +data <- data \%>\% mutate(VALUE = 1:n()) + +hrc_act <- "hrc_ACT.hrc" +sdcHierarchies::hier_create(root = "Total", nodes = c("A","B")) \%>\% + sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) + +hrc_sex <- "hrc_SEX.hrc" +sdcHierarchies::hier_create(root = "Total", nodes = c("F","M")) \%>\% + sdcHierarchies::hier_add(root = "F", nodes = c("F1","F2")) \%>\% + sdcHierarchies::hier_add(root = "M", nodes = c("M1","M2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_sex, row.names = FALSE, col.names = FALSE, quote = FALSE) + +res1 <- from_4_to_3_case_1_hr(dfs = data, + dfs_name = "dfs_name", + v1 = "ECO",v2 = "SEX", + totcode = c(ACT = "Total",SEX = "Total", + AGE = "Total",ECO = "PIB"), + hrcfiles = c(ACT = hrc_act, SEX = hrc_sex), + dir_name = "output") +} +\keyword{internal} diff --git a/man/from_4_to_3_case_2_hr.Rd b/man/from_4_to_3_case_2_hr.Rd new file mode 100644 index 0000000..9e36bb0 --- /dev/null +++ b/man/from_4_to_3_case_2_hr.Rd @@ -0,0 +1,93 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sp_from_4_to_3_case_2_hr.R +\name{from_4_to_3_case_2_hr} +\alias{from_4_to_3_case_2_hr} +\title{Transition from 4 to 3 variables via the merging of two hierarchical variables} +\usage{ +from_4_to_3_case_2_hr( + dfs, + dfs_name, + v1, + v2, + totcode, + hrcfiles, + dir_name, + sep = "_" +) +} +\arguments{ +\item{dfs}{data.frame with 4 categorical variables (n >= 2 in the general case)} + +\item{dfs_name}{name of the data.frame in the list provided by the user} + +\item{v1}{hierarchical categorical variable} + +\item{v2}{hierarchical categorical variable} + +\item{totcode}{named vector of totals for categorical variables} + +\item{hrcfiles}{named vector indicating the hrc files of hierarchical variables +among the categorical variables of dfs} + +\item{dir_name}{folder where to write the hrc files +if no folder is specified in hrcfiles} + +\item{sep}{separator used during the concatenation of variables} +} +\value{ +A list containing the following components: +\itemize{ +\item \code{tabs}: named list of 3-dimensional dataframes +(n-1 dimensions in the general case) with nested hierarchies +\item \code{hrcs}: named list of hrc specific to the variable +created via the merge +\item \code{alt_tot}: named list of totals +\item \code{vars}: named list of vectors representing the merged variables +during the two stages of dimension reduction +} +} +\description{ +Transition from 4 to 3 variables via the merging of two hierarchical variables +} +\examples{ +library(dplyr) +data <- expand.grid( + ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), + SEX = c("Total", "F", "M","F1","F2","M1","M2"), + AGE = c("Total", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), + ECO = c("PIB","Ménages","Entreprises"), + stringsAsFactors = FALSE, + KEEP.OUT.ATTRS = FALSE +) \%>\% + as.data.frame() + +data <- data \%>\% mutate(VALUE = 1:n()) + +hrc_act <- "hrc_ACT.hrc" +sdcHierarchies::hier_create(root = "Total", nodes = c("A","B")) \%>\% + sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) + +hrc_sex <- "hrc_SEX.hrc" +sdcHierarchies::hier_create(root = "Total", nodes = c("F","M")) \%>\% + sdcHierarchies::hier_add(root = "F", nodes = c("F1","F2")) \%>\% + sdcHierarchies::hier_add(root = "M", nodes = c("M1","M2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_sex, row.names = FALSE, col.names = FALSE, quote = FALSE) + +res <- from_4_to_3_case_2_hr(dfs = data, + dfs_name = "dfs_name", + v1 = "ACT",v2 = "SEX", + totcode = c(ACT = "Total",SEX = "Total", + AGE = "Total",ECO = "PIB"), + hrcfiles = c(ACT = hrc_act, SEX = hrc_sex), + dir_name = "output") +} +\keyword{internal} diff --git a/man/from_5_to_3.Rd b/man/from_5_to_3.Rd new file mode 100644 index 0000000..e4827cc --- /dev/null +++ b/man/from_5_to_3.Rd @@ -0,0 +1,147 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sp_from_5_to_3.R +\name{from_5_to_3} +\alias{from_5_to_3} +\title{Function reducing from 5 to 3 categorical variables} +\usage{ +from_5_to_3( + dfs, + dfs_name, + totcode, + hrcfiles = NULL, + sep_dir = FALSE, + hrc_dir = "hrc_alt", + v1 = NULL, + v2 = NULL, + v3 = NULL, + v4 = NULL, + sep = "_", + maximize_nb_tabs = FALSE, + verbose = FALSE +) +} +\arguments{ +\item{dfs}{data.frame with 5 categorical variables (n >= 3 in the general case)} + +\item{dfs_name}{name of the data.frame in the list provided by the user} + +\item{totcode}{named vector of totals for categorical variables} + +\item{hrcfiles}{named vector indicating the hrc files of hierarchical variables +among the categorical variables of dfs} + +\item{sep_dir}{allows forcing the writing of hrc files in a separate folder +defaulted to FALSE} + +\item{hrc_dir}{folder where to write the hrc files if forcing the writing +in a new folder or if no folder is specified in hrcfiles} + +\item{v1}{allows forcing the value of the first variable to merge +when reducing from 5 to 4 dimensions, not specified by default (NULL)} + +\item{v2}{allows forcing the value of the second variable to merge +when reducing from 5 to 4 dimensions, not specified by default (NULL)} + +\item{v3}{allows forcing the value of the first variable to merge +when reducing from 4 to 3 dimensions, not specified by default (NULL)} + +\item{v4}{allows forcing the value of the second variable to merge +when reducing from 4 to 3 dimensions, not specified by default (NULL)} + +\item{sep}{separator used during concatenation of variables} + +\item{maximize_nb_tabs}{specifies whether to prefer selecting hierarchical variables with +the most nodes as a priority (TRUE), which generates more tables +but of smaller size, or non-hierarchical variables with the least modality (FALSE) +to create fewer tables} + +\item{verbose}{prints the different steps of the function to notify +the user of the progress, mainly for the general function gen_tabs_5_4_to_3()} +} +\value{ +a list containing the following components: +\itemize{ +\item \code{tabs}: named list of dataframes with 3 dimensions +(n-2 dimensions in the general case) endowed with nested hierarchies +\item \code{hrcs5_4}: named list of hrc specific to the variable created +via the merge when reducing from 5 to 4 dimensions +\item \code{hrcs4_3}: named list of hrc specific to the variable created +via the merge when reducing from 4 to 3 dimensions +\item \code{alt_tot5_4}: named list of totals when reducing from 5 to 4 dimensions +\item \code{alt_tot4_3}: named list of totals when reducing from 4 to 3 dimensions +\item \code{vars}: named list of vectors representing the merged variables +during the two steps of dimension reduction +} +} +\description{ +Function reducing from 5 to 3 categorical variables +} +\examples{ +library(dplyr) +data <- expand.grid( + ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), + GEO = c("Total", "GA", "GB", "GA1", "GA2", "GB1", "GB2"), + SEX = c("Total", "F", "M","F1","F2","M1","M2"), + AGE = c("Total", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), + ECO = c("PIB","Ménages","Entreprises"), + stringsAsFactors = FALSE, + KEEP.OUT.ATTRS = FALSE +) \%>\% + as.data.frame() + +data <- data \%>\% mutate(VALUE = 1:n()) + +hrc_act <- "hrc_ACT.hrc" +sdcHierarchies::hier_create(root = "Total", nodes = c("A","B")) \%>\% + sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) + +hrc_geo <- "hrc_GEO.hrc" +sdcHierarchies::hier_create(root = "Total", nodes = c("GA","GB")) \%>\% + sdcHierarchies::hier_add(root = "GA", nodes = c("GA1","GA2")) \%>\% + sdcHierarchies::hier_add(root = "GB", nodes = c("GB1","GB2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_geo, row.names = FALSE, col.names = FALSE, quote = FALSE) + +hrc_sex <- "hrc_SEX.hrc" +sdcHierarchies::hier_create(root = "Total", nodes = c("F","M")) \%>\% + sdcHierarchies::hier_add(root = "F", nodes = c("F1","F2")) \%>\% + sdcHierarchies::hier_add(root = "M", nodes = c("M1","M2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_sex, row.names = FALSE, col.names = FALSE, quote = FALSE) + +# Results of the function +res1 <- from_5_to_3( + dfs = data, + dfs_name = "tab", + totcode = c(SEX="Total",AGE="Total", GEO="Total", ACT="Total", ECO = "PIB"), + hrcfiles = c(ACT = hrc_act, GEO = hrc_geo, SEX = hrc_sex), + sep_dir = TRUE, + hrc_dir = "output", + v1 = "ACT", + v2 = "AGE", + v3 = "SEX", + v4 = "ECO" +) + +res2 <- from_5_to_3( + dfs = data, + dfs_name = "tab", + totcode = c(SEX="Total",AGE="Total", GEO="Total", ACT="Total", ECO = "PIB"), + hrcfiles = c(ACT = hrc_act, GEO = hrc_geo, SEX = hrc_sex), + sep_dir = TRUE, + hrc_dir = "output", + verbose = TRUE +) +} +\keyword{internal} diff --git a/man/import.Rd b/man/import.Rd index d973a5c..e1db645 100644 --- a/man/import.Rd +++ b/man/import.Rd @@ -10,15 +10,15 @@ import(arb_filename) \item{arb_filename}{name of the arb file (with extension) containing the information needed for the import. \cr (nom du fichier arb (avec extension) contenant les - informations nécessaires à l'import.)} +informations nécessaires à l'import.)} } \value{ A list of one or more data.frames. Each data.frame corresponds to to the result of a tabulation. The names of the tables filled in the -lines of the batch of the form \code{// "..."} are recovered. \cr +lines of the batch of the form \verb{// "..."} are recovered. \cr (Une liste d'un ou plusieurs data.frames. Chaque data.frame correspond - au résultat d'une tabulation. Les noms des tableaux renseignés dans les - lignes du batch de la forme \code{// "..."} sont récupérés.) +au résultat d'une tabulation. Les noms des tableaux renseignés dans les +lignes du batch de la forme \verb{// "..."} sont récupérés.) } \description{ Imports into R the results generated by Tau-Argus from the information @@ -39,7 +39,7 @@ message). \cr (Nécessite que le batch ait été exécuté et se soit terminé sans erreur. Afin d'importer immédiatement après exécution du batch, cette fonction sera ainsi -le plus souvent appelée via \code{\link{run_arb}} (en paramétrant +le plus souvent appelée via \code{\link[=run_arb]{run_arb()}} (en paramétrant \code{import = TRUE}). Il n'est possible (pour l'instant) que d'importer les résultats de type "2" @@ -74,9 +74,9 @@ l'utilisateur : \code{shadow_var}, \code{cost_var}, \code{output_options}.) \section{See also}{ -The function \code{\link{rtauargus}}, which uses this +The function \code{\link[=micro_rtauargus]{micro_rtauargus()}}, which uses this function and inherits its parameters. \cr -(La fonction \code{\link{rtauargus}}, qui utilise cette +(La fonction \code{\link[=micro_rtauargus]{micro_rtauargus()}}, qui utilise cette fonction et hérite de ses paramètres.) } diff --git a/man/indiv_dt.Rd b/man/indiv_dt.Rd new file mode 100644 index 0000000..a8f7489 --- /dev/null +++ b/man/indiv_dt.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{indiv_dt} +\alias{indiv_dt} +\title{Companies data at individual level.} +\format{ +A data.table with 9 786 rows and 12 variables: +\describe{ +\item{A10}{business sector, not hierarchical} +\item{A21}{business sector, not hierarchical but nested in A10} +\item{A88}{business sector, not hierarchical but nested in A21} +\item{CJ}{legal category, not hierarchical} +\item{TYPE}{type of distribution, not hierarchical} +\item{SIZE}{Number of employees (categorical), not hierarchical} +\item{NUTS1}{NUTS 1 level of European administrative regions, not hierarchical} +\item{NUTS2}{NUTS 2 level of European administrative regions, not hierarchical} +\item{NUTS3}{NUTS 3 level of European administrative regions, not hierarchical} +\item{WEIGHT}{Weight of the companies, numeric} +\item{TURNOVER}{Turnover, numeric} +\item{PRODUCTION}{Production, numeric} +} +} +\usage{ +indiv_dt +} +\description{ +Companies data at individual level. +} +\keyword{datasets} diff --git a/man/length_tabs.Rd b/man/length_tabs.Rd new file mode 100644 index 0000000..263cfbb --- /dev/null +++ b/man/length_tabs.Rd @@ -0,0 +1,123 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sp_var_to_merge.R +\name{length_tabs} +\alias{length_tabs} +\title{Calculation of the table sizes generated a priori during the reduction of dimension +from 4 or 5 dimensions to 3 dimensions} +\usage{ +length_tabs(dfs, v1, v2, v3 = NULL, v4 = NULL, totcode, hrcfiles = NULL) +} +\arguments{ +\item{dfs}{a data.frame + +Variable in the 5->4 or 4->3 step} + +\item{v1}{the first merged variable} + +\item{v2}{the second merged variable + +Variable in the case of 4->3 passage in the 4->3 process +do not specify v1_v2 if three variables are merged into one} + +\item{v3}{the third original variable to be merged} + +\item{v4}{the fourth original variable to be merged} + +\item{totcode}{character named vector} + +\item{hrcfiles}{named vector of hrc files related to the variables} +} +\value{ +a list of the lengths of the tables created during the dimension reduction +} +\description{ +Calculation of the table sizes generated a priori during the reduction of dimension +from 4 or 5 dimensions to 3 dimensions +} +\examples{ +# Dimension 4 +library(dplyr) +data <- expand.grid( + ACT = c("Total", "A", "B", "A1", "A2","A3", "B1", "B2","B3","B4","C", + "name_non_changed_vars","E","F","G","B5"), + GEO = c("Total", "G1", "G2"), + SEX = c("Total", "F", "M"), + AGE = c("Total", "AGE1", "AGE2"), + stringsAsFactors = FALSE +) \%>\% + as.data.frame() + +data <- data \%>\% mutate(VALUE = 1) + + +hrc_act <- "hrc_ACT.hrc" + +sdcHierarchies::hier_create( + root = "Total", + nodes = c("A","B","C","name_non_changed_vars","E","F","G") +) \%>\% + sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2","A3")) \%>\% + sdcHierarchies::hier_add(root = "B", nodes = c("B1","B2","B3","B4","B5")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) + +# Function results + +res1 <- length_tabs(dfs = data, + hrcfiles = c(ACT = hrc_act), + totcode = c(SEX="Total",AGE="Total", GEO="Total", ACT="Total"), + v1 = "ACT", + v2 = "GEO") + +# Dimension 5 +data <- expand.grid( + ACT = c("Total_A", paste0("A", seq(1,5),"_"), + paste0("A1_", seq(1,7)),paste0("A2_", seq(1,9))), + GEO = c("Total_G", "GA", "GB", "GA1", "GA2", "GB1", "GB2","GA3","GB3","GB4"), + SEX = c("Total_S", "F", "M","F1","F2","M1","M2"), + AGE = c("Ensemble", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), + ECO = c("PIB","Ménages","Entreprises"), + stringsAsFactors = FALSE, + KEEP.OUT.ATTRS = FALSE +) \%>\% + as.data.frame() + +data <- data \%>\% mutate(VALUE = 1:n()) + +hrc_act <- "hrc_ACT.hrc" +sdcHierarchies::hier_create(root = "Total_A", nodes = paste0("A", seq(1,5),"_")) \%>\% + sdcHierarchies::hier_add(root = "A1_", nodes = paste0("A1_", seq(1,7))) \%>\% + sdcHierarchies::hier_add(root = "A2_", nodes = paste0("A2_", seq(1,9))) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) + +hrc_geo <- "hrc_GEO.hrc" +sdcHierarchies::hier_create(root = "Total_G", nodes = c("GA","GB")) \%>\% + sdcHierarchies::hier_add(root = "GA", nodes = c("GA1","GA2","GA3")) \%>\% + sdcHierarchies::hier_add(root = "GB", nodes = c("GB1","GB2","GB3","GB4")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_geo, row.names = FALSE, col.names = FALSE, quote = FALSE) + +res2 <- length_tabs(dfs = data, + hrcfiles = c(ACT = hrc_act, GEO = hrc_geo), + totcode = c(SEX="Total_S",AGE="Ensemble", GEO="Total_G", + ACT="Total_A", ECO = "PIB"), + v1 = "ACT",v2 = "AGE", + v3 = "GEO",v4 = "SEX") + +res3 <- length_tabs(dfs = data, + hrcfiles = c(ACT = hrc_act, GEO = hrc_geo), + totcode = c(SEX="Total_S",AGE="Ensemble", GEO="Total_G", + ACT="Total_A", ECO = "PIB"), + v1 = "ACT",v2 = "AGE",v3 = "GEO") +} +\keyword{internal} diff --git a/man/micro_arb.Rd b/man/micro_arb.Rd index 54554d7..7123d91 100644 --- a/man/micro_arb.Rd +++ b/man/micro_arb.Rd @@ -29,16 +29,16 @@ extension). If not specified, a temporary file. \cr (nom du fichier arb généré (avec extension). Si non renseigné, un fichier temporaire.)} -\item{asc_filename}{[\strong{required}] name of the asc file +\item{asc_filename}{name of the asc file (with extension). \cr -([\strong{obligatoire}] nom du fichier asc (avec extension).)} +( nom du fichier asc (avec extension).)} \item{rda_filename}{name of the rda file (with extension). If not filled in, \code{asc_filename} with the extension "rda" instead of "asc". \cr (nom du fichier rda (avec extension). Si non renseigné, \code{asc_filename} avec l'extension "rda" à la place de "asc".)} -\item{explanatory_vars}{[\strong{required}] categorical variables, in +\item{explanatory_vars}{categorical variables, in form of a list of vectors. Each element of the list is a vector of variable names forming a tab. Example: \code{list(c("CJ", "A21"), c("SEX", "REGION"))} for the first @@ -46,7 +46,7 @@ table crossing \code{CJ} x \code{A21} and the second table crossing \code{SEXE} x \code{REGION}. If a single tabulation, a simple vector of the variables to be crossed is accepted (no need for \code{list(...)}). \cr -([\strong{obligatoire}] variables catégorielles, sous +( variables catégorielles, sous forme de liste de vecteurs. Chaque élément de la liste est un vecteur des noms des variables formant une tabulation. Exemple: \code{list(c("CJ", "A21"), c("SEX", "REGION"))} pour la première @@ -68,11 +68,11 @@ renseigné, \code{response_var} sera utilisé par Tau-Argus.)} \item{cost_var}{cost variable(s) for the secondary secret. \cr (variable(s) de coût pour le secret secondaire.)} -\item{safety_rules}{[\strong{required}] primary secret rule(s). +\item{safety_rules}{primary secret rule(s). String in Tau-Argus batch syntax. The weighting is treated in a separate parameter (do not specify WGT here, use the \code{weighted}). \cr -([\strong{obligatoire}] règle(s) de secret primaire. +( règle(s) de secret primaire. Chaîne de caractères en syntaxe batch Tau-Argus. La pondération est traitée dans un paramètre à part (ne pas spécifier WGT ici, utiliser le paramètre \code{weighted}).)} @@ -80,11 +80,11 @@ dans un paramètre à part (ne pas spécifier WGT ici, utiliser le paramètre \item{weighted}{indicator(s) (boolean). \cr (indicatrice(s) de pondération (booléen).)} -\item{suppress}{[\strong{required}] secret management method(s) +\item{suppress}{secret management method(s) secondary (Tau-Argus batch syntax). If the method is the same for each tabulation, the first parameter (table number) will be ignored and renumbered automatically (see section 'Syntax'). \cr -([\strong{obligatoire}] méthode(s) de gestion du secret +( méthode(s) de gestion du secret secondaire (syntaxe batch de Tau-Argus). Si la méthode est la même pour chaque tabulation, le premier paramètre (numéro du tableau) sera ignoré et renuméroté automatiquement (voir la section 'Syntax').)} @@ -133,12 +133,21 @@ A list of two elements: arb filename and names of output files fichiers en sortie (utile pour récupérer les noms générés aléatoirement).) } \description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} + +Development on \code{micro_arb()} is complete, and for new code we recommend +switching to the tabular-wise protection provided by \code{tab_rtauargus()} +or \code{tab_multi_manager()}, which offer a lot more features for your +protection problems. + +See more details in \code{vignette("rtauargus")} or in +\verb{vignette("protect_multi_tables)}. + Creates a batch file for microdata, executable by Tau-Argus in command line. \cr (Crée un fichier batch pour microdonnées, exécutable par Tau-Argus en ligne de commande.) -} -\details{ + The function does not check if asc and rda files exist. \cr (La fonction ne vérifie pas si les fichiers asc et rda existent.) } @@ -181,13 +190,13 @@ tabulation, en 2 pour la deuxième tabulation, etc.) If the list \code{explanatory_vars} has names, these will be used in the batch to give an identifier to the table, in the form of -of a comment line (\code{// "..."}). They will be +of a comment line (\verb{// "..."}). They will be reused by the \code{import} function to name the R format arrays tables in output. (Si la liste \code{explanatory_vars} comporte des noms, ceux-ci seront utilisés dans le batch pour donner un identifiant au tableau, sous la forme -d'une ligne de commentaire (\code{// "..."}). Ils seront +d'une ligne de commentaire (\verb{// "..."}). Ils seront réutilisés par la fonction \code{import} pour nommer les tableaux formats R en sortie.) } @@ -225,9 +234,9 @@ autant de valeurs que de tabulations.) \section{See also}{ -The function \code{\link{rtauargus}}, which uses this +The function \code{\link[=micro_rtauargus]{micro_rtauargus()}}, which uses this function and inherits its parameters. \cr -(La fonction \code{\link{rtauargus}}, qui utilise cette +(La fonction \code{\link[=micro_rtauargus]{micro_rtauargus()}}, qui utilise cette fonction et hérite de ses paramètres.) } diff --git a/man/micro_asc_rda.Rd b/man/micro_asc_rda.Rd index b04c71a..5860d68 100644 --- a/man/micro_asc_rda.Rd +++ b/man/micro_asc_rda.Rd @@ -20,8 +20,8 @@ micro_asc_rda( ) } \arguments{ -\item{microdata}{[\strong{required}] data.frame containing the microdata. \cr -([\strong{obligatoire}] data.frame contenant les microdonnées.)} +\item{microdata}{data.frame containing the microdata. \cr +( data.frame contenant les microdonnées.)} \item{asc_filename}{name of the asc file (with extension). If not filled in, a temporary file. \cr @@ -66,12 +66,12 @@ attribuer la valeur de \code{rtauargus.totcode}.)} \item{missing}{code(s) for a missing value (see section 'Specific parameters' for the syntax of this parameter). \cr (code(s) pour une valeur manquante (voir section - 'Specific parameters' pour la syntaxe de ce paramètre).)} +'Specific parameters' pour la syntaxe de ce paramètre).)} \item{codelist}{file(s) containing labels for categorical variables (see section 'Specific parameters' for the syntax of this parameter). \cr (fichier(s) contenant les libellés des variables catégorielles - (voir section 'Specific parameters' pour la syntaxe de ce paramètre).)} +(voir section 'Specific parameters' pour la syntaxe de ce paramètre).)} \item{request}{(not yet implemented - pas encore implémenté)} } @@ -81,11 +81,21 @@ invisibly). Empty columns (filled with \code{NA} or empty strings) are not strings) will not be exported to the asc file. A warning message will list the affected columns. \cr (Renvoie les noms des fichiers asc et rda sous forme de liste (de - manière invisible). Les colonnes vides (remplies de \code{NA} ou de chaînes - de caractères vides) ne seront pas exportées dans le fichier asc. Un - message d'avertissement listera les colonnes concernées.) +manière invisible). Les colonnes vides (remplies de \code{NA} ou de chaînes +de caractères vides) ne seront pas exportées dans le fichier asc. Un +message d'avertissement listera les colonnes concernées.) } \description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} + +Development on \code{micro_asc_rda()} is complete, and for new code we recommend +switching to the tabular-wise protection provided by \code{tab_rtauargus()} +or \code{tab_multi_manager()}, which offer a lot more features for your +protection problems. + +See more details in \code{vignette("rtauargus")} or in +\verb{vignette("protect_multi_tables)}. + Creates a fixed length text file (asc) and a metadata file (rda) from microdata and additional information. \cr (Crée un fichier texte de longueur fixe (asc) et un fichier de métadonnées @@ -105,10 +115,10 @@ variables that can take this parameter. For example : \itemize{ - \item{\code{totcode = "All"} : écrit \code{ "All"} for all - categorical variables} - \item{\code{totcode = c("All", GEO = "France")} : idem, except for the - \code{GEO} variable } +\item{\code{totcode = "All"} : écrit \verb{ "All"} for all +categorical variables} +\item{\code{totcode = c("All", GEO = "France")} : idem, except for the +\code{GEO} variable } } (Les paramètres \code{totcode}, \code{missing} et \code{codelist} @@ -122,10 +132,10 @@ variables pouvant prendre ce paramètre. Par exemple : \itemize{ - \item{\code{totcode = "Ensemble"} : écrit \code{ "Ensemble"} pour - toutes les variables catégorielles} - \item{\code{totcode = c("Ensemble", GEO = "France")} : idem, sauf pour la - variable \code{GEO}} +\item{\code{totcode = "Ensemble"} : écrit \verb{ "Ensemble"} pour +toutes les variables catégorielles} +\item{\code{totcode = c("Ensemble", GEO = "France")} : idem, sauf pour la +variable \code{GEO}} }) } @@ -149,7 +159,7 @@ If the hierarchy is defined in a separate hrc file a possible \code{hiercodelist} if it differs from the default option of the package). In this case, you can write explicitly the path to an existing file file (\code{c(A38 = "a38.hrc")}), but also make a call to -\code{\link{write_hrc}} which will generate an hrc file from microdata. +\code{\link[=write_hrc]{write_hrc()}} which will generate an hrc file from microdata. \emph{Example :} \code{c(A38 = write_hrc(microdata, c("A38", "A21", "A10")))} @@ -184,7 +194,7 @@ Si la hiérarchie est définie dans un fichier hrc à part un éventuel \code{hierleadstring} s'il diffère de l'option par défaut du package). Dans ce cas, on peut écrire explicitement le chemin vers un fichier existant (\code{c(A38 = "a38.hrc")}), mais aussi passer un appel à -\code{\link{write_hrc}} qui génèrera un fichier hrc à partir de microdonnées. +\code{\link[=write_hrc]{write_hrc()}} qui génèrera un fichier hrc à partir de microdonnées. Un raccourci pour cet appel est d'écrire les variables constituant la hiérarchie séparées par des ">". Dans ce cas, les microdonnées et @@ -215,7 +225,7 @@ beforehand. The digits after the decimal point may be incorrect in the asc file if the total number of digits (before or after the decimal separator) is -greater than 15. See \code{\link[gdata]{write.fwf}} (function used to +greater than 15. See \code{\link[gdata:write.fwf]{gdata::write.fwf()}} (function used to writing the asc file) for more details. \cr (Le paramètre \code{decimals} indique le nombre minimal de décimales à faire @@ -227,16 +237,16 @@ au préalable. Les chiffres après la virgule peuvent être incorrects dans le fichier asc si le nombre total de chiffres (avant ou après le séparateur décimal) est -supérieur à 15. Voir \code{\link[gdata]{write.fwf}} (fonction utilisée pour +supérieur à 15. Voir \code{\link[gdata:write.fwf]{gdata::write.fwf()}} (fonction utilisée pour écrire le fichier asc) pour plus de détails.) } \section{See also}{ -The function \code{\link{rtauargus}}, which uses this +The function \code{\link[=micro_rtauargus]{micro_rtauargus()}}, which uses this function and inherits its parameters. \cr -(La fonction \code{\link{rtauargus}}, qui utilise cette +(La fonction \code{\link[=micro_rtauargus]{micro_rtauargus()}}, qui utilise cette fonction et hérite de ses paramètres.) } diff --git a/man/rtauargus.Rd b/man/micro_rtauargus.Rd similarity index 78% rename from man/rtauargus.Rd rename to man/micro_rtauargus.Rd index d2cad2a..84a121d 100644 --- a/man/rtauargus.Rd +++ b/man/micro_rtauargus.Rd @@ -1,20 +1,20 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rtauargus.R -\name{rtauargus} -\alias{rtauargus} +% Please edit documentation in R/micro_rtauargus.R +\name{micro_rtauargus} +\alias{micro_rtauargus} \title{Protects tables from microdata} \usage{ -rtauargus(microdata, explanatory_vars, safety_rules, suppress, ...) +micro_rtauargus(microdata, explanatory_vars, safety_rules, suppress, ...) } \arguments{ -\item{microdata}{[\strong{required}] data.frame containing the microdata +\item{microdata}{data.frame containing the microdata (or path to text files already present: see section \emph{Microdata already as text files}). \cr -([\strong{obligatoire}] data.frame contenant les microdonnées - (ou chemin vers des fichiers texte déjà présents : voir section - \emph{Microdata already as text files}).)} +( data.frame contenant les microdonnées +(ou chemin vers des fichiers texte déjà présents : voir section +\emph{Microdata already as text files}).)} -\item{explanatory_vars}{[\strong{required}] categorical variables, in +\item{explanatory_vars}{categorical variables, in form of a list of vectors. Each element of the list is a vector of variable names forming a tab. Example: \code{list(c("CJ", "A21"), c("SEX", "REGION"))} for the first @@ -22,7 +22,7 @@ table crossing \code{CJ} x \code{A21} and the second table crossing \code{SEXE} x \code{REGION}. If a single tabulation, a simple vector of the variables to be crossed is accepted (no need for \code{list(...)}). \cr -([\strong{obligatoire}] variables catégorielles, sous +( variables catégorielles, sous forme de liste de vecteurs. Chaque élément de la liste est un vecteur des noms des variables formant une tabulation. Exemple: \code{list(c("CJ", "A21"), c("SEX", "REGION"))} pour la première @@ -31,20 +31,20 @@ table croisant \code{CJ} x \code{A21} et la seconde croisant Si une seule tabulation, un simple vecteur des variables à croiser est accepté (pas besoin de \code{list(...)}).)} -\item{safety_rules}{[\strong{required}] primary secret rule(s). +\item{safety_rules}{primary secret rule(s). String in Tau-Argus batch syntax. The weighting is treated in a separate parameter (do not specify WGT here, use the \code{weighted}). \cr -([\strong{obligatoire}] règle(s) de secret primaire. +( règle(s) de secret primaire. Chaîne de caractères en syntaxe batch Tau-Argus. La pondération est traitée dans un paramètre à part (ne pas spécifier WGT ici, utiliser le paramètre \code{weighted}).)} -\item{suppress}{[\strong{required}] secret management method(s) +\item{suppress}{secret management method(s) secondary (Tau-Argus batch syntax). If the method is the same for each tabulation, the first parameter (table number) will be ignored and renumbered automatically (see section 'Syntax'). \cr -([\strong{obligatoire}] méthode(s) de gestion du secret +( méthode(s) de gestion du secret secondaire (syntaxe batch de Tau-Argus). Si la méthode est la même pour chaque tabulation, le premier paramètre (numéro du tableau) sera ignoré et renuméroté automatiquement (voir la section 'Syntax').)} @@ -52,16 +52,26 @@ renuméroté automatiquement (voir la section 'Syntax').)} \item{...}{optional parameters for \code{micro_asc_rda}, \code{micro_arb} and \code{run_arb}. See the help for these functions. \cr (paramètres optionnels pour \code{micro_asc_rda}, \code{micro_arb} - et \code{run_arb}. Voir l'aide de ces fonctions.)} +et \code{run_arb}. Voir l'aide de ces fonctions.)} } \value{ If \code{import = TRUE}, a list of data.frames (protected tables), \code{NULL} otherwise. \cr (Si \code{import = TRUE}, une liste de data.frames (tableaux - secrétisés), \code{NULL} sinon.) +secrétisés), \code{NULL} sinon.) } \description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} + +Development on \code{micro_rtauargus()} is complete, and for new code we recommend +switching to the tabular-wise protection provided by \code{tab_rtauargus()} +or \code{tab_multi_manager()}, which offer a lot more features for your +protection problems. + +See more details in \code{vignette("rtauargus")} or in +\verb{vignette("protect_multi_tables)}. + Protects tables built from microdata and specifications of the crossings. The function allows to perform the complete process, namely the creation of the asc and rda files, the construction of the arb file, the effective @@ -71,13 +81,12 @@ spécifications des croisements. La fonction permet d'effectuer le processus complet, à savoir la création des fichiers asc et rda, la construction du fichier arb, le lancement effectif de Tau-Argus et la récupération éventuelle des résultats dans R.) -} -\details{ + The function executes sequentially the functions: \itemize{ \item{ -\code{\link{micro_asc_rda}} \code{->} -\code{\link{micro_arb}} \code{->} -\code{\link{run_arb}} +\code{\link[=micro_asc_rda]{micro_asc_rda()}} \verb{->} +\code{\link[=micro_arb]{micro_arb()}} \verb{->} +\code{\link[=run_arb]{run_arb()}} } } @@ -88,11 +97,11 @@ preparation of the data and to maintain the entire chain of processing in R. \cr (La fonction exécute séquentiellement les fonctions : \itemize{ - \item{ - \code{\link{micro_asc_rda}} \code{->} - \code{\link{micro_arb}} \code{->} - \code{\link{run_arb}} - } +\item{ +\code{\link[=micro_asc_rda]{micro_asc_rda()}} \verb{->} +\code{\link[=micro_arb]{micro_arb()}} \verb{->} +\code{\link[=run_arb]{run_arb()}} +} } Les fichiers intermédiaires sans nom renseigné (\code{asc_filename}...) @@ -120,9 +129,9 @@ le deuxième élément le fichier rda. Le fichier rda peut être omis s'il porte le même nom que le fichier asc (à l'extension près). Utiliser cette option pour lancer le processus complet sans la génération - des données en texte. Ne pas spécifier \code{asc_filename} ou - \code{rda_filename} (sert à nommer les fichiers texte à créer, ce qui est - sans objet ici). +des données en texte. Ne pas spécifier \code{asc_filename} ou +\code{rda_filename} (sert à nommer les fichiers texte à créer, ce qui est +sans objet ici). } \section{Syntax}{ @@ -173,6 +182,6 @@ rtauargus( \seealso{ \code{link{rtauargus_plus}}, a version optimized for a large number of tables (at the cost of some usage restrictions). \cr -(\code{\link{rtauargus_plus}}, une version optimisée pour un grand - nombre de tableaux (au prix de quelques restrictions d'usage).) +(\code{\link[=rtauargus_plus]{rtauargus_plus()}}, une version optimisée pour un grand +nombre de tableaux (au prix de quelques restrictions d'usage).) } diff --git a/man/nb_tab_generated.Rd b/man/nb_tab_generated.Rd new file mode 100644 index 0000000..05a3d90 --- /dev/null +++ b/man/nb_tab_generated.Rd @@ -0,0 +1,113 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sp_var_to_merge.R +\name{nb_tab_generated} +\alias{nb_tab_generated} +\title{Calculate the number of tables generated when merging 3 variables +in the transition from 5 to 3 dimensions} +\usage{ +nb_tab_generated(v1, v2, v3 = NULL, v4 = NULL, hrcfiles = NULL, data = NULL) +} +\arguments{ +\item{v1}{first variable to be merged} + +\item{v2}{second variable to be merged} + +\item{v3}{third variable to be merged ( +variable that will be merged with v1 and v2 if v4 is not specified)} + +\item{v4}{fourth variable to be merged (with v3)} + +\item{hrcfiles}{named list of hrc files} + +\item{data}{data.frame (used only in the case where a trio is formed)} +} +\value{ +an integer representing the number of tables generated +} +\description{ +Calculate the number of tables generated when merging 3 variables +in the transition from 5 to 3 dimensions +} +\examples{ +# Dimension 4 +library(dplyr) +data <- expand.grid( + ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), + GEO = c("Total", "G1", "G2"), + SEX = c("Total", "F", "M"), + AGE = c("Total", "AGE1", "AGE2"), + stringsAsFactors = FALSE +) \%>\% + as.data.frame() + +data <- data \%>\% mutate(VALUE = 1) + +hrc_act <- "hrc_ACT.hrc" + +sdcHierarchies::hier_create(root = "Total", nodes = c("A", "B")) \%>\% + sdcHierarchies::hier_add(root = "A", nodes = c("A1", "A2")) \%>\% + sdcHierarchies::hier_add(root = "B", nodes = c("B1", "B2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level, name), 3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) + +# 1 pair created +nb_tab_generated(v1 = "ACT", v2 = "GEO", + hrcfiles = c(ACT = hrc_act)) + +# Dimension 5 +data <- expand.grid( + ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), + GEO = c("Total", "GA", "GB", "GA1", "GA2", "GB1", "GB2"), + SEX = c("Total", "F", "M", "F1", "F2", "M1", "M2"), + AGE = c("Total", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), + ECO = c("PIB", "Ménages", "Entreprises"), + stringsAsFactors = FALSE, + KEEP.OUT.ATTRS = FALSE +) \%>\% + as.data.frame() + +data <- data \%>\% mutate(VALUE = 1:n()) + +hrc_act <- "hrc_ACT.hrc" +sdcHierarchies::hier_create(root = "Total", nodes = c("A", "B")) \%>\% + sdcHierarchies::hier_add(root = "A", nodes = c("A1", "A2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level, name), 3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) + +hrc_geo <- "hrc_GEO.hrc" +sdcHierarchies::hier_create(root = "Total", nodes = c("GA", "GB")) \%>\% + sdcHierarchies::hier_add(root = "GA", nodes = c("GA1", "GA2")) \%>\% + sdcHierarchies::hier_add(root = "GB", nodes = c("GB1", "GB2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level, name), 3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_geo, row.names = FALSE, col.names = FALSE, quote = FALSE) + +hrc_sex <- "hrc_SEX.hrc" +sdcHierarchies::hier_create(root = "Total", nodes = c("F", "M")) \%>\% + sdcHierarchies::hier_add(root = "F", nodes = c("F1", "F2")) \%>\% + sdcHierarchies::hier_add(root = "M", nodes = c("M1", "M2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level, name), 3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_sex, row.names = FALSE, col.names = FALSE, quote = FALSE) + +# Trio merged +nb_tab_generated(data = data, + v1 = "ACT", v2 = "GEO", v3 = "SEX", + hrcfiles = c(ACT = hrc_act, GEO = hrc_geo, SEX = hrc_sex)) + +# 2 pairs created +nb_tab_generated(v1 = "ACT", v2 = "GEO", + v3 = "SEX", v4 = "EXO", + hrcfiles = c(ACT = hrc_act, GEO = hrc_geo, SEX = hrc_sex)) +} +\keyword{internal} diff --git a/man/nuts23_fr_corr_table.Rd b/man/nuts23_fr_corr_table.Rd index 1616463..8973fa3 100644 --- a/man/nuts23_fr_corr_table.Rd +++ b/man/nuts23_fr_corr_table.Rd @@ -7,8 +7,8 @@ \format{ A data frame with 92 rows and 3 variables: \describe{ - \item{NUTS2}{NUTS2 levels in France - equivalent of French "Régions"} - \item{NUTS3}{NUTS3 levels in France - equivalent of French "Départements"} +\item{NUTS2}{NUTS2 levels in France - equivalent of French "Régions"} +\item{NUTS3}{NUTS3 levels in France - equivalent of French "Départements"} } } \usage{ diff --git a/man/reduce_dims.Rd b/man/reduce_dims.Rd new file mode 100644 index 0000000..4a6aa53 --- /dev/null +++ b/man/reduce_dims.Rd @@ -0,0 +1,242 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sp_reduce_dims.R +\name{reduce_dims} +\alias{reduce_dims} +\title{General function that selects the appropriate separator and applies dimension reduction.} +\usage{ +reduce_dims( + dfs, + dfs_name, + totcode, + hrcfiles = NULL, + sep_dir = FALSE, + hrc_dir = "hrc_alt", + vars_to_merge = NULL, + nb_tab_option = "min", + limit = NULL, + over_split = FALSE, + vec_sep = c("___", "_XXX_", "_YYY_", "_TTT_", "_UVW_"), + verbose = FALSE +) +} +\arguments{ +\item{dfs}{data.frame with 4 or 5 categorical variables} + +\item{dfs_name}{name of the data.frame in the list provided by the user} + +\item{totcode}{named vector of totals for categorical variables} + +\item{hrcfiles}{named vector indicating the hrc files of hierarchical variables +among the categorical variables of dfs} + +\item{sep_dir}{allows forcing the writing of hrc into a separate folder, +default is FALSE} + +\item{hrc_dir}{folder to write hrc files if writing to a new folder is forced +or if no folder is specified in hrcfiles} + +\item{vars_to_merge}{NULL or vector of variables to be merged: +2 in dimension 4; 3 or 4 in dimension 5} + +\item{nb_tab_option}{strategy to follow for choosing variables automatically: +\itemize{ +\item \code{'min'}: minimize the number of tables; +\item \code{'max'}: maximize the number of tables; +\item \code{'smart'}: minimize the number of tables under the constraint +of their row count. +}} + +\item{limit}{maximum allowed number of rows in the smart or over_split = TRUE case} + +\item{over_split}{indicates if we split in several tables the tables bigger than +limit at the end of the reduction process ; it decreases the number +of hierarchy of these tables} + +\item{vec_sep}{vector of candidate separators to use} + +\item{verbose}{print the different steps of the function to inform the user +of progress} +} +\value{ +A list containing: +\itemize{ +\item \code{tabs}: named list of 3-dimensional dataframes +with nested hierarchies +\item \code{alt_hrc}: named list of hrc specific to the variables created +during merging to go to dimension 3 +\item \code{alt_totcode}: named list of totals specific to the variables +created during merging to go to dimension 3 +\item \code{vars}: categorical variables of the output dataframes +\item \code{sep}: separator used to link the variables +\item \code{totcode}: named vector of totals for all categorical variables +\item \code{hrcfiles}: named vector of hrc for categorical variables +(except the merged one) +\item \code{fus_vars}: named vector of vectors representing the merged +variables during dimension reduction +} +} +\description{ +General function that selects the appropriate separator and applies dimension reduction. +} +\examples{ +library(dplyr) +# Examples for dimension 4 + +data <- expand.grid( + ACT = c("Total", "A", "B", "A1", "A2","A3", "B1", + "B2","B3","B4","C","D","E","F","G","B5"), + GEO = c("Total", "G1", "G2"), + SEX = c("Total", "F", "M"), + AGE = c("Total", "AGE1", "AGE2"), + stringsAsFactors = FALSE +) \%>\% + as.data.frame() \%>\% + mutate(VALUE = 1) + +if(!dir.exists("hrc")) dir.create("hrc") +hrc_act <- "hrc/hrc_ACT4.hrc" + +sdcHierarchies::hier_create( + root = "Total", + nodes = c("A","B","C","D","E","F","G") +) \%>\% + sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2","A3")) \%>\% + sdcHierarchies::hier_add(root = "B", nodes = c("B1","B2","B3","B4","B5")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table( + file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE + ) + +# Reduce dim by forcing variables to be merged +res1 <- reduce_dims( + dfs = data, + dfs_name = "tab", + totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"), + hrcfiles = c(ACT = hrc_act), + sep_dir = TRUE, + vars_to_merge = c("ACT", "GEO"), + hrc_dir = "output", + verbose = TRUE +) + +# Split the output in order to be under the limit & forcing variables to be merged +res1b <- reduce_dims( + dfs = data, + dfs_name = "tab", + totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"), + hrcfiles = c(ACT = hrc_act), + sep_dir = TRUE, + hrc_dir = "output", + nb_tab_option = 'smart', + over_split = TRUE, + verbose = TRUE, + limit = 100 +) + +# Result of the function (minimizes the number of created tables by default) +res2 <- reduce_dims( + dfs = data, + dfs_name = "tab", + totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"), + hrcfiles = c(ACT = hrc_act), + sep_dir = TRUE, + hrc_dir = "output", + verbose = TRUE +) + +# Result of the function (maximize the number of created tables) +res3 <- reduce_dims( + dfs = data, + dfs_name = "tab", + totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"), + hrcfiles = c(ACT = hrc_act), + sep_dir = TRUE, + hrc_dir = "output", + nb_tab_option = "max", + verbose = TRUE +) + +# Example for dimension 5 + +data <- expand.grid( + ACT = c("Total_A", paste0("A", seq(1,5),"_"),paste0("A1_", seq(1,7)),paste0("A2_", seq(1,9))), + GEO = c("Total_G", "GA", "GB", "GA1", "GA2", "GB1", "GB2","GA3","GB3","GB4"), + SEX = c("Total_S", "F", "M","F1","F2","M1","M2"), + AGE = c("Ensemble", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), + ECO = c("PIB","Ménages","Entreprises"), + stringsAsFactors = FALSE, + KEEP.OUT.ATTRS = FALSE +) \%>\% + as.data.frame() \%>\% + mutate(VALUE = 1:n()) + +hrc_act <- "hrc/hrc_ACT5.hrc" +sdcHierarchies::hier_create(root = "Total_A", nodes = paste0("A", seq(1,5),"_")) \%>\% + sdcHierarchies::hier_add(root = "A1_", nodes = paste0("A1_", seq(1,7))) \%>\% + sdcHierarchies::hier_add(root = "A2_", nodes = paste0("A2_", seq(1,9))) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) + +hrc_age <- "hrc/hrc_AGE5.hrc" +sdcHierarchies::hier_create(root = "Ensemble", nodes = c("AGE1", "AGE2")) \%>\% + sdcHierarchies::hier_add(root = "AGE1", nodes = c("AGE11", "AGE12")) \%>\% + sdcHierarchies::hier_add(root = "AGE2", nodes = c("AGE21", "AGE22")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_age, row.names = FALSE, col.names = FALSE, quote = FALSE) + +hrc_geo <- "hrc/hrc_GEO5.hrc" +sdcHierarchies::hier_create(root = "Total_G", nodes = c("GA","GB")) \%>\% + sdcHierarchies::hier_add(root = "GA", nodes = c("GA1","GA2","GA3")) \%>\% + sdcHierarchies::hier_add(root = "GB", nodes = c("GB1","GB2","GB3","GB4")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_geo, row.names = FALSE, col.names = FALSE, quote = FALSE) + +# Results of the function +res4 <- reduce_dims( + dfs = data, + dfs_name = "tab", + totcode = c(SEX = "Total_S", AGE = "Ensemble", GEO = "Total_G", ACT = "Total_A", ECO = "PIB"), + hrcfiles = c(ACT = hrc_act, GEO = hrc_geo, AGE = hrc_age), + sep_dir = TRUE, + hrc_dir = "output", + verbose = TRUE +) + +res5 <- reduce_dims( + dfs = data, + dfs_name = "tab", + totcode = c(SEX = "Total_S", AGE = "Ensemble", GEO = "Total_G", ACT = "Total_A", ECO = "PIB"), + hrcfiles = c(ACT = hrc_act, GEO = hrc_geo), + sep_dir = TRUE, + hrc_dir = "output", + nb_tab_option = 'smart', + limit = 1300, + verbose = TRUE +) + +res6 <- reduce_dims( + dfs = data, + dfs_name = "tab", + totcode = c(SEX = "Total_S", AGE = "Ensemble", GEO = "Total_G", ACT = "Total_A", ECO = "PIB"), + hrcfiles = c(ACT = hrc_act, GEO = hrc_geo), + sep_dir = TRUE, + hrc_dir = "output", + nb_tab_option = 'min', + verbose = TRUE, + limit = 4470, + over_split = TRUE +) +} +\keyword{internal} diff --git a/man/restore_format.Rd b/man/restore_format.Rd new file mode 100644 index 0000000..ebdf590 --- /dev/null +++ b/man/restore_format.Rd @@ -0,0 +1,116 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sp_restore_format.R +\name{restore_format} +\alias{restore_format} +\title{Function to reverse the process of dimension reduction} +\usage{ +restore_format(masq, res) +} +\arguments{ +\item{masq}{a list of data.frames on which the secret has been applied} + +\item{res}{the result of the dimension reduction function (to retrieve +the merged variables) and the separator (sep).} +} +\value{ +the original dataframe with 4 or 5 dimensions +} +\description{ +Function to reverse the process of dimension reduction +} +\examples{ +# Examples with dimension 4 +library(dplyr) +data <- expand.grid( + ACT = c("Total", "A", "B", "A1", "A2","A3", "B1", "B2","B3","B4","C", + "name_non_changed_vars","E","F","G","B5"), + GEO = c("Total", "G1", "G2"), + SEX = c("Total", "F", "M"), + AGE = c("Total", "AGE1", "AGE2"), + stringsAsFactors = FALSE +) \%>\% + as.data.frame() + +data <- data \%>\% mutate(VALUE = 1) + +hrc_act <- "hrc_ACT.hrc" + +sdcHierarchies::hier_create( + root = "Total", + nodes = c("A","B","C","name_non_changed_vars","E","F","G") +) \%>\% + sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2","A3")) \%>\% + sdcHierarchies::hier_add(root = "B", nodes = c("B1","B2","B3","B4","B5")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) + +# Result of the function by forcing some variables to be merged +res_red_dim <- reduce_dims( + dfs = data, + dfs_name = "tab", + totcode = c(SEX="Total",AGE="Total", GEO="Total", ACT="Total"), + hrcfiles = c(ACT = hrc_act), + sep_dir = TRUE, + hrc_dir = "output", + vars_to_merge = c("ACT","GEO") +) + +res1 <- restore_format(masq = res_red_dim$tabs, res = res_red_dim) +dim(setdiff(res1,data))[1] == 0 + +# return TRUE +# We have exactly the sames cases in the datatable after splitting and unsplitting data + +# Exemple dimension 5 + +data <- expand.grid( + ACT = c("Total_A", paste0("A", seq(1,5),"_"),paste0("A1_", seq(1,7)), + paste0("A2_", seq(1,9))), + GEO = c("Total_G", "GA", "GB", "GA1", "GA2", "GB1", "GB2","GA3","GB3","GB4"), + SEX = c("Total_S", "F", "M","F1","F2","M1","M2"), + AGE = c("Ensemble", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"), + ECO = c("PIB","Ménages","Entreprises"), + stringsAsFactors = FALSE, + KEEP.OUT.ATTRS = FALSE +) \%>\% + as.data.frame() + +data <- data \%>\% mutate(VALUE = 1:n()) + +hrc_act <- "hrc_ACT.hrc" +sdcHierarchies::hier_create(root = "Total_A", nodes = paste0("A", seq(1,5),"_")) \%>\% + sdcHierarchies::hier_add(root = "A1_", nodes = paste0("A1_", seq(1,7))) \%>\% + sdcHierarchies::hier_add(root = "A2_", nodes = paste0("A2_", seq(1,9))) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) + +hrc_geo <- "hrc_GEO.hrc" +sdcHierarchies::hier_create(root = "Total_G", nodes = c("GA","GB")) \%>\% + sdcHierarchies::hier_add(root = "GA", nodes = c("GA1","GA2","GA3")) \%>\% + sdcHierarchies::hier_add(root = "GB", nodes = c("GB1","GB2","GB3","GB4")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_geo, row.names = FALSE, col.names = FALSE, quote = FALSE) + +# function's result + +res_red_dim <- reduce_dims( + dfs = data, + dfs_name = "tab", + totcode = c(SEX="Total_S",AGE="Ensemble", GEO="Total_G", ACT="Total_A", ECO = "PIB"), + hrcfiles = c(ACT = hrc_act, GEO = hrc_geo), + sep_dir = TRUE, + hrc_dir = "output" +) + +res2 <- restore_format(masq = res_red_dim$tabs, res = res_red_dim) +} +\keyword{internal} diff --git a/man/rtauargus-package.Rd b/man/rtauargus-package.Rd index 0641b5f..b0e4253 100644 --- a/man/rtauargus-package.Rd +++ b/man/rtauargus-package.Rd @@ -1,82 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rtauargus-package.R \docType{package} \name{rtauargus-package} +\alias{rtauargus} \alias{rtauargus-package} -\title{Package 'rtauargus' : Tau-Argus depuis R} - +\title{rtauargus: Using Tau-Argus from R} \description{ - -The \emph{rtauargus} provides an \strong{R} interface for -\strong{Tau-Argus}. - +Protects tables by calling the Tau-Argus software from R. } - -\details{ - -It allows to: - +\seealso{ +Useful links: \itemize{ - \item{create inputs (rda, arb, hst and tab files) from data in R format ;} - \item{generate the sequence of instructions to be executed in batch mode (arb file);} - \item{launch a τ-Argus batch in command line;} - \item{retrieve the results in R.} + \item \url{https://inseefrlab.github.io/rtauargus} + \item \url{https://github.com/inseefrlab/rtauargus} + \item \url{https://inseefrlab.github.io/rtauargus/} + \item Report bugs at \url{https://github.com/inseefrlab/rtauargus/issues} } -These different operations can be executed in one go, but also in a modular way. -They allow to integrate the tasks performed by τ-Argus in a processing chain written in R. - -The package presents other additional functionalities, such as: - -- managing the protection of several tables at once; -- creating a hierarchical variable from correspondence table. - -It's possible to choose a tabular or microdata approach, but the tabular -one is, from now on, encouraged. - -The syntax of some arguments is very close to the batch syntax of -Tau-Argus batch syntax. This allows to use a large number of functions without -multiplying the arguments of the functions. Moreover, the package can be adapted -more easily to possible modifications of the software (new available methods, -additional options...). The syntax rules for writing batch can be found -in the Tau-Argus reference manual. - } +\author{ +\strong{Maintainer}: Julien Jamme \email{julien.jamme@insee.fr} -\section{Note}{ - -The package was developped with Tau-Argus software v.4.2. It is not compatible -with the v.3.5 of Tau-Argus and earlier. - - -} - -\section{Localisation of TauArgus.exe}{ - -The function \code{\link{tab_rtauargus}}, \emph{via} \code{\link{run_arb}}, -needs to know where the TauArgus.exe file is located on your disk. -The option \code{rtauargus.tauargus_exe} contains the information -of its location and has to be filled just after the loading of the package. -The code to modify the default location is: -\code{options(rtauargus.tauargus_exe = "path/to/TauArgus.exe")} -It persists for all the current R session. - -This instruction can be placed in a specific program file to persist during -the session. It's also possible to keep this information permanently -by writing it in .Rprofile for example. - +Authors: +\itemize{ + \item Pierre-Yves Berrard \email{pierre-yves.berrard@insee.fr} + \item Nathanaël Rastout \email{nathanael.rastout@insee.fr} + \item Jeanne Pointet + \item Félix Beroud + \item André-Raymond Socard + \item Wistan Pomel } -\references{ - -Tau-Argus : \itemize{ - - \item{\href{https://github.com/sdcTools/tauargus}{source code, - last release to download, support}(sdcTools github repository)} - - \item{\href{https://research.cbs.nl/casc/tau.htm}{manual} - (on CBS website)} - - - +Other contributors: +\itemize{ + \item Institut National de la Statistique et des Études Économiques [copyright holder] } } +\keyword{internal} diff --git a/man/rtauargus_options.Rd b/man/rtauargus_options.Rd index b6065ab..24d0452 100644 --- a/man/rtauargus_options.Rd +++ b/man/rtauargus_options.Rd @@ -16,8 +16,8 @@ reset_rtauargus_options(...) specified, all the options will be reset. The prefix \code{"rtauargus."} is not required. \cr noms des options à réinitialiser, séparés par des virgules. Si - aucun nom n'est spécifié, toutes les options du package seront - réinitialisées. Le préfixe \code{"rtauargus."} est facultatif.} +aucun nom n'est spécifié, toutes les options du package seront +réinitialisées. Le préfixe \code{"rtauargus."} est facultatif.} } \description{ Manages (displays, modifies, resets) the options of rtauargus package. \cr @@ -40,7 +40,7 @@ been set by the user, are set with their default values (see table below). The already defined options keep the values set by the user. The options can be set during a session with the following instruction -\code{options(rtauargus.}...\code{ = }...\code{)}, or with a configuration +\verb{options(rtauargus.}...\code{=}...\verb{)}, or with a configuration file where the user have written its own options with such instructions, but this latter is not a proper way if reproducibility is sought. Les options du package définissent les comportements par défaut des @@ -63,7 +63,7 @@ de rtauargus qui ne sont pas encore déclarées (cf. tableau ci-dessous). Les options déjà définies par l'utilisateur gardent leurs valeurs. Elles peuvent être redéfinies pour une session par une instruction -\code{options(rtauargus.}...\code{ = }...\code{)}, ou de manière globale si +\verb{options(rtauargus.}...\code{=}...\verb{)}, ou de manière globale si de telles instructions sont placées dans un fichier de configuration propre à l'utilisateur (fortement déconseillé si le programme a vocation à être reproductible). @@ -74,23 +74,23 @@ utiliseront les valeurs par défaut du package.) \section{List of options}{ \tabular{lll}{ - \strong{Option} \tab \strong{Default Value} \tab \strong{Function} \cr - \code{------------------------} \tab \code{---------------------------------} \tab \code{-------------}\cr - rtauargus.decimals \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.decimals} \tab \link{micro_asc_rda}\cr - rtauargus.totcode \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.totcode}" \tab \cr - rtauargus.missing \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.missing}" \tab \cr - rtauargus.hierleadstring \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.hierleadstring}" \tab \cr - \code{------------------------} \tab \code{---------------------------------} \tab \code{-------------}\cr - rtauargus.response_var \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.response_var}" \tab \link{micro_arb} \cr - rtauargus.weighted \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.weighted} \tab \cr - rtauargus.linked \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.linked} \tab \cr - rtauargus.output_type \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.output_type}" \tab \cr - rtauargus.output_options \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.output_options}" \tab \cr - \code{------------------------} \tab \code{---------------------------------} \tab \code{-------------}\cr - rtauargus.missing_dir \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.missing_dir}" \tab \link{run_arb} \cr - rtauargus.tauargus_exe \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.tauargus_exe}" \tab \cr - rtauargus.show_batch_console \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.show_batch_console} \tab \cr - rtauargus.import \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.import} \tab +\strong{Option} \tab \strong{Default Value} \tab \strong{Function} \cr +\verb{------------------------} \tab \verb{---------------------------------} \tab \verb{-------------}\cr +rtauargus.decimals \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.decimals} \tab \link{micro_asc_rda}\cr +rtauargus.totcode \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.totcode}" \tab \cr +rtauargus.missing \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.missing}" \tab \cr +rtauargus.hierleadstring \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.hierleadstring}" \tab \cr +\verb{------------------------} \tab \verb{---------------------------------} \tab \verb{-------------}\cr +rtauargus.response_var \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.response_var}" \tab \link{micro_arb} \cr +rtauargus.weighted \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.weighted} \tab \cr +rtauargus.linked \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.linked} \tab \cr +rtauargus.output_type \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.output_type}" \tab \cr +rtauargus.output_options \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.output_options}" \tab \cr +\verb{------------------------} \tab \verb{---------------------------------} \tab \verb{-------------}\cr +rtauargus.missing_dir \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.missing_dir}" \tab \link{run_arb} \cr +rtauargus.tauargus_exe \tab "\Sexpr{rtauargus:::op.rtauargus$rtauargus.tauargus_exe}" \tab \cr +rtauargus.show_batch_console \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.show_batch_console} \tab \cr +rtauargus.import \tab \Sexpr{rtauargus:::op.rtauargus$rtauargus.import} \tab } } diff --git a/man/rtauargus_plus.Rd b/man/rtauargus_plus.Rd index 2f51853..37acee5 100644 --- a/man/rtauargus_plus.Rd +++ b/man/rtauargus_plus.Rd @@ -17,12 +17,12 @@ rtauargus_plus( \item{grp_size}{number of tables per Tau-Argus call (an integer between between 1 and 10). \cr (nombre de tableaux par appel de Tau-Argus (un entier compris - entre 1 et 10).)} +entre 1 et 10).)} -\item{microdata}{[\strong{required}] data.frame containing the microdata. \cr -([\strong{obligatoire}] data.frame contenant les microdonnées.)} +\item{microdata}{data.frame containing the microdata. \cr +( data.frame contenant les microdonnées.)} -\item{explanatory_vars}{[\strong{required}] categorical variables, in +\item{explanatory_vars}{categorical variables, in form of a list of vectors. Each element of the list is a vector of variable names forming a tab. Example: \code{list(c("CJ", "A21"), c("SEX", "REGION"))} for the first @@ -30,7 +30,7 @@ table crossing \code{CJ} x \code{A21} and the second table crossing \code{SEXE} x \code{REGION}. If a single tabulation, a simple vector of the variables to be crossed is accepted (no need for \code{list(...)}). \cr -([\strong{obligatoire}] variables catégorielles, sous +( variables catégorielles, sous forme de liste de vecteurs. Chaque élément de la liste est un vecteur des noms des variables formant une tabulation. Exemple: \code{list(c("CJ", "A21"), c("SEX", "REGION"))} pour la première @@ -39,28 +39,28 @@ table croisant \code{CJ} x \code{A21} et la seconde croisant Si une seule tabulation, un simple vecteur des variables à croiser est accepté (pas besoin de \code{list(...)}).)} -\item{safety_rules}{[\strong{required}] primary secret rule(s). +\item{safety_rules}{primary secret rule(s). String in Tau-Argus batch syntax. The weighting is treated in a separate parameter (do not specify WGT here, use the \code{weighted}). \cr -([\strong{obligatoire}] règle(s) de secret primaire. +( règle(s) de secret primaire. Chaîne de caractères en syntaxe batch Tau-Argus. La pondération est traitée dans un paramètre à part (ne pas spécifier WGT ici, utiliser le paramètre \code{weighted}).)} -\item{suppress}{[\strong{required}] secondary secret management method +\item{suppress}{secondary secret management method (Tau-Argus batch syntax). Only one method allowed for all tables. Example : \code{"GH(.,100)"} (the dot playing the role of the tabulation number). \cr -[\strong{obligatoire}] méthode de gestion du secret - secondaire (syntaxe batch de Tau-Argus). Une seule méthode autorisée pour - tous les tableaux. Exemple \code{"GH(.,100)"} (le point jouant le rôle du - numéro de tabulation).} +méthode de gestion du secret +secondaire (syntaxe batch de Tau-Argus). Une seule méthode autorisée pour +tous les tableaux. Exemple \code{"GH(.,100)"} (le point jouant le rôle du +numéro de tabulation).} \item{...}{optional parameters for \code{micro_asc_rda}, \code{micro_arb} and \code{run_arb}. See the help for these functions. \cr (paramètres optionnels pour \code{micro_asc_rda}, \code{micro_arb} - et \code{run_arb}. Voir l'aide de ces fonctions.)} +et \code{run_arb}. Voir l'aide de ces fonctions.)} } \value{ A list of data.frames (secret arrays). \cr @@ -69,7 +69,7 @@ A list of data.frames (secret arrays). \cr \description{ Optimization of the function \code{link{rtauargus}} for a large number of crossovers (all having the same parameters). \cr -(Optimisation de la fonction \code{\link{rtauargus}} pour un grand nombre de +(Optimisation de la fonction \code{\link[=micro_rtauargus]{micro_rtauargus()}} pour un grand nombre de croisements (ayant tous les mêmes paramètres).) } \details{ @@ -81,7 +81,7 @@ to read large text files of microdata. \code{rtauargus_plus} helps to improve the speed of execution. The function splits the list of tabs into groups of size \code{grp_size} and -makes a call to \code{rtauargus} for each group. It writes an +makes a call to \code{micro_rtauargus} for each group. It writes an asc file restricted to the only variables actually used within a of a group. @@ -101,7 +101,7 @@ pour lire des fichiers texte volumineux de microdonnées. \code{rtauargus_plus} permet d'améliorer la vitesse d'exécution. La fonction découpe la liste des tabulations en groupes de taille \code{grp_size} et -effectue un appel à \code{rtauargus} pour chaque groupe. Elle écrit un +effectue un appel à \code{micro_rtauargus} pour chaque groupe. Elle écrit un fichier asc restreint aux seules variables effectivement utilisées au sein d'un groupe. @@ -114,7 +114,7 @@ entre une lecture de fichiers asc trop volumineux et un nombre d'appels à Tau-Argus trop important. Elle peut être ajustée en fonction du nombre de variables communes à l'intérieur de chaque groupe de tabulations.) } -\section{Limits in relation to the function \code{rtauargus}}{ +\section{Limits in relation to the function \code{micro_rtauargus}}{ In return for the speed of execution, the crossings must have the @@ -165,8 +165,8 @@ rtauargus_plus( )} } \seealso{ -\code{\link{rtauargus}}, a function called repeatedly by +\code{\link[=micro_rtauargus]{micro_rtauargus()}}, a function called repeatedly by \code{rtauargus_plus}. \cr fonction appelée de manière répétée par - \code{rtauargus_plus}. +\code{rtauargus_plus}. } diff --git a/man/run_arb.Rd b/man/run_arb.Rd index 5216813..3b2bb45 100644 --- a/man/run_arb.Rd +++ b/man/run_arb.Rd @@ -28,8 +28,8 @@ run_arb( results do not exist ("stop" to trigger an error, "create" to create the missing folders). \cr (action si les dossiers où seront écrits les - résultats n'existent pas ("stop" pour déclencher une erreur, "create" pour - créer les dossiers manquants).)} +résultats n'existent pas ("stop" pour déclencher une erreur, "create" pour +créer les dossiers manquants).)} \item{tauargus_exe}{directory and name of the Tau-Argus software. \cr (répertoire et nom du logiciel Tau-Argus.)} @@ -42,7 +42,7 @@ Si NULL, le fichier "logbook.txt" sera suavegardé sur le répertoire de travail \item{show_batch_console}{to display the batch progress in the console. \cr (pour afficher le déroulement du batch dans la - console.)} +console.)} \item{verbose}{boolean, to display the batch execution (if TRUE) or only error messages if any (if FALSE) \cr @@ -52,7 +52,7 @@ uniquement les messages d'erreurs, s'il y en a (si FALSE))} \item{import}{to import in R the files produced, \code{TRUE} by default. \cr (pour importer dans R les fichiers produits, \code{TRUE} par - défaut.)} +défaut.)} \item{...}{additional parameters for \code{system()}. \cr (paramètres supplémentaires pour \code{system()}.)} @@ -103,9 +103,9 @@ réponse) dans les métadonnées (fichier rda).) \section{See also}{ -The function \code{\link{rtauargus}}, which uses this +The function \code{\link[=micro_rtauargus]{micro_rtauargus()}}, which uses this function and inherits its parameters. \cr -(La fonction \code{\link{rtauargus}}, qui utilise cette +(La fonction \code{\link[=micro_rtauargus]{micro_rtauargus()}}, qui utilise cette fonction et hérite de ses paramètres.) } diff --git a/man/sp_format.Rd b/man/sp_format.Rd new file mode 100644 index 0000000..99e2cbd --- /dev/null +++ b/man/sp_format.Rd @@ -0,0 +1,88 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sp_reduce_dims.R +\name{sp_format} +\alias{sp_format} +\title{Change the result of dimension reduction to be directly usable +in rtauargus} +\usage{ +sp_format(res, dfs_name, sep, totcode, hrcfiles) +} +\arguments{ +\item{res}{result of variable merging composed of name_non_changed_vars, a list of lists of tables, +a list of hierarchical files, a list of subtotals associated with these files, +and a list of vectors of variables or a vector of variables depending on the base size +of the dataframes} + +\item{dfs_name}{the name of the entered dataframes} + +\item{sep}{character} + +\item{totcode}{character named vector} + +\item{hrcfiles}{character named vector} +} +\value{ +A list containing: +\itemize{ +\item \code{tabs}: named list of 3-dimensional dataframes +with nested hierarchies +\item \code{alt_hrc}: named list of hrc specific to the variables +created during merging to go to dimension 3 +\item \code{alt_totcode}: named list of totals specific to the variables +created during merging to go to dimension 3 +\item \code{vars}: categorical variables of the output dataframes +\item \code{sep}: separator used to link the variables +\item \code{totcode}: named vector of totals for all categorical variables +\item \code{hrcfiles}: named vector of hrc for categorical variables +(except the merged one) +\item \code{fus_vars}: named vector of vectors representing the merged +variables during dimension reduction +} +} +\description{ +Change the result of dimension reduction to be directly usable +in rtauargus +} +\examples{ +library(dplyr) +data <- expand.grid( + ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), + GEO = c("Total", "G1", "G2"), + SEX = c("Total", "F", "M"), + AGE = c("Total", "AGE1", "AGE2"), + stringsAsFactors = FALSE +) \%>\% + as.data.frame() + +data <- data \%>\% mutate(VALUE = 1) + +hrc_act <- "hrc_ACT.hrc" + +sdcHierarchies::hier_create(root = "Total", nodes = c("A","B")) \%>\% + sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2")) \%>\% + sdcHierarchies::hier_add(root = "B", nodes = c("B1","B2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) + +# Results of the function +res1 <- from_4_to_3( + dfs = data, + dfs_name = "tab", + totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"), + hrcfiles = c(ACT = hrc_act), + sep_dir = TRUE, + hrc_dir = "output" +) + +res <- sp_format(res1, + dfs_name = "tab", + sep = "_", + totcode = c(SEX="Total",AGE="Total", + GEO="Total", ACT="Total"), + hrcfiles = c(ACT = hrc_act) + ) +} +\keyword{internal} diff --git a/man/tab_arb.Rd b/man/tab_arb.Rd index 0d5c7cc..d1092fc 100644 --- a/man/tab_arb.Rd +++ b/man/tab_arb.Rd @@ -25,8 +25,8 @@ tab_arb( file \cr (nom du fichier arb généré (avec extension). Si non renseigné, un fichier temporaire.)} -\item{tab_filename}{[\strong{mandatory}] path of the tab_filename.\cr -([\strong{obligatoire}]nom du fichier .tab (avec extension).)} +\item{tab_filename}{path of the tab_filename.\cr +(nom du fichier .tab (avec extension).)} \item{rda_filename}{rda file name (with .rda extension) \cr nom du fichier rda (avec extension)} @@ -36,10 +36,9 @@ Example : hst_filename = "path_to_file/apriori.hst"\cr (fichier(s) d'informations \emph{a priori}. Voir ci-dessous pour la syntaxe. Exemple : hst_filename = "path_to_file/apriori.hst")} -\item{explanatory_vars}{[\strong{mandatory}] -Explanatory vars in a vector +\item{explanatory_vars}{Explanatory vars in a vector Example : \code{c("CJ", "A21")} for the tabular \code{CJ} x \code{A21} \cr -([\strong{obligatoire}] variables catégorielles, sous forme de vecteur. +( variables catégorielles, sous forme de vecteur. Exemple : \code{c("CJ", "A21")} pour le premier Pour un tableau croisant \code{CJ} x \code{A21})} @@ -48,18 +47,16 @@ Pour un tableau croisant \code{CJ} x \code{A21})} (Nom de la variable de réponse dans le tableau \code{""}. Permet de tariter les tableaux de fréquence)} -\item{safety_rules}{[\strong{mandatory}] -Rules for primary suppression with Argus syntax, if the primary suppression +\item{safety_rules}{Rules for primary suppression with Argus syntax, if the primary suppression has been dealt with an apriori file specify manual safety range :"MAN(10)" for example.\cr -([\strong{obligatoire}] Règle(s) de secret primaire. +( Règle(s) de secret primaire. Chaîne de caractères en syntaxe batch Tau-Argus. Si le secret primaire a été traité dans un fichier d'apriori : utiliser "MAN(10)")} -\item{suppress}{[\strong{mandatory}] -Algortihm for secondary suppression (Tau-Argus batch syntax), and the +\item{suppress}{Algortihm for secondary suppression (Tau-Argus batch syntax), and the parameters for it.\cr -([\strong{obligatoire}] Algorithme de gestion du secret secondaire +( Algorithme de gestion du secret secondaire (syntaxe batch de Tau-Argus), ainsi que les potentiels paramètres associés)} \item{separator}{Character used as separator in the .tab file. \cr @@ -78,19 +75,19 @@ Pour le format SBS utiliser \code{"4"})} \item{output_options}{Additionnal parameter for the output, by default : code{"AS+"} (print Status). To specify no options : \code{""}.\cr (Options supplémentaires des fichiers en sortie. Valeur - par défaut du package : \code{"AS+"} (affichage du statut). Pour ne - spécifier aucune option, \code{""}.)} +par défaut du package : \code{"AS+"} (affichage du statut). Pour ne +spécifier aucune option, \code{""}.)} \item{gointeractive}{Boolean, if TRUE will open a Tau-Argus window and launch the batch in it (\code{FALSE} by default). \cr (Possibilité de lancer le batch depuis le menu de Tau-Argus - (\code{FALSE} par défaut).)} +(\code{FALSE} par défaut).)} } \value{ A list containing two elements : - the arb file name and the output name (usefull if the name is generated randomly) \cr - (Une liste de deux éléments : le nom du fichier arb, le nom - fichiers en sortie (utile pour récupérer les noms générés aléatoirement).) +the arb file name and the output name (usefull if the name is generated randomly) \cr +(Une liste de deux éléments : le nom du fichier arb, le nom +fichiers en sortie (utile pour récupérer les noms générés aléatoirement).) } \description{ Function doesn't check if the asc and rda files exists\cr diff --git a/man/tab_multi_manager.Rd b/man/tab_multi_manager.Rd index 15a04b0..872e54c 100644 --- a/man/tab_multi_manager.Rd +++ b/man/tab_multi_manager.Rd @@ -20,11 +20,14 @@ tab_multi_manager( ip_start = 10, ip_end = 0, num_iter_max = 10, + split_tab = FALSE, + nb_tab_option = "smart", + limit = 14700, ... ) } \arguments{ -\item{list_tables}{named list of dataframes representing the tables to protect} +\item{list_tables}{named list of \code{data.frame} or \code{data.table} representing the tables to protect} \item{list_explanatory_vars}{named list of character vectors of explanatory variables of each table mentionned in list_tables. Names of the list are the same as of the list of tables.} @@ -58,8 +61,8 @@ attribuer la valeur de \code{rtauargus.totcode}.)} \item{freq}{Name of the column containing the cell frequency. \cr (Nom de la colonne contenant les effectifs pour une cellule)} -\item{secret_var}{Boolean variable which specifies the secret, primary or not : - equal to "TRUE" if a cell is concerned by the secret,"FALSE" otherwise. +\item{secret_var}{Nae of the boolean variable which specifies the secret, primary or not : +equal to "TRUE" if a cell is concerned by the secret,"FALSE" otherwise. will be exported in the apriori file. \cr (Variable indiquant le secret de type booléen: prend la valeur "TRUE" quand les cellules du tableau doivent être masquées @@ -75,10 +78,9 @@ correspond à la valeur de la cellule. peut être spécifié pour chacune des c peut contenir des NA pour les coûts que l'on ne souhaite pas modifier.) (nombre minimal de décimales à afficher (voir section 'Number of decimals').)} -\item{suppress}{[\strong{mandatory}] -Algortihm for secondary suppression (Tau-Argus batch syntax), and the +\item{suppress}{Algortihm for secondary suppression (Tau-Argus batch syntax), and the parameters for it.\cr -([\strong{obligatoire}] Algorithme de gestion du secret secondaire +( Algorithme de gestion du secret secondaire (syntaxe batch de Tau-Argus), ainsi que les potentiels paramètres associés)} \item{ip_start}{integer: Interval protection level to apply at first treatment of each table} @@ -87,6 +89,24 @@ parameters for it.\cr \item{num_iter_max}{integer: Maximum of treatments to do on each table (default to 10)} +\item{split_tab}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} boolean, +whether to reduce dimension to 3 while treating a table of dimension 4 or 5 +(default to \code{FALSE})} + +\item{nb_tab_option}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} strategy to follow +to choose variables automatically while splitting: +\itemize{ +\item{\code{"min"}: minimize the number of tables;} +\item{\code{"max"}: maximize the number of tables;} +\item{\code{"smart"}: minimize the number of tables under the constraint +of their row count.} +}} + +\item{limit}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} numeric, used to choose +which variable to merge (if nb_tab_option = 'smart') +and split table with a number of row above this limit in order to avoid +tauargus failures} + \item{...}{other arguments of \code{tab_rtauargus2()}} } \value{ @@ -130,7 +150,7 @@ purrr::map( \dontrun{ options( rtauargus.tauargus_exe = - "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" + "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" ) res_1 <- tab_multi_manager( list_tables = list_data_2_tabs, @@ -145,6 +165,33 @@ res_1 <- tab_multi_manager( secret_var = "is_secret_prim", totcode = "Total" ) + + +# With the reduction dimensions feature + +data("datatest1") +data("datatest2") + +datatest2b <- datatest2 \%>\% + filter(cj == "Total", treff == "Total", type_distrib == "Total") \%>\% + select(-cj, -treff, -type_distrib) + +str(datatest2b) + +res <- tab_multi_manager( + list_tables = list(d1 = datatest1, d2 = datatest2b), + list_explanatory_vars = list( + d1 = names(datatest1)[1:4], + d2 = names(datatest2b)[1:2] + ), + dir_name = "tauargus_files", + value = "pizzas_tot_abs", + freq = "nb_obs_rnd", + secret_var = "is_secret_prim", + totcode = "Total", + split_tab = TRUE +) + } } diff --git a/man/tab_rda.Rd b/man/tab_rda.Rd index f38e13f..1b32480 100644 --- a/man/tab_rda.Rd +++ b/man/tab_rda.Rd @@ -24,14 +24,13 @@ tab_rda( hierleadstring = getOption("rtauargus.hierleadstring"), codelist = NULL, separator = getOption("rtauargus.separator"), - secret_prim = NULL + secret_no_pl = NULL ) } \arguments{ -\item{tabular}{[\strong{mandatory}] -data.frame which contains the tabulated data and +\item{tabular}{data.frame which contains the tabulated data and an additional boolean variable that indicates the primary secret of type boolean \cr -([\strong{obligatoire}] data.frame contenant les données tabulées et +( data.frame contenant les données tabulées et une variable supplémentaire indiquant le secret primaire de type booléen.)} \item{tab_filename}{tab file name (with .tab extension) \cr @@ -43,16 +42,16 @@ nom du fichier rda (avec extension)} \item{hst_filename}{hst file name (with .hst extension) \cr nom du fichier hst (avec extension)} -\item{explanatory_vars}{[\strong{mandatory}] Vector of explanatory variables \cr -[\strong{obligatoire}] Variables catégorielles, sous forme de vecteurs \cr +\item{explanatory_vars}{Vector of explanatory variables \cr +Variables catégorielles, sous forme de vecteurs \cr Example : \code{c("A21", "TREFF", "REG")} for a table crossing \code{A21} x \code{TREFF} x \code{REG} (Variable indiquant le secret primaire de type booléen: prend la valeur "TRUE" quand les cellules du tableau doivent être masquées par le secret primaire, "FALSE" sinon. Permet de créer un fichier d'apriori)} -\item{secret_var}{Boolean variable which specifies the secret, primary or not : - equal to "TRUE" if a cell is concerned by the secret,"FALSE" otherwise. +\item{secret_var}{Nae of the boolean variable which specifies the secret, primary or not : +equal to "TRUE" if a cell is concerned by the secret,"FALSE" otherwise. will be exported in the apriori file. \cr (Variable indiquant le secret de type booléen: prend la valeur "TRUE" quand les cellules du tableau doivent être masquées @@ -122,9 +121,10 @@ code in the hierarchy. \cr} \item{separator}{Character used as separator in the .tab file. \cr (Caractère utilisé en tant que separateur dans le fichier .tab)} -\item{secret_prim}{Boolean variable which gives the primary secret : equal to -"TRUE" if a cell is concerned by the primary secret,"FALSE" otherwise. -will be exported in the apriori file \cr} +\item{secret_no_pl}{name of a boolean variable which indicates the cells +on which the protection levels won't be applied. If \code{secret_no_pl = NULL} +(default), the protection levels are applied on each cell which gets a \code{TRUE} +status for the \code{secret_var}.\cr} } \value{ Return the rda file name as a list (invisible).\cr @@ -152,7 +152,6 @@ If there is the additional boolean variable which indicates the primary secret in the table (of tabulated data), the function tab_rda will create an apriori file in a format conforming to tauargus. \cr - Le fichier d'apriori (.hst) récapitule pour chaque valeurs du tableau si elles sont concernées par le secret primaire ou non. Avec ce fichier tau-argus n'aura plus besoin de poser le secret primaire lui même, @@ -183,19 +182,19 @@ variables pouvant prendre ce paramètre.) For example : \itemize{ - \item{\code{totcode = "global"} : writes \code{ "global"} for each - explanatory vars} - \item{\code{totcode = c("global", size="total", income="total")} : - \code{ "global"} for each variable except for \code{size} and - \code{income}} assigned with \code{ "total"} - by default : { "Total"} - \item{\code{totcode = "global"} : écrit \code{ "global"} pour - toutes les variables catégorielles} - \item{\code{totcode = c("global", size="total", income="total")} : - \code{ "global"} pour toutes les variables catégorielles - sauf \code{size} and \code{income}} qui se verront affecter - le total : \code{ "total"} - Par defaut : { "Total"} +\item{\code{totcode = "global"} : writes \verb{ "global"} for each +explanatory vars} +\item{\code{totcode = c("global", size="total", income="total")} : +\verb{ "global"} for each variable except for \code{size} and +\code{income}} assigned with \verb{ "total"} +by default : {\if{html}{\out{}} "Total"} +\item{\code{totcode = "global"} : écrit \verb{ "global"} pour +toutes les variables catégorielles} +\item{\code{totcode = c("global", size="total", income="total")} : +\verb{ "global"} pour toutes les variables catégorielles +sauf \code{size} and \code{income}} qui se verront affecter +le total : \verb{ "total"} +Par defaut : {\if{html}{\out{}} "Total"} } } @@ -215,11 +214,11 @@ even if there is only one element. emph{Example :}\code{c(category="category.hrc")} \cr (Le paramètre \code{hrc} obéit aux mêmes règles de syntaxe que \code{totcode} - et \code{codelist} (vecteur nommé contenant autant d'éléments +et \code{codelist} (vecteur nommé contenant autant d'éléments que de variables à décrire). La hiérarchie est définie dans un fichier hrc à part (\strong{hiercodelist}) -qui peut être écrit à l'aide de la fonction \code{\link{write_hrc2}}. +qui peut être écrit à l'aide de la fonction \code{\link[=write_hrc2]{write_hrc2()}}. La fonction attend l'emplacement de ce fichier (et un éventuel \code{hierleadstring} s'il diffère de l'option par défaut du package). diff --git a/man/tab_rtauargus.Rd b/man/tab_rtauargus.Rd index af5eab0..36d3544 100644 --- a/man/tab_rtauargus.Rd +++ b/man/tab_rtauargus.Rd @@ -2,17 +2,17 @@ % Please edit documentation in R/tab_rtauargus.R \name{tab_rtauargus} \alias{tab_rtauargus} -\title{All in once for tabular} +\title{Protect one table by suppressing cells with Tau-Argus} \usage{ tab_rtauargus( tabular, + explanatory_vars, files_name = NULL, dir_name = NULL, - explanatory_vars, totcode = getOption("rtauargus.totcode"), hrc = NULL, secret_var = NULL, - secret_prim = NULL, + secret_no_pl = NULL, cost_var = NULL, value = "value", freq = "freq", @@ -24,30 +24,32 @@ tab_rtauargus( output_type = 4, output_options = "", unif_labels = TRUE, + split_tab = FALSE, + nb_tab_option = "smart", + limit = 14700, ... ) } \arguments{ -\item{tabular}{[\strong{mandatory}] -data.frame which contains the tabulated data and +\item{tabular}{data.frame which contains the tabulated data and an additional boolean variable that indicates the primary secret of type boolean \cr -([\strong{obligatoire}] data.frame contenant les données tabulées et +( data.frame contenant les données tabulées et une variable supplémentaire indiquant le secret primaire de type booléen.)} -\item{files_name}{string used to name all the files needed to process. -All files will have the same name, only their extension will be different.} - -\item{dir_name}{string indicated the path of the directory in which to save -all the files (.rda, .hst, .txt, .arb, .csv) generated by the function.} - -\item{explanatory_vars}{[\strong{mandatory}] Vector of explanatory variables \cr -[\strong{obligatoire}] Variables catégorielles, sous forme de vecteurs \cr +\item{explanatory_vars}{Vector of explanatory variables \cr +Variables catégorielles, sous forme de vecteurs \cr Example : \code{c("A21", "TREFF", "REG")} for a table crossing \code{A21} x \code{TREFF} x \code{REG} (Variable indiquant le secret primaire de type booléen: prend la valeur "TRUE" quand les cellules du tableau doivent être masquées par le secret primaire, "FALSE" sinon. Permet de créer un fichier d'apriori)} +\item{files_name}{string used to name all the files needed to process. +All files will have the same name, only their extension will be different.} + +\item{dir_name}{string indicated the path of the directory in which to save +all the files (.rda, .hst, .txt, .arb, .csv) generated by the function.} + \item{totcode}{Code(s) which represent the total of a categorical variable (see section 'Specific parameters' for this parameter's syntax). If unspecified for a variable(neither by default nor explicitly) @@ -64,16 +66,17 @@ attribuer la valeur de \code{rtauargus.totcode}.)} (Caractère qui, répété n fois, indique que la valeur est à n niveaux de profondeur dans la hiérarchie.)} -\item{secret_var}{Boolean variable which specifies the secret, primary or not : - equal to "TRUE" if a cell is concerned by the secret,"FALSE" otherwise. +\item{secret_var}{Nae of the boolean variable which specifies the secret, primary or not : +equal to "TRUE" if a cell is concerned by the secret,"FALSE" otherwise. will be exported in the apriori file. \cr (Variable indiquant le secret de type booléen: prend la valeur "TRUE" quand les cellules du tableau doivent être masquées "FALSE" sinon. Permet de créer un fichier d'apriori)} -\item{secret_prim}{Boolean variable which gives the primary secret : equal to -"TRUE" if a cell is concerned by the primary secret,"FALSE" otherwise. -will be exported in the apriori file \cr} +\item{secret_no_pl}{name of a boolean variable which indicates the cells +on which the protection levels won't be applied. If \code{secret_no_pl = NULL} +(default), the protection levels are applied on each cell which gets a \code{TRUE} +status for the \code{secret_var}.\cr} \item{cost_var}{Numeric variable allow to change the cost suppression of a cell for secondary suppression, it's the value of the cell by default, can be @@ -99,24 +102,22 @@ contributor of a cell. \cr (Nom de la colonne contenant la valeur du plus gros contributeur d'une cellule)} -\item{suppress}{[\strong{mandatory}] -Algortihm for secondary suppression (Tau-Argus batch syntax), and the +\item{suppress}{Algortihm for secondary suppression (Tau-Argus batch syntax), and the parameters for it.\cr -([\strong{obligatoire}] Algorithme de gestion du secret secondaire +( Algorithme de gestion du secret secondaire (syntaxe batch de Tau-Argus), ainsi que les potentiels paramètres associés)} -\item{safety_rules}{[\strong{mandatory}] -Rules for primary suppression with Argus syntax, if the primary suppression +\item{safety_rules}{Rules for primary suppression with Argus syntax, if the primary suppression has been dealt with an apriori file specify manual safety range :"MAN(10)" for example.\cr -([\strong{obligatoire}] Règle(s) de secret primaire. +( Règle(s) de secret primaire. Chaîne de caractères en syntaxe batch Tau-Argus. Si le secret primaire a été traité dans un fichier d'apriori : utiliser "MAN(10)")} \item{show_batch_console}{to display the batch progress in the console. \cr (pour afficher le déroulement du batch dans la - console.)} +console.)} \item{output_type}{Type of the output file (Argus codification) By default \code{"2"} (csv for pivot-table). @@ -128,24 +129,52 @@ Pour le format SBS utiliser \code{"4"})} \item{output_options}{Additionnal parameter for the output, by default : code{"AS+"} (print Status). To specify no options : \code{""}.\cr (Options supplémentaires des fichiers en sortie. Valeur - par défaut du package : \code{"AS+"} (affichage du statut). Pour ne - spécifier aucune option, \code{""}.)} +par défaut du package : \code{"AS+"} (affichage du statut). Pour ne +spécifier aucune option, \code{""}.)} \item{unif_labels}{boolean, if explanatory variables have to be standardized} +\item{split_tab}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} boolean, +whether to reduce dimension to 3 while treating a table of dimension 4 or 5 +(default to \code{FALSE})} + +\item{nb_tab_option}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} strategy to follow +to choose variables automatically while splitting: +\itemize{ +\item{\code{"min"}: minimize the number of tables;} +\item{\code{"max"}: maximize the number of tables;} +\item{\code{"smart"}: minimize the number of tables under the constraint +of their row count.} +}} + +\item{limit}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} numeric, used to choose +which variable to merge (if nb_tab_option = 'smart') +and split table with a number of row above this limit in order to avoid +tauargus failures} + \item{...}{any parameter of the tab_rda, tab_arb or run_arb functions, relevant for the treatment of tabular.} } \value{ -If output_type equals to 4, then the original tabular is returned with a new +If output_type equals to 4 and split_tab = FALSE, +then the original tabular is returned with a new column called Status, indicating the status of the cell coming from Tau-Argus : "A" for a primary secret due to frequency rule, "B" for a primary secret due to dominance rule, "D" for secondary secret and "V" for no secret cell. -If output_type doesn't equal to 4, then the raw result from tau-argus is returned. +If split_tab = TRUE, +then the original tabular is returned with some new columns which are boolean +variables indicating the status of a cell at each iteration of the protection +process as we get with \code{tab_multi_manager()} function. \code{TRUE} +denotes a cell that have to be suppressed. The last column is then the +final status of the suppression process of the original table. + +If \code{split_tab = FALSE} and \code{output_type} doesn't equal to \code{4}, +then the raw result from tau-argus is returned. } \description{ -All in once for tabular +The function prepares all the files needed by Tau-Argus and launches the +software with the good settings and gets back the result. } \section{Standardization of explanatory variables and hierarchies}{ @@ -182,7 +211,7 @@ hrc_file_activity <- activity_corr_table \%>\% # Compute the secondary secret ---- options( rtauargus.tauargus_exe = - "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" + "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" ) res <- tab_rtauargus( @@ -197,5 +226,19 @@ res <- tab_rtauargus( freq = "N_OBS", verbose = FALSE ) + +# Reduce dims feature + +data(datatest1) +res_dim4 <- tab_rtauargus( + tabular = datatest1, + dir_name = "tauargus_files", + explanatory_vars = c("A10", "treff","type_distrib","cj"), + totcode = rep("Total", 4), + secret_var = "is_secret_prim", + value = "pizzas_tot_abs", + freq = "nb_obs_rnd", + split_tab = TRUE +) } } diff --git a/man/tab_rtauargus2.Rd b/man/tab_rtauargus2.Rd index ffeab54..e8db463 100644 --- a/man/tab_rtauargus2.Rd +++ b/man/tab_rtauargus2.Rd @@ -12,20 +12,22 @@ tab_rtauargus2( totcode, hrc = NULL, secret_var = NULL, - secret_prim = NULL, + secret_no_pl = NULL, cost_var = NULL, value = "value", freq = "freq", ip = 10, suppress = "MOD(1,5,1,0,0)", + split_tab = TRUE, + nb_tab_option = "smart", + limit = 14700, ... ) } \arguments{ -\item{tabular}{[\strong{mandatory}] -data.frame which contains the tabulated data and +\item{tabular}{data.frame which contains the tabulated data and an additional boolean variable that indicates the primary secret of type boolean \cr -([\strong{obligatoire}] data.frame contenant les données tabulées et +( data.frame contenant les données tabulées et une variable supplémentaire indiquant le secret primaire de type booléen.)} \item{files_name}{string used to name all the files needed to process. @@ -34,8 +36,8 @@ All files will have the same name, only their extension will be different.} \item{dir_name}{string indicated the path of the directory in which to save all the files (.rda, .hst, .txt, .arb, .csv) generated by the function.} -\item{explanatory_vars}{[\strong{mandatory}] Vector of explanatory variables \cr -[\strong{obligatoire}] Variables catégorielles, sous forme de vecteurs \cr +\item{explanatory_vars}{Vector of explanatory variables \cr +Variables catégorielles, sous forme de vecteurs \cr Example : \code{c("A21", "TREFF", "REG")} for a table crossing \code{A21} x \code{TREFF} x \code{REG} (Variable indiquant le secret primaire de type booléen: @@ -58,16 +60,17 @@ attribuer la valeur de \code{rtauargus.totcode}.)} (Caractère qui, répété n fois, indique que la valeur est à n niveaux de profondeur dans la hiérarchie.)} -\item{secret_var}{Boolean variable which specifies the secret, primary or not : - equal to "TRUE" if a cell is concerned by the secret,"FALSE" otherwise. +\item{secret_var}{Nae of the boolean variable which specifies the secret, primary or not : +equal to "TRUE" if a cell is concerned by the secret,"FALSE" otherwise. will be exported in the apriori file. \cr (Variable indiquant le secret de type booléen: prend la valeur "TRUE" quand les cellules du tableau doivent être masquées "FALSE" sinon. Permet de créer un fichier d'apriori)} -\item{secret_prim}{Boolean variable which gives the primary secret : equal to -"TRUE" if a cell is concerned by the primary secret,"FALSE" otherwise. -will be exported in the apriori file \cr} +\item{secret_no_pl}{name of a boolean variable which indicates the cells +on which the protection levels won't be applied. If \code{secret_no_pl = NULL} +(default), the protection levels are applied on each cell which gets a \code{TRUE} +status for the \code{secret_var}.\cr} \item{cost_var}{Numeric variable allow to change the cost suppression of a cell for secondary suppression, it's the value of the cell by default, can be @@ -87,12 +90,29 @@ peut contenir des NA pour les coûts que l'on ne souhaite pas modifier.) \item{ip}{Interval Protection Level (10 by default)} -\item{suppress}{[\strong{mandatory}] -Algortihm for secondary suppression (Tau-Argus batch syntax), and the +\item{suppress}{Algortihm for secondary suppression (Tau-Argus batch syntax), and the parameters for it.\cr -([\strong{obligatoire}] Algorithme de gestion du secret secondaire +( Algorithme de gestion du secret secondaire (syntaxe batch de Tau-Argus), ainsi que les potentiels paramètres associés)} +\item{split_tab}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} boolean, +whether to reduce dimension to 3 while treating a table of dimension 4 or 5 +(default to \code{FALSE})} + +\item{nb_tab_option}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} strategy to follow +to choose variables automatically while splitting: +\itemize{ +\item{\code{"min"}: minimize the number of tables;} +\item{\code{"max"}: maximize the number of tables;} +\item{\code{"smart"}: minimize the number of tables under the constraint +of their row count.} +}} + +\item{limit}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} numeric, used to choose +which variable to merge (if nb_tab_option = 'smart') +and split table with a number of row above this limit in order to avoid +tauargus failures} + \item{...}{Other arguments of \code{tab_rtauargus} function} } \value{ @@ -125,7 +145,7 @@ hrc_file_activity <- activity_corr_table \%>\% # Compute the secondary secret ---- options( rtauargus.tauargus_exe = - "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" + "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" ) res <- tab_rtauargus2( @@ -139,6 +159,19 @@ res <- tab_rtauargus2( value = "TOT", freq = "N_OBS" ) + +# reduce dimensions feature +data(datatest1) +res_dim4 <- tab_rtauargus2( + tabular = datatest1, + dir_name = "tauargus_files", + explanatory_vars = c("A10", "treff","type_distrib","cj"), + totcode = rep("Total", 4), + secret_var = "is_secret_prim", + value = "pizzas_tot_abs", + freq = "nb_obs_rnd", + split_tab = TRUE +) } } \seealso{ diff --git a/man/tab_rtauargus4.Rd b/man/tab_rtauargus4.Rd new file mode 100644 index 0000000..e8b5ef7 --- /dev/null +++ b/man/tab_rtauargus4.Rd @@ -0,0 +1,176 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sp_tab_rtauargus.R +\name{tab_rtauargus4} +\alias{tab_rtauargus4} +\title{Call Tau-Argus to protect a 4 or 5 dimensions table by splitting it +in several 3 dimensions table.} +\usage{ +tab_rtauargus4( + tabular, + explanatory_vars, + dir_name, + secret_var, + totcode, + files_name = NULL, + hrc = NULL, + secret_no_pl = NULL, + cost_var = NULL, + value = "value", + freq = "freq", + ip = 10, + suppress = "MOD(1,5,1,0,0)", + safety_rules = paste0("MAN(", ip, ")"), + nb_tab_option = "smart", + limit = 14700L, + dfs_name = "tab", + ... +) +} +\arguments{ +\item{tabular}{data.frame which contains the tabulated data and +an additional boolean variable that indicates the primary secret of type boolean \cr +( data.frame contenant les données tabulées et +une variable supplémentaire indiquant le secret primaire de type booléen.)} + +\item{explanatory_vars}{Vector of explanatory variables \cr +Variables catégorielles, sous forme de vecteurs \cr +Example : \code{c("A21", "TREFF", "REG")} for a table crossing +\code{A21} x \code{TREFF} x \code{REG} +(Variable indiquant le secret primaire de type booléen: +prend la valeur "TRUE" quand les cellules du tableau doivent être masquées +par le secret primaire, "FALSE" sinon. Permet de créer un fichier d'apriori)} + +\item{dir_name}{string indicated the path of the directory in which to save +all the files (.rda, .hst, .txt, .arb, .csv) generated by the function.} + +\item{secret_var}{Nae of the boolean variable which specifies the secret, primary or not : +equal to "TRUE" if a cell is concerned by the secret,"FALSE" otherwise. +will be exported in the apriori file. \cr +(Variable indiquant le secret de type booléen: +prend la valeur "TRUE" quand les cellules du tableau doivent être masquées +"FALSE" sinon. Permet de créer un fichier d'apriori)} + +\item{totcode}{Code(s) which represent the total of a categorical variable +(see section 'Specific parameters' for this parameter's syntax). +If unspecified for a variable(neither by default nor explicitly) +it will be set to \code{rtauargus.totcode}. \cr +(Code(s) pour le total d'une variable catégorielle (voir +section 'Specific parameters' pour la syntaxe de ce paramètre). Les +variables non spécifiées (ni par défaut, ni explicitement) se verront +attribuer la valeur de \code{rtauargus.totcode}.)} + +\item{files_name}{string used to name all the files needed to process. +All files will have the same name, only their extension will be different.} + +\item{hrc}{Informations of hierarchical variables (see section +'Hierarchical variables'). \cr +(Informations sur les variables hiérarchiques (voir section +'Hierarchical variables').) +(Caractère qui, répété n fois, indique que la valeur est +à n niveaux de profondeur dans la hiérarchie.)} + +\item{secret_no_pl}{name of a boolean variable which indicates the cells +on which the protection levels won't be applied. If \code{secret_no_pl = NULL} +(default), the protection levels are applied on each cell which gets a \code{TRUE} +status for the \code{secret_var}.\cr} + +\item{cost_var}{Numeric variable allow to change the cost suppression of a cell +for secondary suppression, it's the value of the cell by default, can be +specified for each cell, fill with NA if the cost doesn't need to be changed +for all cells \cr +(Variable numeric qui permet de changer la coût de suppression d'une cellule, +pris en compte dans les algorithmes de secret secondaire.Par défaut le coût +correspond à la valeur de la cellule. peut être spécifié pour chacune des cellules, +peut contenir des NA pour les coûts que l'on ne souhaite pas modifier.) +(nombre minimal de décimales à afficher (voir section 'Number of decimals').)} + +\item{value}{Name of the column containing the value of the cells. \cr +(Nom de la colonne contenant la valeur des cellules)} + +\item{freq}{Name of the column containing the cell frequency. \cr +(Nom de la colonne contenant les effectifs pour une cellule)} + +\item{ip}{Value of the safety margin in \% (must be an integer). +(Valeur pour les intervalles de protection en \%, doit être entier )} + +\item{suppress}{Algortihm for secondary suppression (Tau-Argus batch syntax), and the +parameters for it.\cr +( Algorithme de gestion du secret secondaire +(syntaxe batch de Tau-Argus), ainsi que les potentiels paramètres associés)} + +\item{safety_rules}{Rules for primary suppression with Argus syntax, if the primary suppression +has been dealt with an apriori file specify manual safety range :"MAN(10)" +for example.\cr +( Règle(s) de secret primaire. +Chaîne de caractères en syntaxe batch Tau-Argus. Si le secret primaire +a été traité dans un fichier d'apriori : utiliser "MAN(10)")} + +\item{nb_tab_option}{strategy to follow for choosing variables automatically: +\itemize{ +\item \code{'min'}: minimize the number of tables; +\item \code{'max'}: maximize the number of tables; +\item \code{'smart'}: minimize the number of tables under the constraint +of their row count. +}} + +\item{limit}{numeric, used to choose which variable to merge (if nb_tab_option = 'smart') +and split table with a number of row above this limit in order to avoid +tauargus failures} + +\item{dfs_name}{name used to write hrc files when reducing dims} + +\item{...}{additional parameters#'} +} +\value{ +The original tabular is returned with additional variables indicating +whether or not the cell has to be masked according to Tau-Argus +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +} +\examples{ +\dontrun{ +#Please don't forget to specify the localisation of Tau-Argus in your computer +options( + rtauargus.tauargus_exe = + "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" +) + +data(datatest1) +expl_vars <- c("A10", "treff","type_distrib","cj") + +res_dim4 <- tab_rtauargus4( + tabular = datatest1, + files_name = "datatest1", + dir_name = "tauargus_files", + explanatory_vars = expl_vars, + totcode = setNames(rep("Total", 4), expl_vars), + secret_var = "is_secret_prim", + value = "pizzas_tot_abs", + freq = "nb_obs_rnd", + verbose = TRUE, + nb_tab_option = "min", + verbose = TRUE +) + +# With a data of 5 variables + +data(datatest2) +expl_vars <- c("A10", "treff","type_distrib","cj","nuts1") + +res_dim5 <- tab_rtauargus4( + tabular = datatest2, + files_name = "datatest2", + dir_name = "tauargus_files", + explanatory_vars = expl_vars, + totcode = setNames(rep("Total", 5), expl_vars), + secret_var = "is_secret_prim", + value = "pizzas_tot_abs", + freq = "nb_obs_rnd", + verbose = TRUE, + nb_tab_option = "min", # split into the minimum of tables. + verbose = TRUE, + suppress = "GH(1,100)" # We use hypercube to save time. +) +} +} diff --git a/man/tabulate_micro_data.Rd b/man/tabulate_micro_data.Rd new file mode 100644 index 0000000..635777c --- /dev/null +++ b/man/tabulate_micro_data.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tabul_group_fun.R +\name{tabulate_micro_data} +\alias{tabulate_micro_data} +\title{tabulate grouped data with all margins, handling hierarchical variables} +\usage{ +tabulate_micro_data( + df, + cat_vars = NULL, + hrc_vars = NULL, + pond_var = NULL, + resp_var = NULL, + marge_label = "Total" +) +} +\arguments{ +\item{df}{data.frame or data.table} + +\item{cat_vars}{vector of categorical variables but not hierarchical} + +\item{hrc_vars}{named list (name = VAR final name, value = VAR current names)} + +\item{pond_var}{weight (NULL if no weight is used)} + +\item{resp_var}{vector of response variables (NULL to only compute frequency table)} + +\item{marge_label}{label of margins (applied to all cat and hrc variables)} +} +\value{ +a tibble +} +\description{ +tabulate grouped data with all margins, handling hierarchical variables +} +\examples{ +library(data.table) + +data("indiv_dt") + +#Non hierarchical variables +res_all_dtp <- tabulate_micro_data( + df = indiv_dt, + #categorical but not hierarchical variables + cat_vars = c("A10", "SIZE","CJ"), + #weight var + pond_var = "WEIGHT", + #response variable + resp_var = "TURNOVER", + # Labels of the margins + marge_label = "Total" +) +str(res_all_dtp) + +#With one hierarchical variable +res_all_dtph <- tabulate_micro_data( + df = indiv_dt, + #categorical but not hierarchical variables + cat_vars = c("SIZE","CJ"), + #categorical nested variables + hrc_vars = list(ACTIVITY = c("A10","A21")), + pond_var = "WEIGHT", + resp_var = c("TURNOVER","PRODUCTION"), + marge_label = "Total" +) +str(res_all_dtph) + +} diff --git a/man/turnover_act_cj.Rd b/man/turnover_act_cj.Rd index d71d978..b4c342c 100644 --- a/man/turnover_act_cj.Rd +++ b/man/turnover_act_cj.Rd @@ -7,12 +7,12 @@ \format{ A tibble/data frame with 406 rows and 5 variables: \describe{ - \item{ACTIVITY}{business sector, hierarchical variables with three levels described - in the activity_corr_table dataset. The root is noted "Total"} - \item{CJ}{Type of companies (3 categories + overall category "Total")} - \item{N_OBS}{Frequency, number of companies} - \item{TOT}{turnover} - \item{MAX}{turnover of the company which contributes the most to the cell.} +\item{ACTIVITY}{business sector, hierarchical variables with three levels described +in the activity_corr_table dataset. The root is noted "Total"} +\item{CJ}{Type of companies (3 categories + overall category "Total")} +\item{N_OBS}{Frequency, number of companies} +\item{TOT}{turnover} +\item{MAX}{turnover of the company which contributes the most to the cell.} } } \usage{ diff --git a/man/turnover_act_nuts_size.Rd b/man/turnover_act_nuts_size.Rd index d0b943d..a319eef 100644 --- a/man/turnover_act_nuts_size.Rd +++ b/man/turnover_act_nuts_size.Rd @@ -7,18 +7,18 @@ \format{ A tibble/data frame with 3 168 rows and 6 variables: \describe{ - \item{ACTIVITY}{business sector, hierarchical variables with three levels described - in the activity_corr_table dataset. The root is noted "Total"} - \item{NUTS}{nuts - european denomination of administrative levels. - Hierarchical variables with two levels (nuts2 and nuts3) described - in the nuts23_fr_corr_table dataset. Only "FR41", "FR42" and "FR43" NUTS2 - areas and their corresponding NUTS3 areas are in the data. - The root is noted "Total_EAST"} - \item{SIZE}{size of the companies (Number of employees in three categories - + overall category "Total")} - \item{N_OBS}{Frequency, number of companies} - \item{TOT}{turnover value in euros} - \item{MAX}{turnover of the company which contributes the most to the cell.} +\item{ACTIVITY}{business sector, hierarchical variables with three levels described +in the activity_corr_table dataset. The root is noted "Total"} +\item{NUTS}{nuts - european denomination of administrative levels. +Hierarchical variables with two levels (nuts2 and nuts3) described +in the nuts23_fr_corr_table dataset. Only "FR41", "FR42" and "FR43" NUTS2 +areas and their corresponding NUTS3 areas are in the data. +The root is noted "Total_EAST"} +\item{SIZE}{size of the companies (Number of employees in three categories +and overall category "Total")} +\item{N_OBS}{Frequency, number of companies} +\item{TOT}{turnover value in euros} +\item{MAX}{turnover of the company which contributes the most to the cell.} } } \usage{ diff --git a/man/turnover_act_size.Rd b/man/turnover_act_size.Rd index f7bdfd0..fc30fbb 100644 --- a/man/turnover_act_size.Rd +++ b/man/turnover_act_size.Rd @@ -7,13 +7,13 @@ \format{ A tibble/data frame with 414 rows and 5 variables: \describe{ - \item{ACTIVITY}{business sector, hierarchical variables with three levels described - in the activity_corr_table dataset. The root is noted "Total"} - \item{SIZE}{size of the companies (Number of employees in three categories - + overall category "Total")} - \item{N_OBS}{Frequency, number of companies} - \item{TOT}{turnover value in euros} - \item{MAX}{turnover of the company which contributes the most to the cell.} +\item{ACTIVITY}{business sector, hierarchical variables with three levels described +in the activity_corr_table dataset. The root is noted "Total"} +\item{SIZE}{size of the companies (Number of employees in three categories +and overall category "Total")} +\item{N_OBS}{Frequency, number of companies} +\item{TOT}{turnover value in euros} +\item{MAX}{turnover of the company which contributes the most to the cell.} } } \usage{ diff --git a/man/turnover_nuts_cj.Rd b/man/turnover_nuts_cj.Rd index 41bd437..fd98eda 100644 --- a/man/turnover_nuts_cj.Rd +++ b/man/turnover_nuts_cj.Rd @@ -7,13 +7,13 @@ \format{ A tibble/data frame with 452 rows and 5 variables: \describe{ - \item{NUTS}{nuts - european denomination of administrative levels. - Hierarchical variables with two levels (nuts2 and nuts3) described - in the nuts23_fr_corr_table dataset. The root is noted "Total"} - \item{CJ}{Type of companies (3 categories + overall category "Total")} - \item{N_OBS}{Frequency, number of companies} - \item{TOT}{turnover value in euros} - \item{MAX}{turnover of the company which contributes the most to the cell.} +\item{NUTS}{nuts - european denomination of administrative levels. +Hierarchical variables with two levels (nuts2 and nuts3) described +in the nuts23_fr_corr_table dataset. The root is noted "Total"} +\item{CJ}{Type of companies (3 categories + overall category "Total")} +\item{N_OBS}{Frequency, number of companies} +\item{TOT}{turnover value in euros} +\item{MAX}{turnover of the company which contributes the most to the cell.} } } \usage{ diff --git a/man/turnover_nuts_size.Rd b/man/turnover_nuts_size.Rd index ff2b4ba..8eef25f 100644 --- a/man/turnover_nuts_size.Rd +++ b/man/turnover_nuts_size.Rd @@ -7,14 +7,14 @@ \format{ A tibble/data frame with 460 rows and 5 variables: \describe{ - \item{NUTS}{nuts - european denomination of administrative levels. - Hierarchical variables with two levels (nuts2 and nuts3) described - in the nuts23_fr_corr_table dataset. The root is noted "Total"} - \item{SIZE}{size of the companies (Number of employees in three categories - + overall category "Total")} - \item{N_OBS}{Frequency, number of companies} - \item{TOT}{turnover value in euros} - \item{MAX}{turnover of the company which contributes the most to the cell.} +\item{NUTS}{nuts - european denomination of administrative levels. +Hierarchical variables with two levels (nuts2 and nuts3) described +in the nuts23_fr_corr_table dataset. The root is noted "Total"} +\item{SIZE}{size of the companies (Number of employees in three categories +and overall category "Total")} +\item{N_OBS}{Frequency, number of companies} +\item{TOT}{turnover value in euros} +\item{MAX}{turnover of the company which contributes the most to the cell.} } } \usage{ diff --git a/man/var_to_merge.Rd b/man/var_to_merge.Rd new file mode 100644 index 0000000..763f1f6 --- /dev/null +++ b/man/var_to_merge.Rd @@ -0,0 +1,137 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sp_var_to_merge.R +\name{var_to_merge} +\alias{var_to_merge} +\title{General function to choose variables to merge, +limiting the number of generated tables while ensuring not to generate +tables that are too large.} +\usage{ +var_to_merge( + dfs, + totcode, + hrcfiles = NULL, + nb_var = 4, + nb_tab_option = "min", + limit = 150 +) +} +\arguments{ +\item{dfs}{data.frame} + +\item{totcode}{named vector of totals for categorical variables} + +\item{hrcfiles}{named vector of hrc files for categorical variables} + +\item{nb_var}{number of variables to merge} + +\item{nb_tab_option}{strategy to follow for choosing variables automatically: +\itemize{ +\item \code{'min'}: minimize the number of tables; +\item \code{'max'}: maximize the number of tables; +\item \code{'smart'}: minimize the number of tables under the constraint of their row count. +}} + +\item{limit}{maximum allowed row count in the 'smart' case} +} +\value{ +A list of vectors representing the chosen variables to merge +} +\description{ +General function to choose variables to merge, +limiting the number of generated tables while ensuring not to generate +tables that are too large. +} +\examples{ +library(dplyr) +data <- expand.grid( + ACT = c("Total", "A", "B", "A1", "A2", "B1", "B2"), + GEO = c("Total", "GA", "GB", "GA1", "GA2"), + SEX = c("Total", "F", "M"), + AGE = c("Total", "AGE1", "AGE2"), + stringsAsFactors = FALSE, + KEEP.OUT.ATTRS = FALSE +) \%>\% + as.data.frame() + +data <- data \%>\% mutate(VALUE = 1:n()) + +hrc_act <- "hrc_ACT.hrc" +sdcHierarchies::hier_create(root = "Total", nodes = c("A","B")) \%>\% + sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2")) \%>\% + sdcHierarchies::hier_add(root = "B", nodes = c("B1","B2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE) + +hrc_geo <- "hrc_GEO.hrc" +sdcHierarchies::hier_create(root = "Total", nodes = c("GA","GB")) \%>\% + sdcHierarchies::hier_add(root = "GA", nodes = c("GA1","GA2")) \%>\% + sdcHierarchies::hier_convert(as = "argus") \%>\% + slice(-1) \%>\% + mutate(levels = substring(paste0(level,name),3)) \%>\% + select(levels) \%>\% + write.table(file = hrc_geo, row.names = FALSE, col.names = FALSE, quote = FALSE) + +totcode <- c(SEX="Total",AGE="Total", GEO="Total", ACT="Total") + +hrcfiles <- c(ACT = hrc_act, GEO = hrc_geo) + +# Consistent: choose two hierarchical variables +res1 <- var_to_merge(dfs = data, + totcode = totcode, + hrcfiles = hrcfiles, + nb_var = 2, + nb_tab_option = 'max') +res1 +max(unlist(length_tabs(dfs = data, + hrcfiles = hrcfiles, + totcode = totcode, + v1 = res1$vars[1], v2 = res1$vars[2]))) + +# Consistent: choose two non-hierarchical variables +res2 <- var_to_merge(dfs = data, + totcode = totcode, + hrcfiles = hrcfiles, + nb_var = 2, + nb_tab_option = 'min') +res2 +max(unlist(length_tabs(dfs = data, + hrcfiles = hrcfiles, + totcode = totcode, + v1 = res2$vars[1], v2 = res2$vars[2]))) + +res3 <- var_to_merge(dfs = data, + totcode = totcode, + hrcfiles = hrcfiles, + limit = 200, + nb_var = 2, + nb_tab_option = 'smart') +res3 +max(unlist(length_tabs(dfs = data, + hrcfiles = hrcfiles, + totcode = totcode, + v1 = res3$vars[1], v2 = res3$vars[2]))) + +# Obtains 147, which is well below 200 + +res4 <- var_to_merge(dfs = data, + totcode = totcode, + hrcfiles = hrcfiles, + limit = 5, + nb_var = 2, + nb_tab_option = 'smart') +res4 +max(unlist(length_tabs(dfs = data, + hrcfiles = hrcfiles, + totcode = totcode, + v1 = res4$vars[1], v2 = res4$vars[2]))) + +# Receives a warning: unable to reach the announced value +# There are 63 rows (equivalent to the max +# -> this is what reduces the table size) +# And the warning announces 63 rows, which is consistent with the output + +} +\keyword{internal} diff --git a/man/write_hrc.Rd b/man/write_hrc.Rd index 08c5dc8..dec1ea8 100644 --- a/man/write_hrc.Rd +++ b/man/write_hrc.Rd @@ -15,13 +15,13 @@ write_hrc( ) } \arguments{ -\item{microdata}{[\strong{required}] data.frame containing the microdata. \cr -([\strong{obligatoire}] data.frame contenant les microdonnées.)} +\item{microdata}{data.frame containing the microdata. \cr +( data.frame contenant les microdonnées.)} -\item{vars_hrc}{\strong{[mandatory]} vector of variable names +\item{vars_hrc}{vector of variable names constituting the hierarchy, from the finest to the most aggregated level.\cr -(\strong{[obligatoire]} vecteur des noms des variables - constituant la hiérarchie, du niveau le plus fin au niveau le plus agrégé.)} +(vecteur des noms des variables +constituant la hiérarchie, du niveau le plus fin au niveau le plus agrégé.)} \item{hierleadstring}{character which, repeated n times, indicates that the value is at n levels deep in the hierarchy. \cr @@ -40,32 +40,32 @@ immediately above} lower} }\cr (remplissage d'éventuelles valeurs manquantes, à l'aide d'une - autre variable :\itemize{ - \item{\code{"up"} (défaut) : variable hiérarchique de niveau - immédiatement supérieur} - \item{\code{"down"} : variable hiérarchique de niveau immédiatement - inférieur} - })} +autre variable :\itemize{ +\item{\code{"up"} (défaut) : variable hiérarchique de niveau +immédiatement supérieur} +\item{\code{"down"} : variable hiérarchique de niveau immédiatement +inférieur} +})} \item{compact}{to prune branches repeating a single value to the lowest level of depth (\code{TRUE} by default).\cr (pour élaguer les branches répétant une unique valeur jusqu'au - plus bas niveau de profondeur (\code{TRUE} par défaut).)} +plus bas niveau de profondeur (\code{TRUE} par défaut).)} \item{hierlevels}{if only one variable is specified in \code{vars_hrc}, allows to generate the hierarchy according to the position of the characters in the string. For example, \code{hierlevels = "2 3"} to build a hierarchy from a common code.\cr (si une seule variable est spécifiée dans \code{vars_hrc}, - permet de générer la hiérarchie selon la position des caractères dans la - chaîne. Par exemple, \code{hierlevels = "2 3"} pour construire une - hiérarchie département-commune à partir d'un code commune.)} +permet de générer la hiérarchie selon la position des caractères dans la +chaîne. Par exemple, \code{hierlevels = "2 3"} pour construire une +hiérarchie département-commune à partir d'un code commune.)} } \value{ The name of the hrc file (useful in the case of a temporary file with random name).\cr (Le nom du fichier hrc (utile dans le cas d'un fichier temporaire au - nom aléatoire).) +nom aléatoire).) } \description{ Creates an hrc file (hierarchy) from several variables in a set of diff --git a/man/write_hrc2.Rd b/man/write_hrc2.Rd index 36f8cf3..b44ea90 100644 --- a/man/write_hrc2.Rd +++ b/man/write_hrc2.Rd @@ -10,7 +10,7 @@ write_hrc2( sort_table = FALSE, rev = FALSE, hier_lead_string = getOption("rtauargus.hierleadstring"), - adjust_unique_roots = FALSE, + adjust_unique_roots = TRUE, add_char = "ZZZ" ) } @@ -38,7 +38,7 @@ value mentionned in the package options (i.e. "@" at the package startup). \cr Caractère unique repérant le niveau de profondeur dans le .hrc} -\item{adjust_unique_roots}{boolean. If TRUE will add fictional roots to the +\item{adjust_unique_roots}{boolean. If TRUE (default) will add fictional roots to the correspondence table, by doing so there will be no unique roots in the hrc file. With tabular function, unique roots are not handled by Tau-Argus. \cr Si TRUE la fonction va ajouter des feuilles fictives au fichier .hrc afin @@ -48,7 +48,7 @@ des problèmes dans l'exécution de Tau-Argus} \item{add_char}{character If adjust_unique_roots is TRUE add_char is the string that will be used to create fictional roots, be sure that this string does not create duplicates.The string will be paste at the beginning of a unique root - default = "ZZZ" \cr +default = "ZZZ" \cr character Si adjust_unique_roots est TRUE add_char est l'élément qui sera utilisé afin de créer des feuilles fictives, il faut être sur que cela ne crée pas de doublons dans la hiérarchie.La chaine de caractère sera @@ -85,7 +85,7 @@ Here is how a correspondence table is assumed to look like: \tabular{lll}{ \strong{type} \tab \strong{details} \cr - \code{-------} \tab \code{------} \cr +\verb{-------} \tab \verb{------} \cr planet \tab telluric \cr planet \tab gasgiant \cr star \tab bluestar \cr @@ -124,14 +124,14 @@ level. \tabular{lll}{ \strong{type} \tab \strong{details} \cr - \code{-------} \tab \code{------} \cr +\verb{-------} \tab \verb{------} \cr planet \tab telluric \cr - \tab gasgiant \cr +\tab gasgiant \cr star \tab bluestar \cr - \tab whitedwarf \cr - \tab reddwarf \cr +\tab whitedwarf \cr +\tab reddwarf \cr other \tab blackhole \cr - \tab pulsar \cr +\tab pulsar \cr } Such cases still issue a warning for the presence of NAs, but do not pose @@ -143,7 +143,7 @@ to the lowest detail, creating NAs. \tabular{lll}{ \strong{type} \tab \strong{details} \cr - \code{-------} \tab \code{------} \cr +\verb{-------} \tab \verb{------} \cr planet \tab telluric \cr planet \tab gasgiant \cr star \tab \cr @@ -168,7 +168,7 @@ Voici l'aspect général que devrait avoir une table de correspondance : \tabular{lll}{ \strong{type} \tab \strong{details} \cr - \code{-------} \tab \code{------} \cr +\verb{-------} \tab \verb{------} \cr planet \tab telluric \cr planet \tab gasgiant \cr star \tab bluestar \cr @@ -211,14 +211,14 @@ niveau donné verticalement. \tabular{lll}{ \strong{type} \tab \strong{details} \cr - \code{-------} \tab \code{------} \cr +\verb{-------} \tab \verb{------} \cr planet \tab telluric \cr - \tab gasgiant \cr +\tab gasgiant \cr star \tab bluestar \cr - \tab whitedwarf \cr - \tab reddwarf \cr +\tab whitedwarf \cr +\tab reddwarf \cr other \tab blackhole \cr - \tab pulsar \cr +\tab pulsar \cr } De tels cas émettent toujours un avertissement du fait de la présence de NA, @@ -231,7 +231,7 @@ manquante. \tabular{lll}{ \strong{type} \tab \strong{details} \cr - \code{-------} \tab \code{------} \cr +\verb{-------} \tab \verb{------} \cr planet \tab telluric \cr planet \tab gasgiant \cr star \tab \cr diff --git a/vignettes/GEO.png b/vignettes/GEO.png new file mode 100644 index 0000000..fbf5dc8 Binary files /dev/null and b/vignettes/GEO.png differ diff --git a/vignettes/equation.png b/vignettes/equation.png new file mode 100644 index 0000000..938e348 Binary files /dev/null and b/vignettes/equation.png differ diff --git a/vignettes/options_safety_rules.Rmd b/vignettes/options_safety_rules.Rmd index 5e4e7d3..13f666e 100644 --- a/vignettes/options_safety_rules.Rmd +++ b/vignettes/options_safety_rules.Rmd @@ -94,10 +94,10 @@ More details are available in the τ-Argus manual, especially in the section #### About this vignette - Authors: **Nathanael Rastout** -- Last update: **06/01/2023** -- Version of rtauargus used: **1.1.1** -- Version of τ-Argus used : **TauArgus 4.2.2b1** -- R version used : **4.1.3** +- Last update: **21/09/2023** +- Version of rtauargus used: **1.2.0** +- Version of τ-Argus used : **TauArgus 4.2.3** +- R version used : **4.2.3**

summary ↑ diff --git a/vignettes/options_safety_rules.Rmd.orig b/vignettes/options_safety_rules.Rmd.orig index a458480..76050fb 100644 --- a/vignettes/options_safety_rules.Rmd.orig +++ b/vignettes/options_safety_rules.Rmd.orig @@ -98,7 +98,7 @@ More details are available in the τ-Argus manual, especially in the section - Authors: **Nathanael Rastout** - Last update: **`r format(Sys.time(), "%d/%m/%Y")`** - Version of rtauargus used: **`r packageVersion("rtauargus")`** -- Version of τ-Argus used : **TauArgus 4.2.2b1** +- Version of τ-Argus used : **TauArgus 4.2.3** - R version used : **`r packageVersion("base")`**

diff --git a/vignettes/options_safety_rules_fr.Rmd b/vignettes/options_safety_rules_fr.Rmd index d92cd7f..62e5312 100644 --- a/vignettes/options_safety_rules_fr.Rmd +++ b/vignettes/options_safety_rules_fr.Rmd @@ -94,10 +94,10 @@ Plus de détails sont disponibles dans le manuel de τ-Argus, notamment dans #### About this vignette - Authors: **Nathanael Rastout** -- Last update: **06/01/2023** -- Version of rtauargus used: **1.1.1** -- Version of τ-Argus used : **TauArgus 4.2.2b1** -- R version used : **4.1.3** +- Last update: **21/09/2023** +- Version of rtauargus used: **1.2.0** +- Version of τ-Argus used : **TauArgus 4.2.3** +- R version used : **4.2.3**

summary ↑ diff --git a/vignettes/options_safety_rules_fr.Rmd.orig b/vignettes/options_safety_rules_fr.Rmd.orig index c9003eb..c493bd4 100644 --- a/vignettes/options_safety_rules_fr.Rmd.orig +++ b/vignettes/options_safety_rules_fr.Rmd.orig @@ -98,7 +98,7 @@ Plus de détails sont disponibles dans le manuel de τ-Argus, notamment dans - Authors: **Nathanael Rastout** - Last update: **`r format(Sys.time(), "%d/%m/%Y")`** - Version of rtauargus used: **`r packageVersion("rtauargus")`** -- Version of τ-Argus used : **TauArgus 4.2.2b1** +- Version of τ-Argus used : **TauArgus 4.2.3** - R version used : **`r packageVersion("base")`**

diff --git a/vignettes/precompilation.R b/vignettes/precompilation.R index e17cfa6..556428d 100644 --- a/vignettes/precompilation.R +++ b/vignettes/precompilation.R @@ -25,6 +25,12 @@ knitr::knit( encoding = "UTF-8" ) +knitr::knit( + "vignettes/split_tab.Rmd.orig", + "vignettes/split_tab.Rmd", + encoding = "UTF-8" +) + ### Versions françaises knitr::knit( @@ -50,3 +56,9 @@ knitr::knit( "vignettes/options_safety_rules_fr.Rmd", encoding = "UTF-8" ) + +knitr::knit( + "vignettes/split_tab_fr.Rmd.orig", + "vignettes/split_tab_fr.Rmd", + encoding = "UTF-8" +) diff --git a/vignettes/protect_multi_tables.Rmd b/vignettes/protect_multi_tables.Rmd index 9762782..8705726 100644 --- a/vignettes/protect_multi_tables.Rmd +++ b/vignettes/protect_multi_tables.Rmd @@ -178,7 +178,7 @@ Let's specify the location of the TauArgus.exe file in our computer: ```r options( rtauargus.tauargus_exe = - "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" + "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" ) ``` @@ -520,17 +520,7 @@ sdcHierarchies::hier_create( nodes = activity_corr_table_D_TO_M$A21 ) %>% sdcHierarchies::hier_display() -#> D_TO_M -#> +-D -#> +-E -#> +-F -#> +-G -#> +-H -#> +-I -#> +-J -#> +-K -#> +-L -#> \-M +#> Error in sdcHierarchies::hier_create(root = "D_TO_M", nodes = activity_corr_table_D_TO_M$A21): object 'activity_corr_table_D_TO_M' not found ``` To handle this case, the preferred approach is to create a third table crossing @@ -638,10 +628,10 @@ res <- tab_multi_manager( #### About this vignette - Authors: **Julien Jamme** & **Nathanael Rastout** -- Last update: **06/01/2023** -- Version of rtauargus used: **1.1.1** -- Version of τ-Argus used : **TauArgus 4.2.2b1** -- R version used : **4.1.3** +- Last update: **21/09/2023** +- Version of rtauargus used: **1.2.0** +- Version of τ-Argus used : **TauArgus 4.2.3** +- R version used : **4.2.3**

summary ↑ diff --git a/vignettes/protect_multi_tables.Rmd.orig b/vignettes/protect_multi_tables.Rmd.orig index b0c5348..f01047a 100644 --- a/vignettes/protect_multi_tables.Rmd.orig +++ b/vignettes/protect_multi_tables.Rmd.orig @@ -185,7 +185,7 @@ Let's specify the location of the TauArgus.exe file in our computer: ```{r version} options( rtauargus.tauargus_exe = - "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" + "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" ) ``` @@ -474,7 +474,7 @@ res <- tab_multi_manager( - Authors: **Julien Jamme** & **Nathanael Rastout** - Last update: **`r format(Sys.time(), "%d/%m/%Y")`** - Version of rtauargus used: **`r packageVersion("rtauargus")`** -- Version of τ-Argus used : **TauArgus 4.2.2b1** +- Version of τ-Argus used : **TauArgus 4.2.3** - R version used : **`r packageVersion("base")`**

diff --git a/vignettes/protect_multi_tables_fr.Rmd b/vignettes/protect_multi_tables_fr.Rmd index 8e3dcd9..6c4aaec 100644 --- a/vignettes/protect_multi_tables_fr.Rmd +++ b/vignettes/protect_multi_tables_fr.Rmd @@ -184,7 +184,7 @@ Spécifions l'emplacement du fichier TauArgus.exe sur notre ordinateur : ``{r version} options( rtauargus.tauargus_exe = - "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" + "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" ) ``` @@ -655,10 +655,10 @@ res <- tab_multi_manager( #### Détail de la vignette - Authors: **Julien Jamme** & **Nathanael Rastout** - - Last update: **06/01/2023** - - Version of rtauargus used: **1.1.1** - - Version of τ-Argus used : **TauArgus 4.2.2b1** - - R version used : **4.1.3** + - Last update: **21/09/2023** + - Version of rtauargus used: **1.2.0** + - Version of τ-Argus used : **TauArgus 4.2.3** + - R version used : **4.2.3**

summary ↑ diff --git a/vignettes/protect_multi_tables_fr.Rmd.orig b/vignettes/protect_multi_tables_fr.Rmd.orig index a0f6cb5..4be1366 100644 --- a/vignettes/protect_multi_tables_fr.Rmd.orig +++ b/vignettes/protect_multi_tables_fr.Rmd.orig @@ -192,7 +192,7 @@ Spécifions l'emplacement du fichier TauArgus.exe sur notre ordinateur : ``{r version} options( rtauargus.tauargus_exe = - "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" + "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" ) ``` @@ -492,7 +492,7 @@ res <- tab_multi_manager( - Authors: **Julien Jamme** & **Nathanael Rastout** - Last update: **`r format(Sys.time(), "%d/%m/%Y")`** - Version of rtauargus used: **`r packageVersion("rtauargus")`** - - Version of τ-Argus used : **TauArgus 4.2.2b1** + - Version of τ-Argus used : **TauArgus 4.2.3** - R version used : **`r packageVersion("base")`**

diff --git a/vignettes/rtauargus.Rmd b/vignettes/rtauargus.Rmd index c09f0ec..c60e52f 100644 --- a/vignettes/rtauargus.Rmd +++ b/vignettes/rtauargus.Rmd @@ -45,7 +45,7 @@ more easily to possible modifications of the software (new methods available, additional options...). The syntax rules for writing batch are given in the τ-Argus reference manual and will be specified in a dedicated help section. -> The package was developed on the basis of open source versions of τ-Argus (versions 4.2 and above), in particular the latest version available at the time of development (4.2.2b1). +> The package was developed on the basis of open source versions of τ-Argus (versions 4.2 and above), in particular the latest version available at the time of development (4.2.3). > > It is not compatible with version 3.5.**_ @@ -82,10 +82,13 @@ The latest releases can be downloaded here: [https://github.com/sdcTools/tauargu ``` • purrr (>= 0.2), • dplyr (>= 0.7), +• data.table, • gdata, • stringr, • rlang, -• zoo +• zoo, +• sdcHierarchies, +• lifecycle ``` The package _rtauargus_ can be installed now. @@ -115,7 +118,7 @@ predefined. This can be changed for the duration of the R session, as follows: ```r -loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" options(rtauargus.tauargus_exe = loc_tauargus) ``` @@ -497,10 +500,10 @@ of `tab_multi_manager()` function. #### About this vignette - Authors: **Julien Jamme** & **Nathanael Rastout** -- Last update: **06/01/2023** -- Version of rtauargus used: **1.1.1** -- Version of τ-Argus used : **TauArgus 4.2.2b1** -- R version used : **4.1.3** +- Last update: **11/01/2024** +- Version of rtauargus used: **1.2.0** +- Version of τ-Argus used : **TauArgus 4.2.3** +- R version used : **4.2.3**

summary ↑ diff --git a/vignettes/rtauargus.Rmd.orig b/vignettes/rtauargus.Rmd.orig index ec54bfe..94a7e11 100644 --- a/vignettes/rtauargus.Rmd.orig +++ b/vignettes/rtauargus.Rmd.orig @@ -51,7 +51,7 @@ more easily to possible modifications of the software (new methods available, additional options...). The syntax rules for writing batch are given in the τ-Argus reference manual and will be specified in a dedicated help section. -> The package was developed on the basis of open source versions of τ-Argus (versions 4.2 and above), in particular the latest version available at the time of development (4.2.2b1). +> The package was developed on the basis of open source versions of τ-Argus (versions 4.2 and above), in particular the latest version available at the time of development (4.2.3). > > It is not compatible with version 3.5.**_ @@ -114,7 +114,7 @@ In particular, a plausible location for the τ-Argus software is predefined. This can be changed for the duration of the R session, as follows: ```{r opt_exe} -loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" options(rtauargus.tauargus_exe = loc_tauargus) ``` @@ -400,7 +400,7 @@ of `tab_multi_manager()` function. - Authors: **Julien Jamme** & **Nathanael Rastout** - Last update: **`r format(Sys.time(), "%d/%m/%Y")`** - Version of rtauargus used: **`r packageVersion("rtauargus")`** -- Version of τ-Argus used : **TauArgus 4.2.2b1** +- Version of τ-Argus used : **TauArgus 4.2.3** - R version used : **`r packageVersion("base")`**

diff --git a/vignettes/rtauargus_fr.Rmd b/vignettes/rtauargus_fr.Rmd index a86cf5b..e99c31a 100644 --- a/vignettes/rtauargus_fr.Rmd +++ b/vignettes/rtauargus_fr.Rmd @@ -45,7 +45,7 @@ plus facilement aux éventuelles modifications du logiciel (nouveaux algorithmes disponibles, options supplémentaires...). Les règles de syntaxes d'écriture du batch sont détaillés dans le manuel de référence de τ-Argus et sont égalements précisées dans la vignette *options_safety_rules*. -> Le package a été développé sur la base de versions open source de τ-Argus (versions 4.2 et supérieures), en particulier la dernière version disponible au moment du développement (4.2.2b1). +> Le package a été développé sur la base de versions open source de τ-Argus (versions 4.2 et supérieures), en particulier la dernière version disponible au moment du développement (4.2.3). > > Il n'est pas compatible avec la version 3.5.**_. @@ -85,7 +85,8 @@ _rtauargus_ nécessite quelques autres packages R. Voici les dépendances à ins • gdata, • stringr, • rlang, -• zoo +• zoo, +• sdcHierarchies ``` Le package _rtauargus_ peut être installé maintenant. @@ -116,7 +117,7 @@ prédéfini. Celui-ci peut être modifié pour la durée de la session R, comme ```r -loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" options(rtauargus.tauargus_exe = loc_tauargus) ``` @@ -497,10 +498,10 @@ de la fonction `tab_multi_manager()`. #### Détail de la vignette - Authors: **Julien Jamme** & **Nathanael Rastout** -- Last update: **06/01/2023** -- Version of rtauargus used: **1.1.1** -- Version of τ-Argus used : **TauArgus 4.2.2b1** -- R version used : **4.1.3** +- Last update: **21/09/2023** +- Version of rtauargus used: **1.2.0** +- Version of τ-Argus used : **TauArgus 4.2.3** +- R version used : **4.2.3**

summary ↑ diff --git a/vignettes/rtauargus_fr.Rmd.orig b/vignettes/rtauargus_fr.Rmd.orig index 1f2f59c..da1e1f5 100644 --- a/vignettes/rtauargus_fr.Rmd.orig +++ b/vignettes/rtauargus_fr.Rmd.orig @@ -51,7 +51,7 @@ plus facilement aux éventuelles modifications du logiciel (nouveaux algorithmes disponibles, options supplémentaires...). Les règles de syntaxes d'écriture du batch sont détaillés dans le manuel de référence de τ-Argus et sont égalements précisées dans la vignette *options_safety_rules*. -> Le package a été développé sur la base de versions open source de τ-Argus (versions 4.2 et supérieures), en particulier la dernière version disponible au moment du développement (4.2.2b1). +> Le package a été développé sur la base de versions open source de τ-Argus (versions 4.2 et supérieures), en particulier la dernière version disponible au moment du développement (4.2.3). > > Il n'est pas compatible avec la version 3.5.**_. @@ -115,7 +115,7 @@ prédéfini. Celui-ci peut être modifié pour la durée de la session R, comme ```{r opt_exe} -loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" options(rtauargus.tauargus_exe = loc_tauargus) ``` @@ -400,7 +400,7 @@ de la fonction `tab_multi_manager()`. - Authors: **Julien Jamme** & **Nathanael Rastout** - Last update: **`r format(Sys.time(), "%d/%m/%Y")`** - Version of rtauargus used: **`r packageVersion("rtauargus")`** -- Version of τ-Argus used : **TauArgus 4.2.2b1** +- Version of τ-Argus used : **TauArgus 4.2.3** - R version used : **`r packageVersion("base")`**

diff --git a/vignettes/rtauargus_micro.Rmd b/vignettes/rtauargus_micro.Rmd index bb4ca5c..d0999f0 100644 --- a/vignettes/rtauargus_micro.Rmd +++ b/vignettes/rtauargus_micro.Rmd @@ -49,7 +49,7 @@ the batch writing are in the reference manual of τ-Argus. > The package was developed on the basis of open source versions of τ > Argus (versions 4.1 and higher), especially the latest version -> available during development (4.1.7)._ +> available during development (4.2.3)._ > > It is not compatible with version 3.5.**_ > @@ -118,10 +118,13 @@ To work, _rtauargus_ depends on other R packages. These have to be installed bef ``` - purrr (>= 0.2), - dplyr (>= 0.7), +- data.table, - gdata, - stringr, - rlang, -- zoo +- zoo, +- sdcHierarchies, +- lifecycle ``` ### Package @@ -155,7 +158,7 @@ message indicates that this location is unknown, so we modify it: ```r -loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" options(rtauargus.tauargus_exe = loc_tauargus) ``` @@ -168,7 +171,7 @@ After this small adjustment, the package is ready to be used. ### Function `rtauargus`. -The eponymous function `rtauargus` performs a processing and retrieves the +The function `micro_rtauargus` performs a processing and retrieves the results immediately in R. Completely abstaining from the internal workings of τ-Argus, it allows to @@ -190,19 +193,19 @@ The last two arguments use the batch syntax τ-Argus. ```r -rtauargus( +micro_rtauargus( microdata = microdata, explanatory_vars = "V1", safety_rules = "FREQ(3,10)", suppress = "GH(1,100)" ) -#> Start of batch procedure; file: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_812070d42323.arb -#> "C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_8120d497993.asc" -#> "C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_8120d497993.rda" +#> Start of batch procedure; file: C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a488545f97.arb +#> "C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a484e1611c.asc" +#> "C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a484e1611c.rda" #> "V1"|""|| #> FREQ(3,10) #> -#> Start explore file: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_8120d497993.asc +#> Start explore file: C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a484e1611c.asc #> Start computing tables #> Table: V1 | has been specified #> Tables have been computed @@ -214,9 +217,9 @@ rtauargus( #> Number of suppressions: 1 #> The hypercube procedure has been applied #> 1 cells have been suppressed -#> (1,4,AS+SE+,"C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_81201c341372.sbs") +#> (1,4,,"C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a4818452b3.sbs") #> Table: V1 | has been written -#> Output file name: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_81201c341372.sbs +#> Output file name: C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a4818452b3.sbs #> End of TauArgus run #> NULL ``` @@ -241,7 +244,7 @@ data.frame), which we can continue to manipulate. ```r secret1 <- - rtauargus( + micro_rtauargus( microdata = microdata, explanatory_vars = list("V1", c("V1", "V2")), # 2 tabs (V1, V1xV2) safety_rules = "FREQ(3,10)", @@ -249,15 +252,15 @@ secret1 <- output_options = c("", "AS+"), # no status for the 1st array show_batch_console = FALSE # to hide the log ) -#> Start of batch procedure; file: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_812013c83d91.arb -#> "C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_812015647f11.asc" -#> "C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_812015647f11.rda" +#> Start of batch procedure; file: C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a48bef7dcb.arb +#> "C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a484664de8.asc" +#> "C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a484664de8.rda" #> "V1"|""|| #> FREQ(3,10) #> "V1""V2"|""|| #> FREQ(3,10) #> -#> Start explore file: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_812015647f11.asc +#> Start explore file: C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a484664de8.asc #> Start computing tables #> Table: V1 | has been specified #> Table: V1 x V2 | has been specified @@ -270,18 +273,18 @@ secret1 <- #> Number of suppressions: 1 #> The hypercube procedure has been applied #> 1 cells have been suppressed -#> (1,4,,"C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_81207aa2345e.sbs") +#> (1,4,,"C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a4862103fbe.sbs") #> Table: V1 | has been written -#> Output file name: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_81207aa2345e.sbs +#> Output file name: C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a4862103fbe.sbs #> GH(2,100) #> Start of the hypercube protection for table V1 x V2 | #> End of hypercube protection. Time used 1 seconds #> Number of suppressions: 2 #> The hypercube procedure has been applied #> 2 cells have been suppressed -#> (2,4,AS+,"C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_812061143ab3.sbs") +#> (2,4,AS+,"C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a48589d7890.sbs") #> Table: V1 x V2 | has been written -#> Output file name: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_812061143ab3.sbs +#> Output file name: C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a48589d7890.sbs #> End of TauArgus run secret1 @@ -312,7 +315,7 @@ The following section provides details on the operation of the system. ## Process decomposition -Using the `rtauargus` function is convenient in terms of lines of code to +Using the `micro_rtauargus` function is convenient in terms of lines of code to enter. However, this approach can be quite cumbersome if the tables to be secreted have in common a certain number of characteristics. @@ -323,7 +326,7 @@ examples seen so far, where all intermediate files were systematically regenerated, even if their content was identical. To do this, we can use the functions called successively by the -function `rtauargus`, namely : +function `micro_rtauargus`, namely : > **[micro_asc_rda()](#function-micro_asc_rda)   →  [micro_arb()](#function-micro_arb)   →  @@ -356,6 +359,7 @@ Content of the files created : file.show("Z:/microdata.asc", "Z:/microdata.rda", pager = "internal") ``` + ``` microdata.asc microdata.rda @@ -407,6 +411,7 @@ Content of the files created : file.show("Z:/microdata.asc", "Z:/microdata.rda", pager = "internal") ``` + ``` microdata.asc microdata.rda @@ -440,10 +445,10 @@ in a temporary folder. It is possible to retrieve the location and name names_asc_rda <- micro_asc_rda(microdata) names_asc_rda #> $asc_filename -#> [1] "C:\\Users\\TMM7AN\\AppData\\Local\\Temp\\RtmpUjZH4W\\RTA_812057c861f1.asc" +#> [1] "C:\\Users\\TMM7AN.AD\\AppData\\Local\\Temp\\Rtmpu6McgM\\RTA_8a4868f0147e.asc" #> #> $rda_filename -#> [1] "C:\\Users\\TMM7AN\\AppData\\Local\\Temp\\RtmpUjZH4W\\RTA_812057c861f1.rda" +#> [1] "C:\\Users\\TMM7AN.AD\\AppData\\Local\\Temp\\Rtmpu6McgM\\RTA_8a4868f0147e.rda" ```

@@ -483,22 +488,21 @@ micro_arb( Content of the file created : -`````r +```r file.show("Z:/microdata.arb", pager = "internal") -```` +``` -````` ``` // Batch generated by package *rtauargus* -// (2023-01-06 16:40:59 CET) +// (2024-01-11 16:39:46 CET) "Z:\microdata.asc" "Z:\microdata.rda" "V1"|""|| FREQ(3,10) GH(1,100) - (1,4,AS+SE+,"Z:\results\secretV1.csv") + (1,4,,"Z:\results\secretV1.csv") ``` #### Elaborate example @@ -529,22 +533,21 @@ micro_arb( ``` -`````r +```r file.show("Z:/microdata.arb", pager = "internal") -```` +``` -````` ``` // Batch generated by package *rtauargus* -// (2023-01-06 16:40:59 CET) +// (2024-01-11 16:39:46 CET) "Z:\microdata.asc" "Z:\microdata.rda" "V1"|""|| FREQ(3,10) GH(1,100) - (1,4,AS+SE+,"Z:\results\secretV1.csv") + (1,4,,"Z:\results\secretV1.csv") ``` #### Temporary files @@ -566,11 +569,11 @@ infos_arb <- ) infos_arb #> $arb_filename -#> [1] "C:\\Users\\TMM7AN\\AppData\\Local\\Temp\\RtmpUjZH4W\\RTA_81205864399e.arb" +#> [1] "C:\\Users\\TMM7AN.AD\\AppData\\Local\\Temp\\Rtmpu6McgM\\RTA_8a481d6c467c.arb" #> #> $output_names -#> [1] "C:\\Users\\TMM7AN\\AppData\\Local\\Temp\\RtmpUjZH4W\\RTA_81201e3eade.sbs" -#> [2] "C:\\Users\\TMM7AN\\AppData\\Local\\Temp\\RtmpUjZH4W\\RTA_8120f21e29.sbs" +#> [1] "C:\\Users\\TMM7AN.AD\\AppData\\Local\\Temp\\Rtmpu6McgM\\RTA_8a483d7451b6.sbs" +#> [2] "C:\\Users\\TMM7AN.AD\\AppData\\Local\\Temp\\Rtmpu6McgM\\RTA_8a48637e49a.sbs" ```

@@ -605,7 +608,7 @@ secret2 <- run_arb("Z:/microdata.arb", missing_dir = "create") #> Number of suppressions: 1 #> The hypercube procedure has been applied #> 1 cells have been suppressed -#> (1,4,AS+SE+,"Z:\results\secretV1.csv") +#> (1,4,,"Z:\results\secretV1.csv") #> Table: V1 | has been written #> Output file name: Z:\results\secretV1.csv #> End of TauArgus run @@ -716,6 +719,7 @@ The available options and their default values are listed below: + |Option |Default value |Type | Function concerned| |:----------------------------|:------------------------------------|:---------|------------------:| |rtauargus.decimals |0 |integer | tab_rda| @@ -740,6 +744,8 @@ below: |rtauargus.import |FALSE |logical | run_arb| |rtauargus.is_tabular |TRUE |logical | run_arb| + + ### Display To view the options set for the current session: @@ -769,7 +775,7 @@ rtauargus_options() #> [1] "stop" #> #> $rtauargus.output_options -#> [1] "AS+SE+" +#> [1] "" #> #> $rtauargus.output_type #> [1] "4" @@ -784,7 +790,7 @@ rtauargus_options() #> [1] FALSE #> #> $rtauargus.tauargus_exe -#> [1] "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +#> [1] "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" #> #> $rtauargus.totcode #> [1] "Total" @@ -829,7 +835,7 @@ str(rtauargus_options()) #> $ rtauargus.response_var : chr "VAL" #> $ rtauargus.separator : chr "," #> $ rtauargus.show_batch_console: logi FALSE -#> $ rtauargus.tauargus_exe : chr "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +#> $ rtauargus.tauargus_exe : chr "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" #> $ rtauargus.totcode : chr "Total" #> $ rtauargus.weighted : logi FALSE ``` @@ -858,7 +864,7 @@ str(rtauargus_options()) #> $ rtauargus.response_var : chr "" #> $ rtauargus.separator : chr "," #> $ rtauargus.show_batch_console: logi FALSE -#> $ rtauargus.tauargus_exe : chr "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +#> $ rtauargus.tauargus_exe : chr "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" #> $ rtauargus.totcode : chr "Total" #> $ rtauargus.weighted : logi FALSE ``` @@ -913,7 +919,7 @@ getting started: - creation of hierarchical variables from microdata (function `write_hrc`). - taking into account of apriori file (argument `apriori` in `micro_arb`) -- use of the `rtauargus` function from microdata already under +- use of the `micro_rtauargus` function from microdata already under as a text file (not data.frame) - optimized launch of a large number of crosses with the same parameters (primary secrecy rules, secondary secrecy method, ...) : function @@ -941,10 +947,10 @@ For any return or error report, please use - Author: **Pierre-Yves Berrard** - Updated by: **Julien Jamme** -- Last update: **06/01/2023** -- Version of rtauargus used: **1.1.1** -- Version of τ-Argus used : **TauArgus 4.2.2b1** -- R version used : **4.1.3** +- Last update: **11/01/2024** +- Version of rtauargus used: **1.2.0** +- Version of τ-Argus used : **TauArgus 4.2.3** +- R version used : **4.2.3**

summary ↑ diff --git a/vignettes/rtauargus_micro.Rmd.orig b/vignettes/rtauargus_micro.Rmd.orig index f5cc120..bb3c27a 100644 --- a/vignettes/rtauargus_micro.Rmd.orig +++ b/vignettes/rtauargus_micro.Rmd.orig @@ -56,7 +56,7 @@ the batch writing are in the reference manual of τ-Argus. > The package was developed on the basis of open source versions of τ > Argus (versions 4.1 and higher), especially the latest version -> available during development (4.1.7)._ +> available during development (4.2.3)._ > > It is not compatible with version 3.5.**_ > @@ -145,7 +145,7 @@ predefined. It is possible to change it for the duration of the R session. A message indicates that this location is unknown, so we modify it: ```{r opt_exe} -loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" options(rtauargus.tauargus_exe = loc_tauargus) ``` @@ -158,7 +158,7 @@ After this small adjustment, the package is ready to be used. ### Function `rtauargus`. -The eponymous function `rtauargus` performs a processing and retrieves the +The function `micro_rtauargus` performs a processing and retrieves the results immediately in R. Completely abstaining from the internal workings of τ-Argus, it allows to @@ -179,7 +179,7 @@ The last two arguments use the batch syntax τ-Argus. #### Minimalist example ```{r rtauargus_ex1} -rtauargus( +micro_rtauargus( microdata = microdata, explanatory_vars = "V1", safety_rules = "FREQ(3,10)", @@ -206,7 +206,7 @@ data.frame), which we can continue to manipulate. ```{r rtauargus_ex2} secret1 <- - rtauargus( + micro_rtauargus( microdata = microdata, explanatory_vars = list("V1", c("V1", "V2")), # 2 tabs (V1, V1xV2) safety_rules = "FREQ(3,10)", @@ -242,7 +242,7 @@ The following section provides details on the operation of the system. ## Process decomposition -Using the `rtauargus` function is convenient in terms of lines of code to +Using the `micro_rtauargus` function is convenient in terms of lines of code to enter. However, this approach can be quite cumbersome if the tables to be secreted have in common a certain number of characteristics. @@ -253,7 +253,7 @@ examples seen so far, where all intermediate files were systematically regenerated, even if their content was identical. To do this, we can use the functions called successively by the -function `rtauargus`, namely : +function `micro_rtauargus`, namely : > **[micro_asc_rda()](#function-micro_asc_rda)   →  [micro_arb()](#function-micro_arb)   →  @@ -282,7 +282,8 @@ micro_asc_rda(microdata, asc_filename = "Z:/microdata.asc") Content of the files created : ```{r file_show_asc_rda, eval = FALSE} file.show("Z:/microdata.asc", "Z:/microdata.rda", pager = "internal") -```` +``` + ```{r show_asc_rda, echo = FALSE, comment = ""} microdata.asc <- c("", readLines("Z:/microdata.asc")) microdata.rda <- c("", readLines("Z:/microdata.rda")) @@ -325,7 +326,8 @@ micro_asc_rda( ``` Content of the files created : ```{r file_show_asc_rda2, ref.label = 'file_show_asc_rda', eval = FALSE} -```` +``` + ```{r show_asc_rda2, ref.label = 'show_asc_rda', echo = FALSE, comment = ""} ``` @@ -377,7 +379,7 @@ micro_arb( Content of the file created : ```{r file_show_arb, eval = FALSE} file.show("Z:/microdata.arb", pager = "internal") -```` +``` ```{r show_arb, echo = FALSE, comment = ""} cat(readLines("Z:/microdata.arb"), sep = "\n") @@ -411,7 +413,7 @@ micro_arb( ``` ```{r file_show_arb2, ref.label = 'file_show_arb', eval = FALSE} -```` +``` ```{r show_arb2, ref.label = 'show_arb', echo = FALSE, eval = TRUE, comment = ""} ``` @@ -652,7 +654,7 @@ getting started: - creation of hierarchical variables from microdata (function `write_hrc`). - taking into account of apriori file (argument `apriori` in `micro_arb`) -- use of the `rtauargus` function from microdata already under +- use of the `micro_rtauargus` function from microdata already under as a text file (not data.frame) - optimized launch of a large number of crosses with the same parameters (primary secrecy rules, secondary secrecy method, ...) : function @@ -682,7 +684,7 @@ For any return or error report, please use - Updated by: **Julien Jamme** - Last update: **`r format(Sys.time(), "%d/%m/%Y")`** - Version of rtauargus used: **`r packageVersion("rtauargus")`** -- Version of τ-Argus used : **TauArgus 4.2.2b1** +- Version of τ-Argus used : **TauArgus 4.2.3** - R version used : **`r packageVersion("base")`**

diff --git a/vignettes/rtauargus_micro_fr.Rmd b/vignettes/rtauargus_micro_fr.Rmd index 9283bb6..c7d9e13 100644 --- a/vignettes/rtauargus_micro_fr.Rmd +++ b/vignettes/rtauargus_micro_fr.Rmd @@ -49,7 +49,7 @@ l'écriture de batch figurent dans le manuel de référence de τ-Argus. > _Le package a été développé sur la base des versions open source de τ > -Argus (versions 4.1 et supérieures), en particulier la dernière version -> disponible lors du développement (4.1.7)._ +> disponible lors du développement (4.2.3)._ > > _**Il n'est pas compatible avec la version 3.5.**_ > @@ -119,10 +119,13 @@ installer au préalable (entre parenthèses les versions minimales requises). ``` • purrr (>= 0.2), • dplyr (>= 0.7), +• data.table, • gdata, • stringr, • rlang, -• zoo +• zoo, +• sdcHierarchies, +• lifecycle ``` ### Package @@ -156,7 +159,7 @@ message indique que cet emplacement est inconnu, on le modifie donc : ```r -loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" options(rtauargus.tauargus_exe = loc_tauargus) ``` @@ -197,29 +200,7 @@ rtauargus( safety_rules = "FREQ(3,10)", suppress = "GH(1,100)" ) -#> Start of batch procedure; file: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_81206895bcf.arb -#> "C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_812031f6796d.asc" -#> "C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_812031f6796d.rda" -#> "V1"|""|| -#> FREQ(3,10) -#> -#> Start explore file: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_812031f6796d.asc -#> Start computing tables -#> Table: V1 | has been specified -#> Tables have been computed -#> Micro data file read; processing time 0 seconds -#> Tables from microdata have been read -#> GH(1,100) -#> Start of the hypercube protection for table V1 | -#> End of hypercube protection. Time used 1 seconds -#> Number of suppressions: 1 -#> The hypercube procedure has been applied -#> 1 cells have been suppressed -#> (1,4,AS+SE+,"C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_8120677543.sbs") -#> Table: V1 | has been written -#> Output file name: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_8120677543.sbs -#> End of TauArgus run -#> NULL +#> Error in rtauargus(microdata = donnees, explanatory_vars = "V1", safety_rules = "FREQ(3,10)", : could not find function "rtauargus" ``` Comme aucune variable de réponse n'est renseignée, un comptage est effectué @@ -250,40 +231,7 @@ secret1 <- output_options = c("", "AS+"), # pas de statut pour le 1er tableau show_batch_console = FALSE # pour masquer le journal ) -#> Start of batch procedure; file: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_8120162912aa.arb -#> "C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_81202ad4acf.asc" -#> "C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_81202ad4acf.rda" -#> "V1"|""|| -#> FREQ(3,10) -#> "V1""V2"|""|| -#> FREQ(3,10) -#> -#> Start explore file: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_81202ad4acf.asc -#> Start computing tables -#> Table: V1 | has been specified -#> Table: V1 x V2 | has been specified -#> Tables have been computed -#> Micro data file read; processing time 0 seconds -#> Tables from microdata have been read -#> GH(1,100) -#> Start of the hypercube protection for table V1 | -#> End of hypercube protection. Time used 1 seconds -#> Number of suppressions: 1 -#> The hypercube procedure has been applied -#> 1 cells have been suppressed -#> (1,4,,"C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_812020b86297.sbs") -#> Table: V1 | has been written -#> Output file name: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_812020b86297.sbs -#> GH(2,100) -#> Start of the hypercube protection for table V1 x V2 | -#> End of hypercube protection. Time used 1 seconds -#> Number of suppressions: 2 -#> The hypercube procedure has been applied -#> 2 cells have been suppressed -#> (2,4,AS+,"C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_81202d404386.sbs") -#> Table: V1 x V2 | has been written -#> Output file name: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_81202d404386.sbs -#> End of TauArgus run +#> Error in rtauargus(microdata = donnees, explanatory_vars = list("V1", : could not find function "rtauargus" secret1 #> NULL @@ -359,6 +307,7 @@ Contenu des fichiers créés : file.show("Z:/donnees.asc", "Z:/donnees.rda", pager = "internal") ``` + ``` donnees.asc donnees.rda @@ -410,6 +359,7 @@ Contenu des fichiers créés : file.show("Z:/donnees.asc", "Z:/donnees.rda", pager = "internal") ``` + ``` donnees.asc donnees.rda @@ -443,10 +393,10 @@ dans un dossier temporaire. Il est possible de récupérer l'emplacement et le n noms_asc_rda <- micro_asc_rda(donnees) noms_asc_rda #> $asc_filename -#> [1] "C:\\Users\\TMM7AN\\AppData\\Local\\Temp\\RtmpUjZH4W\\RTA_812041431d18.asc" +#> [1] "C:\\Users\\TMM7AN.AD\\AppData\\Local\\Temp\\Rtmpu6McgM\\RTA_8a4827bb513c.asc" #> #> $rda_filename -#> [1] "C:\\Users\\TMM7AN\\AppData\\Local\\Temp\\RtmpUjZH4W\\RTA_812041431d18.rda" +#> [1] "C:\\Users\\TMM7AN.AD\\AppData\\Local\\Temp\\Rtmpu6McgM\\RTA_8a4827bb513c.rda" ```

@@ -486,15 +436,14 @@ micro_arb( Contenu du fichier créé : -`````r +```r file.show("Z:/donnees.arb", pager = "internal") -```` +``` -````` ``` // Batch generated by package *rtauargus* -// (2023-01-06 16:42:02 CET) +// (2024-01-11 16:39:53 CET) "Z:\donnees.asc" "Z:\donnees.rda" "V1"|""|| @@ -533,15 +482,14 @@ micro_arb( ``` -`````r +```r file.show("Z:/donnees.arb", pager = "internal") -```` +``` -````` ``` // Batch generated by package *rtauargus* -// (2023-01-06 16:42:02 CET) +// (2024-01-11 16:39:53 CET) "Z:\donnees.asc" "Z:\donnees.rda" "V1"|"VAL"|| @@ -550,9 +498,9 @@ file.show("Z:/donnees.arb", pager = "internal") FREQ(3,10)|NK(1,85) GH(1,100) - (1,4,AS+SE+,"C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_81207dcc37da.sbs") + (1,4,AS+SE+,"C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a4869675510.sbs") GH(2,100) - (2,4,AS+SE+,"C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_8120449fd77.sbs") + (2,4,AS+SE+,"C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a48384e7b7f.sbs") ``` #### Fichiers temporaires @@ -574,11 +522,11 @@ infos_arb <- ) infos_arb #> $arb_filename -#> [1] "C:\\Users\\TMM7AN\\AppData\\Local\\Temp\\RtmpUjZH4W\\RTA_81204a096641.arb" +#> [1] "C:\\Users\\TMM7AN.AD\\AppData\\Local\\Temp\\Rtmpu6McgM\\RTA_8a484da665e2.arb" #> #> $output_names -#> [1] "C:\\Users\\TMM7AN\\AppData\\Local\\Temp\\RtmpUjZH4W\\RTA_812068be1986.sbs" -#> [2] "C:\\Users\\TMM7AN\\AppData\\Local\\Temp\\RtmpUjZH4W\\RTA_812075c6777b.sbs" +#> [1] "C:\\Users\\TMM7AN.AD\\AppData\\Local\\Temp\\Rtmpu6McgM\\RTA_8a48649a5045.sbs" +#> [2] "C:\\Users\\TMM7AN.AD\\AppData\\Local\\Temp\\Rtmpu6McgM\\RTA_8a4834183530.sbs" ```

@@ -616,18 +564,18 @@ secret2 <- run_arb("Z:/donnees.arb") #> Number of suppressions: 1 #> The hypercube procedure has been applied #> 1 cells have been suppressed -#> (1,4,AS+SE+,"C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_81207dcc37da.sbs") +#> (1,4,AS+SE+,"C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a4869675510.sbs") #> Table: V1 | VAL has been written -#> Output file name: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_81207dcc37da.sbs +#> Output file name: C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a4869675510.sbs #> GH(2,100) #> Start of the hypercube protection for table V2 x V3 | VAL #> End of hypercube protection. Time used 1 seconds #> Number of suppressions: 2 #> The hypercube procedure has been applied #> 2 cells have been suppressed -#> (2,4,AS+SE+,"C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_8120449fd77.sbs") +#> (2,4,AS+SE+,"C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a48384e7b7f.sbs") #> Table: V2 x V3 | VAL has been written -#> Output file name: C:\Users\TMM7AN\AppData\Local\Temp\RtmpUjZH4W\RTA_8120449fd77.sbs +#> Output file name: C:\Users\TMM7AN.AD\AppData\Local\Temp\Rtmpu6McgM\RTA_8a48384e7b7f.sbs #> End of TauArgus run secret2 @@ -736,6 +684,7 @@ Les options disponibles ainsi que leurs valeurs par défaut sont listées ci-dessous : + |Option |Valeur par défaut |Type | Fonction concernée| |:----------------------------|:------------------------------------|:---------|------------------:| |rtauargus.decimals |0 |integer | tab_rda| @@ -760,6 +709,8 @@ ci-dessous : |rtauargus.import |FALSE |logical | run_arb| |rtauargus.is_tabular |TRUE |logical | run_arb| + + ### Affichage Pour afficher les options définies pour la session en cours : @@ -804,7 +755,7 @@ rtauargus_options() #> [1] FALSE #> #> $rtauargus.tauargus_exe -#> [1] "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +#> [1] "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" #> #> $rtauargus.totcode #> [1] "Total" @@ -849,7 +800,7 @@ str(rtauargus_options()) #> $ rtauargus.response_var : chr "VAL" #> $ rtauargus.separator : chr "," #> $ rtauargus.show_batch_console: logi FALSE -#> $ rtauargus.tauargus_exe : chr "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +#> $ rtauargus.tauargus_exe : chr "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" #> $ rtauargus.totcode : chr "Total" #> $ rtauargus.weighted : logi FALSE ``` @@ -878,7 +829,7 @@ str(rtauargus_options()) #> $ rtauargus.response_var : chr "" #> $ rtauargus.separator : chr "," #> $ rtauargus.show_batch_console: logi FALSE -#> $ rtauargus.tauargus_exe : chr "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +#> $ rtauargus.tauargus_exe : chr "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" #> $ rtauargus.totcode : chr "Total" #> $ rtauargus.weighted : logi FALSE ``` @@ -960,10 +911,10 @@ Pour tout retour ou signalement d'erreur, utiliser de préférence #### À propos de cette vignette - Auteur : **Pierre-Yves Berrard** -- Dernière mise à jour : **06/01/2023** -- Version de rtauargus utilisée : **1.1.1** -- Version de τ-Argus utilisée : **TauArgus 4.2.2b1** -- Version de R utilisée : **4.1.3** +- Dernière mise à jour : **11/01/2024** +- Version de rtauargus utilisée : **1.2.0** +- Version de τ-Argus utilisée : **TauArgus 4.2.3** +- Version de R utilisée : **4.2.3**

sommaire ↑ diff --git a/vignettes/rtauargus_micro_fr.Rmd.orig b/vignettes/rtauargus_micro_fr.Rmd.orig index 277cdce..66bfad9 100644 --- a/vignettes/rtauargus_micro_fr.Rmd.orig +++ b/vignettes/rtauargus_micro_fr.Rmd.orig @@ -56,7 +56,7 @@ l'écriture de batch figurent dans le manuel de référence de τ-Argus. > _Le package a été développé sur la base des versions open source de τ > -Argus (versions 4.1 et supérieures), en particulier la dernière version -> disponible lors du développement (4.1.7)._ +> disponible lors du développement (4.2.3)._ > > _**Il n'est pas compatible avec la version 3.5.**_ > @@ -146,7 +146,7 @@ prédéfini. Il est possible de le changer pour toute la durée de la session R. message indique que cet emplacement est inconnu, on le modifie donc : ```{r opt_exe} -loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.2b1/TauArgus.exe" +loc_tauargus <- "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" options(rtauargus.tauargus_exe = loc_tauargus) ``` @@ -285,7 +285,8 @@ micro_asc_rda(donnees, asc_filename = "Z:/donnees.asc") Contenu des fichiers créés : ```{r file_show_asc_rda, eval = FALSE} file.show("Z:/donnees.asc", "Z:/donnees.rda", pager = "internal") -```` +``` + ```{r show_asc_rda, echo = FALSE, comment = ""} donnees.asc <- c("", readLines("Z:/donnees.asc")) donnees.rda <- c("", readLines("Z:/donnees.rda")) @@ -328,7 +329,8 @@ micro_asc_rda( ``` Contenu des fichiers créés : ```{r file_show_asc_rda2, ref.label = 'file_show_asc_rda', eval = FALSE} -```` +``` + ```{r show_asc_rda2, ref.label = 'show_asc_rda', echo = FALSE, comment = ""} ``` @@ -380,7 +382,7 @@ micro_arb( Contenu du fichier créé : ```{r file_show_arb, eval = FALSE} file.show("Z:/donnees.arb", pager = "internal") -```` +``` ```{r show_arb, echo = FALSE, comment = ""} cat(readLines("Z:/donnees.arb"), sep = "\n") @@ -414,7 +416,7 @@ micro_arb( ``` ```{r file_show_arb2, ref.label = 'file_show_arb', eval = FALSE} -```` +``` ```{r show_arb2, ref.label = 'show_arb', echo = FALSE, eval = TRUE, comment = ""} ``` @@ -684,7 +686,7 @@ Pour tout retour ou signalement d'erreur, utiliser de préférence - Auteur : **Pierre-Yves Berrard** - Dernière mise à jour : **`r format(Sys.time(), "%d/%m/%Y")`** - Version de rtauargus utilisée : **`r packageVersion("rtauargus")`** -- Version de τ-Argus utilisée : **TauArgus 4.2.2b1** +- Version de τ-Argus utilisée : **TauArgus 4.2.3** - Version de R utilisée : **`r packageVersion("base")`**

diff --git a/vignettes/split_tab_fr.Rmd b/vignettes/split_tab_fr.Rmd new file mode 100644 index 0000000..0dd431b --- /dev/null +++ b/vignettes/split_tab_fr.Rmd @@ -0,0 +1,430 @@ +--- +title: "Comment poser du secret sur des tableaux à 4 ou 5 dimensions avec `rtauargus`?" +subtitle:

![logo rtauargus](../man/figures/rtauargus_logo_small.png)

+output: + rmarkdown::html_vignette: + toc: true + toc_depth: 2 + number_section: true + fig_caption: true +vignette: > + %\VignetteIndexEntry{Gestion des tableaux à 4 ou 5 dimensions} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + + + +Les algorithmes de pose du secret secondaire tels qu'Optimal ou Modular implémentés +dans `Tau-Argus` reposent sur un programme d'optimisation sous contraintes. Le +nombre de ces constraintes devient très difficile à gérer - du moins dans un temps +raisonnable - pour `Tau-Argus`. La pose du secret +sur un tableau à 5 dimensions directement avec `Tau-Argus` est impossible et les résultats +fournis par le logiciel lors du traitement d'un tableau de 4 dimensions sont +accompagnés d'un avertissement. Par ailleurs, un tableau à 4 dimensions dont l'une +est hiérarchique est un problème, en général, très difficile pour `Tau-Argus`. + +Le package `rtauargus` offre désormais la possibilité de protéger des tableaux +de 4 à 5 dimensions. Pour les tableaux de 4 dimensions, la protection est +même plus rapide et permet de traiter des tableaux plus grands encore (en nombre +de lignes). Certains tableaux demeurent néanmoins trop complexes pour `Tau-Argus`. + +Cette nouvelle fonctionnalité est mise à disposition dans les fonctions +`tab_rtauargus` et `tab_multi_manager` avec l'argument `split_tab`. +Quand il est renseigné à `TRUE`, les fonctions réduisent +la dimension d'un tableau de 4 ou 5 dimensions en construisant un certain nombre +de tableaux liés de 3 dimensions chacun. Ces tableaux contiennent exactement les +mêmes cellules que le tableau original. Puis, la fonction `tab_multi_manager` est +appelée pour organiser la pose du secret sur l'ensemble de ces tableaux. +`A la fin du processus, on récupère la table de départ - avec ses dimensions +originales - augmentée des informations sur le secret secondaire. + +Ainsi, point important à retenir, la pose du secret secondaire est toujours +réalisée par les algorithmes de `Tau-Argus`, le package `rtauargus` ne fait que +découper le tableau en sous-tableaux à trois dimensions et assurer la cohérence +du secret entre ces différents sous-tableaux liés entre eux. + +Cette méthode est susceptible de générer plus de secret qu'une attaque directe +du problème. Mais celle-ci étant impossible dans les cas visés, c'est un moindre mal. + +Pour utiliser cette fonctionnalité, il est nécessaire que l'utilisateur +pose de lui-même le secret primaire en amont. + + +### Comment la réduction d'un tableau à 4 ou 5 dimensions est-elle réalisée ? + +Pour passer de 5 à 3 dimensions, on utilise deux fois de suite la méthode +utilisée pour réduire un tableau en sous-tableaux ayant une dimension de moins. + +Le passage de 4 à 3 dimensions consiste d'abord à fusionner deux des quatre +dimensions du tableau original, c'est-à-dire qu'on remplace deux des variables +de départ par une seule dont les modalités sont la fusion des modalités des +deux variables choisies. + +Dans l'exemple ci-dessous, on fusionne par exemple les deux variables `SEX` et +`AGE` pour n'en faire qu'une seule appelée `SEX_AGE`. + +
+base_tab +
+ +Mais, la nouvelle variable contient des modalités non-emboîtées entre elles, +comme, par exemple, les modalités `Femme_Total` et `Ensemble_Adulte`. + +
+ Total_Ensemble = Total_Adulte + Total_Enfant +
+ +
+ Total_Ensemble = Femme_Ensemble + Homme_Ensemble +
+ +Or, nous savons gérer la présence de hiérarchies non-emboîtées dans un tableau. +Il suffit, en effet, de construire les sous-tableaux de même structure de telle +sorte que dans chacun de ces sous-tableaux la variable incriminée soit +restreinte à une partie parfaitement emboîtée de ses modalités. +Pour poser le secret sur les sous-tableaux liés engendrés, il est fait appel +à la fonction `tab_multi_manager`, spécialement conçue pour assurer la cohérence +du secret entre tableaux liés. + +Ici, la fusion nécessite la construction de deux sous-tableaux, chacun ayant une +variable `SEX__AGE` parfaitement hiérarchique. Le premier rassemble les modalités +`Total_Ensemble`, `Total_Adulte` et `Total_Enfant`, le second +`Total_Ensemble`, `Femme_Ensemble` et `Homme_Ensemble`. + +Cet exemple est le plus simple qu'il soit: en fusionnant deux variables +non hiérarchiques, on est amené à générer deux sous-tableaux. + +Lorsque l'une des variables fusionnées est elle-même hiérarchique, +la construction des sous-tableaux s'appuie sur les différents sous-totaux +présents dans la hiérarchie, comme indiqué dans la figure ci-dessous: + +
+ hierarchie +
+ +### Pour aller plus loin + +- [Article présentant la méthode et des résultats de simulation](https://github.com/InseeFrLab/dims_reduction_tables_workshop_20231215) + + +### Les principaux arguments ajoutés à `tab_rtauargus()` et `tab_multi_manager()` + +Pour activer le découpage des tableaux de dimension 4 ou 5 en sous-tableaux +de 3 dimensions, l'utilisateur doit activer l'argument `split_tab` en le passant +à `TRUE`. Cette valeur est sa valeur par défaut. Ainsi, l'utilisateur veillera +à passer cet argument à `FALSE` s'il souhaite qu'un tableau à 4 dimensions soit +traité directement par `Tau-Argus`. + +L'argument `nb_tab_option` permet de préciser la façon dont le tableau est découpé: + +- L'option `"min"` consiste à construire le minimum de tables à 3 dimensions +lors de la fusion. Ce sont donc des variables non hiérarchiques qui seront +fusionnées en priorité. +- L'option `"max"` au contraire construit le maximum de tables à 3 dimensions possible +lors de la fusion. Ce sont donc des variables hiérarchiques qui seront +fusionnées en priorité. Cette option est intéressante pour réduire la taille +des sous-tableaux produits et donc augmenter les chances que `Tau-Argus` puisse +les traiter. L'inconvénient de cette option est qu'elle peut générer un très +grand nombre de tableaux, nombre dépendant du nombre de noeuds dans les hiérarchies. +- L'option `"smart"` va chercher à construire le minimum de tables à 3 dimensions +tout en cherchant à respecter une limite de taille (nombre de lignes) pour les +sous-tableaux construits. Cette limite est fixée par l'argument `limit`. Par +défaut, la valeur est fixée à `14 700` - choisie de par notre expérience. Si +la limite n'est pas atteignable, un avertissement est affiché mais cela ne bloque +pas la construction des tables et la pose du masque. + +Par défaut, l'argument `nb_tab_option` est fixé à `"smart"`. Il est déconseillé +de changer cette valeur dans un premier temps. + +- L'argument `nb_tab = "min"` est intéressant en termes de secret car c'est +l'option qui génèrera le moins de sur-secret puisqu'elle construira le moins +possible de sous-tableaux liés. Il peut être intéressant à utiliser si la table +de départ contient un nombre de lignes raisonnables, +c'est-à-dire si chaque dimension n'est pas trop ventilée. +- L'argument `nb_tab = "max"` créant le plus de tableaux possibles engendrera +nécessairement le plus de secret, mais elle pourra aussi convenir quand les +tableaux d'origine sont très longs et nécessitent d'être beaucoup découpés pour +que la pose du secret soit possible avec `Tau-Argus`. + +### Exemple + +Comme tout est géré par la fonction `tab_rtauargus`, la pose du secret sur un tableau +à 4 ou 5 dimensions n'ajoute pas de difficultés particulières pour l'utilisateur +en réalité. + +Nous présentons ici un exemple de pose de secret sur un tableau à 4 dimensions. + +#### Préparation des données + + +```r +library(tictoc) +library(dplyr) +library(rtauargus) +``` + + +```r +options( + rtauargus.tauargus_exe = + "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" +) +``` + + + +```r +data("datatest1") +str(datatest1) +#> tibble [689 x 12] (S3: tbl_df/tbl/data.frame) +#> $ treff : chr [1:689] "tr1" "tr2" "tr3" "Total" ... +#> $ cj : chr [1:689] "Total" "Total" "Total" "PA" ... +#> $ type_distrib : chr [1:689] "Total" "Total" "Total" "Total" ... +#> $ A10 : chr [1:689] "Total" "Total" "Total" "Total" ... +#> $ nb_obs : num [1:689] 227093 7803 227 1443 125904 ... +#> $ pizzas_tot : num [1:689] 5900938 6538420 6794859 15128 10168928 ... +#> $ pizzas_max : num [1:689] 135444 267177 852749 2756 523505 ... +#> $ is_secret_freq: logi [1:689] FALSE FALSE FALSE FALSE FALSE FALSE ... +#> $ is_secret_dom : logi [1:689] FALSE FALSE FALSE FALSE FALSE FALSE ... +#> $ is_secret_prim: logi [1:689] FALSE FALSE FALSE FALSE FALSE FALSE ... +#> $ nb_obs_rnd : num [1:689] 227094 7804 228 1444 125905 ... +#> $ pizzas_tot_abs: num [1:689] 5900938 6538420 6794859 15128 10168928 ... +#> - attr(*, ".internal.selfref")= +``` + + +#### Préparation des arguments pour `tab_rtauargus` + + +```r + +totcode <- c(treff ="Total",cj ="Total", A10 = "Total", type_distrib ="Total") +explanatory_vars<- names(totcode) +``` + +#### Pose du secret primaire + + +```r +datatest1_with_prim <- datatest1 %>% + mutate( + is_secret_freq = (nb_obs > 0 & nb_obs < 3), + is_secret_dom = (pizzas_tot != 0) & (pizzas_max > 0.85*pizzas_tot), + pizzas_tot= round(abs(pizzas_tot),2) + ) %>% + mutate( + is_secret_prim = is_secret_freq , + nb_obs = ceiling(nb_obs) + ) +``` + +#### Pose du secret secondaire avec `rtauargus` + +Pour poser le secret secondaire, on utilise la fonction `tab_rtauargus` avec +l'argument `split_tab = TRUE`. Ainsi, `Tau-Argus` n'aura pas à traiter directement +le secret sur la table à 4 dimensions. La fonction `tab_rtauargus` va +découper la table originale en un certain nombre de tables qui seront protégées +comme des tables liées avec un appel sous-jacent à `tab_multi_manager`. + + + +```r +tictoc::tic() +res_wi_split <- tab_rtauargus( + tabular = datatest1_with_prim, + files_name = "datatest1", + dir_name = "example_1", + explanatory_vars = explanatory_vars, + totcode = totcode, + value = "pizzas_tot", + freq = "nb_obs", + secret_var = "is_secret_prim", + verbose = TRUE, + split_tab = TRUE +) +#> +#> Reducing dims... +#> tab +#> +#> Choosing variables... +#> +#> Reducing from 4 to 3... +#> tab has generated 2 tables in total +#> +#> Spliting... +#> treff___cj +#> tab has generated 2 tables in total +#> +#> --- Current table to treat: tab1 --- +#> --- Current table to treat: tab2 --- +#> --- Current table to treat: tab1 --- +#> --- Current table to treat: tab2 --- +#> --- Current table to treat: tab1 --- +tictoc::toc() +#> 45.31 sec elapsed +``` + +La réduction de dimensions a conduit à construire deux sous-tableaux de 3 +dimensions à partir du tableau original. + + +```r +str(res_wi_split) +#> 'data.frame': 689 obs. of 17 variables: +#> $ treff : chr "Total" "tr1" "tr1" "tr1" ... +#> $ cj : chr "Total" "LL" "SP" "Total" ... +#> $ type_distrib : chr "1" "1" "1" "1" ... +#> $ A10 : chr "AZ" "AZ" "AZ" "AZ" ... +#> $ nb_obs : num 115 89 21 110 6 ... +#> $ pizzas_tot : num 8447 769 1301 532 7915 ... +#> $ pizzas_max : num 1471.2 13.8 96.4 96.4 1471.2 ... +#> $ is_secret_freq: logi FALSE FALSE FALSE FALSE FALSE FALSE ... +#> $ is_secret_dom : logi FALSE TRUE FALSE FALSE FALSE FALSE ... +#> $ is_secret_prim: logi FALSE FALSE FALSE FALSE FALSE FALSE ... +#> $ nb_obs_rnd : num 115 89 21 110 6 ... +#> $ pizzas_tot_abs: num 8447 769 1301 532 7915 ... +#> $ is_secret_1 : logi FALSE TRUE FALSE TRUE TRUE TRUE ... +#> $ is_secret_2 : logi FALSE TRUE TRUE TRUE TRUE TRUE ... +#> $ is_secret_3 : logi FALSE TRUE TRUE TRUE TRUE TRUE ... +#> $ is_secret_4 : logi FALSE TRUE TRUE TRUE TRUE TRUE ... +#> $ is_secret_5 : logi FALSE TRUE TRUE TRUE TRUE TRUE ... +``` + + +```r +stats_wi_split <- res_wi_split %>% + rename_with( ~"final_suppress", last_col()) %>% + mutate( + flag = case_when( + is_secret_freq ~ "A", + is_secret_dom ~ "B", + final_suppress ~ "D", + TRUE ~"V" + ) + ) %>% + count(flag) +stats_wi_split +#> flag n +#> 1 A 79 +#> 2 B 35 +#> 3 D 278 +#> 4 V 297 +``` + +Avec l'option de découpage du tableau de départ, la pose du secret sur +le tableau à 4 dimensions génère +278 suppressions secondaires. + +#### Comparaison avec une pose du secret sans réduire la dimension de la table + +`Tau-Argus` est en capacité de poser le secret secondaire sur un tableau à 4 +dimensions avec `Modular`, mais, en plus d'émettre un avertissement sur le +fait qu'avec ce nombre de dimensions, la qualité du résultat n'est pas garantie, +il est aussi beaucoup plus long à poser le secret secondaire que lorsqu'il +traite deux tables liées issues du tableau original. + + +```r +tictoc::tic() +res_wo_split <- tab_rtauargus( + tabular = datatest1_with_prim, + files_name = "datatest1", + dir_name = "example_1/wo_split", + explanatory_vars = explanatory_vars, + totcode = totcode, + value = "pizzas_tot", + freq = "nb_obs", + secret_var = "is_secret_prim", + verbose = TRUE, + split_tab = FALSE +) +#> Warning : +#> It is highly recommended to use split_tab = TRUE when using rtauargus with 4 or 5 dimensions tables. +#> It allows to split the table in several tables with 3 dimensions. +#> +#> With split_tab = FALSE, tauargus treats the table in 4 or 5 dimensions. +#> In this case, the secondary secret may not being optimal according to tauargus itself +#> and the process may take longer. +#> Start of batch procedure; file: Z:\SDC\OutilsConfidentialite\rtauargus\vignettes\example_1\wo_split\datatest1.arb +#> "Z:\SDC\OutilsConfidentialite\rtauargus\vignettes\example_1\wo_split\datatest1.tab" +#> "Z:\SDC\OutilsConfidentialite\rtauargus\vignettes\example_1\wo_split\datatest1.rda" +#> "treff""cj""A10""type_distrib"|"pizzas_tot"|| +#> MAN(10) +#> 1 +#> Tables have been read +#> "Z:\SDC\OutilsConfidentialite\rtauargus\vignettes\example_1\wo_split\datatest1.hst",1,",",0,0 +#> Apriory file: Z:\SDC\OutilsConfidentialite\rtauargus\vignettes\example_1\wo_split\datatest1.hst has been applied +#> to table: treff x cj x A10 x type_distrib | pizzas_tot +#> Apriori file Z:\SDC\OutilsConfidentialite\rtauargus\vignettes\example_1\wo_split\datatest1.hst has been read +#> MOD(1,5,1,0,0) +#> Start of the modular protection for table treff x cj x A10 x type_distrib | pizzas_tot +#> WARNING: The table has 4 dimensions. +#> Running Modular can take a lot of time and maybe it is difficult to obtain a correct result. +#> Please check the results carefully. +#> +#> End of modular protection. Time used 301 seconds +#> Number of suppressions: 291 +#> (1,4,,"Z:\SDC\OutilsConfidentialite\rtauargus\vignettes\example_1\wo_split\datatest1.csv") +#> Table: treff x cj x A10 x type_distrib | pizzas_tot has been written +#> Output file name: Z:\SDC\OutilsConfidentialite\rtauargus\vignettes\example_1\wo_split\datatest1.csv +#> End of TauArgus run +tictoc::toc() +#> 306.34 sec elapsed +``` + +Avec seulement 689 lignes et aucune variable hiérarchique, la pose directe du +secret prend environ 5 minutes à `Tau-Argus`. + + +```r +stats_wo_split <- res_wo_split %>% + mutate( + flag = case_when( + is_secret_freq ~ "A", + is_secret_dom ~ "B", + TRUE ~ Status + ) + ) %>% + count(flag) +stats_wo_split +#> flag n +#> 1 A 79 +#> 2 B 35 +#> 3 D 265 +#> 4 V 310 +``` + +La pose directe du secret sur le tableau à 4 dimensions génère +265 suppressions secondaires. + + +On peut observer que le traitement de la table à 4 dimensions directement +génère un masque un peu moins chargé en secret: 266 cellules en secret secondaire, +contre 278 avec une réduction de dimensions. + +Il est difficile d'anticiper cette différence puisqu'elle est très dépendante +de la structuration des données, de la présence du secret primaire au sein du +tableau original et de la façon dont les sous-tableaux vont être construits. + + +## Détail de la vignette + +- Authors: ** Wistan Pomel ** & ** Andre-Raymond Socard ** & +**Julien Jamme** & +**Nathanael Rastout** +- Last update: **11/01/2024** +- Version of rtauargus used: **1.2.0** +- Version of τ-Argus used : **TauArgus 4.2.3** +- R version used : **4.2.3** + +

+ summary ↑ +

+ diff --git a/vignettes/split_tab_fr.Rmd.orig b/vignettes/split_tab_fr.Rmd.orig new file mode 100644 index 0000000..5db881e --- /dev/null +++ b/vignettes/split_tab_fr.Rmd.orig @@ -0,0 +1,337 @@ +--- +title: "Comment poser du secret sur des tableaux à 4 ou 5 dimensions avec `rtauargus`?" +subtitle:

![logo rtauargus](../man/figures/rtauargus_logo_small.png)

+output: + rmarkdown::html_vignette: + toc: true + toc_depth: 2 + number_section: true + fig_caption: true +vignette: > + %\VignetteIndexEntry{Gestion des tableaux à 4 ou 5 dimensions} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + echo = TRUE, + collapse = TRUE, + comment = "#>", + warning = FALSE +) +knitr::opts_knit$set(root.dir = getwd()) +``` + +Les algorithmes de pose du secret secondaire tels qu'Optimal ou Modular implémentés +dans `Tau-Argus` reposent sur un programme d'optimisation sous contraintes. Le +nombre de ces constraintes devient très difficile à gérer - du moins dans un temps +raisonnable - pour `Tau-Argus`. La pose du secret +sur un tableau à 5 dimensions directement avec `Tau-Argus` est impossible et les résultats +fournis par le logiciel lors du traitement d'un tableau de 4 dimensions sont +accompagnés d'un avertissement. Par ailleurs, un tableau à 4 dimensions dont l'une +est hiérarchique est un problème, en général, très difficile pour `Tau-Argus`. + +Le package `rtauargus` offre désormais la possibilité de protéger des tableaux +de 4 à 5 dimensions. Pour les tableaux de 4 dimensions, la protection est +même plus rapide et permet de traiter des tableaux plus grands encore (en nombre +de lignes). Certains tableaux demeurent néanmoins trop complexes pour `Tau-Argus`. + +Cette nouvelle fonctionnalité est mise à disposition dans les fonctions +`tab_rtauargus` et `tab_multi_manager` avec l'argument `split_tab`. +Quand il est renseigné à `TRUE`, les fonctions réduisent +la dimension d'un tableau de 4 ou 5 dimensions en construisant un certain nombre +de tableaux liés de 3 dimensions chacun. Ces tableaux contiennent exactement les +mêmes cellules que le tableau original. Puis, la fonction `tab_multi_manager` est +appelée pour organiser la pose du secret sur l'ensemble de ces tableaux. +`A la fin du processus, on récupère la table de départ - avec ses dimensions +originales - augmentée des informations sur le secret secondaire. + +Ainsi, point important à retenir, la pose du secret secondaire est toujours +réalisée par les algorithmes de `Tau-Argus`, le package `rtauargus` ne fait que +découper le tableau en sous-tableaux à trois dimensions et assurer la cohérence +du secret entre ces différents sous-tableaux liés entre eux. + +Cette méthode est susceptible de générer plus de secret qu'une attaque directe +du problème. Mais celle-ci étant impossible dans les cas visés, c'est un moindre mal. + +Pour utiliser cette fonctionnalité, il est nécessaire que l'utilisateur +pose de lui-même le secret primaire en amont. + + +### Comment la réduction d'un tableau à 4 ou 5 dimensions est-elle réalisée ? + +Pour passer de 5 à 3 dimensions, on utilise deux fois de suite la méthode +utilisée pour réduire un tableau en sous-tableaux ayant une dimension de moins. + +Le passage de 4 à 3 dimensions consiste d'abord à fusionner deux des quatre +dimensions du tableau original, c'est-à-dire qu'on remplace deux des variables +de départ par une seule dont les modalités sont la fusion des modalités des +deux variables choisies. + +Dans l'exemple ci-dessous, on fusionne par exemple les deux variables `SEX` et +`AGE` pour n'en faire qu'une seule appelée `SEX_AGE`. + +
+base_tab +
+ +Mais, la nouvelle variable contient des modalités non-emboîtées entre elles, +comme, par exemple, les modalités `Femme_Total` et `Ensemble_Adulte`. + +
+ Total_Ensemble = Total_Adulte + Total_Enfant +
+ +
+ Total_Ensemble = Femme_Ensemble + Homme_Ensemble +
+ +Or, nous savons gérer la présence de hiérarchies non-emboîtées dans un tableau. +Il suffit, en effet, de construire les sous-tableaux de même structure de telle +sorte que dans chacun de ces sous-tableaux la variable incriminée soit +restreinte à une partie parfaitement emboîtée de ses modalités. +Pour poser le secret sur les sous-tableaux liés engendrés, il est fait appel +à la fonction `tab_multi_manager`, spécialement conçue pour assurer la cohérence +du secret entre tableaux liés. + +Ici, la fusion nécessite la construction de deux sous-tableaux, chacun ayant une +variable `SEX__AGE` parfaitement hiérarchique. Le premier rassemble les modalités +`Total_Ensemble`, `Total_Adulte` et `Total_Enfant`, le second +`Total_Ensemble`, `Femme_Ensemble` et `Homme_Ensemble`. + +Cet exemple est le plus simple qu'il soit: en fusionnant deux variables +non hiérarchiques, on est amené à générer deux sous-tableaux. + +Lorsque l'une des variables fusionnées est elle-même hiérarchique, +la construction des sous-tableaux s'appuie sur les différents sous-totaux +présents dans la hiérarchie, comme indiqué dans la figure ci-dessous: + +
+ hierarchie +
+ +### Pour aller plus loin + +- [Article présentant la méthode et des résultats de simulation](https://github.com/InseeFrLab/dims_reduction_tables_workshop_20231215) + + +### Les principaux arguments ajoutés à `tab_rtauargus()` et `tab_multi_manager()` + +Pour activer le découpage des tableaux de dimension 4 ou 5 en sous-tableaux +de 3 dimensions, l'utilisateur doit activer l'argument `split_tab` en le passant +à `TRUE`. Cette valeur est sa valeur par défaut. Ainsi, l'utilisateur veillera +à passer cet argument à `FALSE` s'il souhaite qu'un tableau à 4 dimensions soit +traité directement par `Tau-Argus`. + +L'argument `nb_tab_option` permet de préciser la façon dont le tableau est découpé: + +- L'option `"min"` consiste à construire le minimum de tables à 3 dimensions +lors de la fusion. Ce sont donc des variables non hiérarchiques qui seront +fusionnées en priorité. +- L'option `"max"` au contraire construit le maximum de tables à 3 dimensions possible +lors de la fusion. Ce sont donc des variables hiérarchiques qui seront +fusionnées en priorité. Cette option est intéressante pour réduire la taille +des sous-tableaux produits et donc augmenter les chances que `Tau-Argus` puisse +les traiter. L'inconvénient de cette option est qu'elle peut générer un très +grand nombre de tableaux, nombre dépendant du nombre de noeuds dans les hiérarchies. +- L'option `"smart"` va chercher à construire le minimum de tables à 3 dimensions +tout en cherchant à respecter une limite de taille (nombre de lignes) pour les +sous-tableaux construits. Cette limite est fixée par l'argument `limit`. Par +défaut, la valeur est fixée à `14 700` - choisie de par notre expérience. Si +la limite n'est pas atteignable, un avertissement est affiché mais cela ne bloque +pas la construction des tables et la pose du masque. + +Par défaut, l'argument `nb_tab_option` est fixé à `"smart"`. Il est déconseillé +de changer cette valeur dans un premier temps. + +- L'argument `nb_tab = "min"` est intéressant en termes de secret car c'est +l'option qui génèrera le moins de sur-secret puisqu'elle construira le moins +possible de sous-tableaux liés. Il peut être intéressant à utiliser si la table +de départ contient un nombre de lignes raisonnables, +c'est-à-dire si chaque dimension n'est pas trop ventilée. +- L'argument `nb_tab = "max"` créant le plus de tableaux possibles engendrera +nécessairement le plus de secret, mais elle pourra aussi convenir quand les +tableaux d'origine sont très longs et nécessitent d'être beaucoup découpés pour +que la pose du secret soit possible avec `Tau-Argus`. + +### Exemple + +Comme tout est géré par la fonction `tab_rtauargus`, la pose du secret sur un tableau +à 4 ou 5 dimensions n'ajoute pas de difficultés particulières pour l'utilisateur +en réalité. + +Nous présentons ici un exemple de pose de secret sur un tableau à 4 dimensions. + +#### Préparation des données + +```{r} +library(tictoc) +library(dplyr) +library(rtauargus) +``` + +```{r} +options( + rtauargus.tauargus_exe = + "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe" +) +``` + + +```{r} +data("datatest1") +str(datatest1) +``` + + +#### Préparation des arguments pour `tab_rtauargus` + +```{r} + +totcode <- c(treff ="Total",cj ="Total", A10 = "Total", type_distrib ="Total") +explanatory_vars<- names(totcode) + +``` + +#### Pose du secret primaire + +```{r} +datatest1_with_prim <- datatest1 %>% + mutate( + is_secret_freq = (nb_obs > 0 & nb_obs < 3), + is_secret_dom = (pizzas_tot != 0) & (pizzas_max > 0.85*pizzas_tot), + pizzas_tot= round(abs(pizzas_tot),2) + ) %>% + mutate( + is_secret_prim = is_secret_freq , + nb_obs = ceiling(nb_obs) + ) +``` + +#### Pose du secret secondaire avec `rtauargus` + +Pour poser le secret secondaire, on utilise la fonction `tab_rtauargus` avec +l'argument `split_tab = TRUE`. Ainsi, `Tau-Argus` n'aura pas à traiter directement +le secret sur la table à 4 dimensions. La fonction `tab_rtauargus` va +découper la table originale en un certain nombre de tables qui seront protégées +comme des tables liées avec un appel sous-jacent à `tab_multi_manager`. + + +```{r} +tictoc::tic() +res_wi_split <- tab_rtauargus( + tabular = datatest1_with_prim, + files_name = "datatest1", + dir_name = "example_1", + explanatory_vars = explanatory_vars, + totcode = totcode, + value = "pizzas_tot", + freq = "nb_obs", + secret_var = "is_secret_prim", + verbose = TRUE, + split_tab = TRUE +) +tictoc::toc() +``` + +La réduction de dimensions a conduit à construire deux sous-tableaux de 3 +dimensions à partir du tableau original. + +```{r} +str(res_wi_split) +``` + +```{r} +stats_wi_split <- res_wi_split %>% + rename_with( ~"final_suppress", last_col()) %>% + mutate( + flag = case_when( + is_secret_freq ~ "A", + is_secret_dom ~ "B", + final_suppress ~ "D", + TRUE ~"V" + ) + ) %>% + count(flag) +stats_wi_split +``` + +Avec l'option de découpage du tableau de départ, la pose du secret sur +le tableau à 4 dimensions génère +`r stats_wi_split %>% filter(flag == "D") %>% pull(n)` suppressions secondaires. + +#### Comparaison avec une pose du secret sans réduire la dimension de la table + +`Tau-Argus` est en capacité de poser le secret secondaire sur un tableau à 4 +dimensions avec `Modular`, mais, en plus d'émettre un avertissement sur le +fait qu'avec ce nombre de dimensions, la qualité du résultat n'est pas garantie, +il est aussi beaucoup plus long à poser le secret secondaire que lorsqu'il +traite deux tables liées issues du tableau original. + +```{r} +tictoc::tic() +res_wo_split <- tab_rtauargus( + tabular = datatest1_with_prim, + files_name = "datatest1", + dir_name = "example_1/wo_split", + explanatory_vars = explanatory_vars, + totcode = totcode, + value = "pizzas_tot", + freq = "nb_obs", + secret_var = "is_secret_prim", + verbose = TRUE, + split_tab = FALSE +) +tictoc::toc() +``` + +Avec seulement 689 lignes et aucune variable hiérarchique, la pose directe du +secret prend environ 5 minutes à `Tau-Argus`. + +```{r} +stats_wo_split <- res_wo_split %>% + mutate( + flag = case_when( + is_secret_freq ~ "A", + is_secret_dom ~ "B", + TRUE ~ Status + ) + ) %>% + count(flag) +stats_wo_split +``` + +La pose directe du secret sur le tableau à 4 dimensions génère +`r stats_wo_split %>% filter(flag == "D") %>% pull(n)` suppressions secondaires. + + +On peut observer que le traitement de la table à 4 dimensions directement +génère un masque un peu moins chargé en secret: 266 cellules en secret secondaire, +contre 278 avec une réduction de dimensions. + +Il est difficile d'anticiper cette différence puisqu'elle est très dépendante +de la structuration des données, de la présence du secret primaire au sein du +tableau original et de la façon dont les sous-tableaux vont être construits. + + +## Détail de la vignette + +- Authors: ** Wistan Pomel ** & ** Andre-Raymond Socard ** & +**Julien Jamme** & +**Nathanael Rastout** +- Last update: **`r format(Sys.time(), "%d/%m/%Y")`** +- Version of rtauargus used: **`r packageVersion("rtauargus")`** +- Version of τ-Argus used : **TauArgus 4.2.3** +- R version used : **`r packageVersion("base")`** + +

+ summary ↑ +

+ diff --git a/vignettes/tab_SEX_AGE.png b/vignettes/tab_SEX_AGE.png new file mode 100644 index 0000000..48e0611 Binary files /dev/null and b/vignettes/tab_SEX_AGE.png differ diff --git a/vignettes/tab_SEX_AGE_en.png b/vignettes/tab_SEX_AGE_en.png new file mode 100644 index 0000000..25417b0 Binary files /dev/null and b/vignettes/tab_SEX_AGE_en.png differ