Merge pull request #20 from InseeFrLab/dev2023

Dev2023
InseeFrLab · Jan 19, 2024 · 2d3c77a · 2d3c77a
2 parents f3810af + c9a89a5
commit 2d3c77a
Show file tree

Hide file tree

Showing 107 changed files with 8,280 additions and 1,126 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -13,3 +13,9 @@
 ^_pkgdown\.yml$
 ^docs$
 ^pkgdown$
+^\.github$
+^README\.Rmd$
+^tauargus_files/*$
+tauargus_files/*
+^_pkgdown_old\.yml$
+^README\.html$
diff --git a/.gitignore b/.gitignore
@@ -14,3 +14,5 @@ vignettes/tauargus_exe.ini
 .hst
 
 docs
+
+output/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,15 +2,18 @@ Package: rtauargus
 Type: Package
 Title: Using Tau-Argus from R
 Language: fr
-Version: 1.1.2
+Version: 1.2.0
 Depends: R (>= 3.5.0)
 Imports:
     purrr (>= 0.2),
     dplyr (>= 0.7),
+    data.table,
     gdata,
     stringr,
     rlang,
-    zoo
+    zoo,
+    sdcHierarchies,
+    lifecycle
 Suggests:
     testthat,
     knitr,
@@ -39,6 +42,14 @@ Authors@R: c(
 	  "Félix", "Beroud",
 	  role = c("aut")
 	),
+	person(
+	  "André-Raymond", "Socard",
+	  role = c("aut")
+	),
+	person(
+	  "Wistan", "Pomel",
+	  role = c("aut")
+	),
     person(
       family = "Institut National de la Statistique et des Études Économiques",
       role = "cph"
@@ -48,9 +59,12 @@ Description: Protects tables by calling the Tau-Argus software from R.
 License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.3
 VignetteBuilder: knitr
 URL: https://inseefrlab.github.io/rtauargus,
     https://github.com/inseefrlab/rtauargus,
     https://inseefrlab.github.io/rtauargus/
 BugReports: https://github.com/inseefrlab/rtauargus/issues
+Roxygen: list(markdown = TRUE)
+StagedInstall: no
+
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,25 +1,48 @@
 # Generated by roxygen2: do not edit by hand
 
+export(from_4_to_3)
+export(from_4_to_3_case_0_hr)
+export(from_4_to_3_case_1_hr)
+export(from_4_to_3_case_2_hr)
+export(from_5_to_3)
 export(import)
+export(length_tabs)
 export(micro_arb)
 export(micro_asc_rda)
+export(micro_rtauargus)
+export(nb_tab_generated)
+export(reduce_dims)
 export(reset_rtauargus_options)
-export(rtauargus)
+export(restore_format)
 export(rtauargus_options)
 export(rtauargus_plus)
 export(run_arb)
+export(sp_format)
 export(tab_arb)
 export(tab_multi_manager)
 export(tab_rda)
 export(tab_rtauargus)
 export(tab_rtauargus2)
+export(tab_rtauargus4)
+export(tabulate_micro_data)
+export(var_to_merge)
 export(write_hrc)
 export(write_hrc2)
+import(data.table, except = transpose)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,arrange)
+importFrom(dplyr,filter)
 importFrom(dplyr,mutate)
+importFrom(dplyr,select)
+importFrom(lifecycle,badge)
+importFrom(lifecycle,deprecated)
 importFrom(purrr,map)
 importFrom(purrr,map_at)
 importFrom(purrr,transpose)
 importFrom(rlang,.data)
+importFrom(sdcHierarchies,hier_convert)
+importFrom(sdcHierarchies,hier_import)
+importFrom(stats,setNames)
+importFrom(stringr,str_detect)
+importFrom(utils,combn)
 importFrom(zoo,na.locf)
diff --git a/NEWS.md b/NEWS.md
@@ -4,6 +4,29 @@ subtitle: History of changes / Historique des modifications
 output: rmarkdown::html_vignette
 ---
 
+
+## rtauargus 1.2.0
+
+[01/2024]
+
+* Implementation of a method to handle some tables with 4 or 5 dimensions.
+
+> The method is briefly explained and its use is demonstrated in a dedicated vignette (in French).
+> A paper explaining the idea and the modus operandi in more depth is available
+here: "https://github.com/InseeFrLab/dims_reduction_tables_workshop_20231215".
+
+* Implementation of the function `tabulate_micro_data()` to compute tabular data from 
+a microdata file.
+
+> The function can create frequency and magnitude tabular data with hierarchical variables.
+The resulting tabular data contain the information needed to determine the primary secret
+according to the frequency rule and the (1,k)-dominance rule.
+
+* Resolution of a malfunction while dealing with costs.
+
+* The **rtauargus()** function has been renamed to the more appropriate **micro_rtauargus()**.
+Its arguments and its behaviour remain the same.
+
 ## rtauargus 1.1.2
 
 [01/02/2023]

diff --git a/R/data.R b/R/data.R
@@ -8,7 +8,7 @@
 #'  \item{ACTIVITY}{business sector, hierarchical variables with three levels described
 #'   in the activity_corr_table dataset. The root is noted "Total"}
 #'   \item{SIZE}{size of the companies (Number of employees in three categories
-#'   + overall category "Total")}
+#'   and overall category "Total")}
 #'   \item{N_OBS}{Frequency, number of companies}
 #'   \item{TOT}{turnover value in euros}
 #'   \item{MAX}{turnover of the company which contributes the most to the cell.}
@@ -44,7 +44,7 @@
 #'  Hierarchical variables with two levels (nuts2 and nuts3) described
 #'   in the nuts23_fr_corr_table dataset. The root is noted "Total"}
 #'   \item{SIZE}{size of the companies (Number of employees in three categories
-#'   + overall category "Total")}
+#'   and overall category "Total")}
 #'   \item{N_OBS}{Frequency, number of companies}
 #'   \item{TOT}{turnover value in euros}
 #'   \item{MAX}{turnover of the company which contributes the most to the cell.}
@@ -81,7 +81,7 @@
 #'   \item{A21}{business sectors in 21 categories}
 #'   \item{A88}{business sectors in 88 categories}
 #' }
-#' @details Use the \code{write_hrc2} function to create a .hrc file from this
+#' @details Use the `write_hrc2` function to create a .hrc file from this
 #' correspondence table.
 "activity_corr_table"
 
@@ -95,7 +95,7 @@
 #'   \item{NUTS2}{NUTS2 levels in France - equivalent of French "Régions"}
 #'   \item{NUTS3}{NUTS3 levels in France - equivalent of French "Départements"}
 #' }
-#' @details Use the \code{write_hrc2} function to create a .hrc file from this
+#' @details Use the `write_hrc2` function to create a .hrc file from this
 #' correspondence table.
 "nuts23_fr_corr_table"
 
@@ -116,7 +116,7 @@
 #'   areas and their corresponding NUTS3 areas are in the data.
 #'   The root is noted "Total_EAST"}
 #'   \item{SIZE}{size of the companies (Number of employees in three categories
-#'   + overall category "Total")}
+#'   and overall category "Total")}
 #'   \item{N_OBS}{Frequency, number of companies}
 #'   \item{TOT}{turnover value in euros}
 #'   \item{MAX}{turnover of the company which contributes the most to the cell.}
@@ -125,3 +125,64 @@
 #' activity_corr_table
 #' nuts23_fr_corr_table
 "turnover_act_nuts_size"
+
+
+#' Data crossing 4 categorical variables, none of which is hierarchical.
+#'
+#' @format A tibble/data frame with 689 rows and 12 variables:
+#' \describe{
+#'  \item{A10}{business sector, not hierarchical}
+#'   \item{cj}{legal category, not hierarchical}
+#'   \item{type_distrib}{type of distribution, not hierarchical}
+#'   \item{treff}{Number of employees (categorical), not hierarchical}
+#'   \item{nb_obs}{Frequency, number of companies}
+#'  \item{nb_obs_rnd}{Frequency rounded, number of companies}
+#'   \item{pizzas_tot}{turnover value in euros}
+#'   \item{pizzas_tot_abs}{turnover absolute value in euros}
+#'    \item{pizzas_max}{turnover max value in euros}
+#'    \item{is_secret_freq}{Boolean, TRUE if primary secret for frequency rule}
+#'    \item{is_secret_dom}{Boolean, TRUE if primary secret for dominance rule}
+#'   \item{is_secret_prim}{Boolean, TRUE if primary secret for any rule}
+#'
+#' }
+"datatest1"
+
+#' Data crossing 5 categorical variables, none of which is hierarchical.
+#'
+#' @format A tibble/data frame with 5 612 rows and 15 variables:
+#' \describe{
+#'  \item{A10}{business sector, not hierarchical}
+#'  \item{cj}{legal category, not hierarchical}
+#'  \item{type_distrib}{type of distribution, not hierarchical}
+#'  \item{treff}{Number of employees (categorical), not hierarchical}
+#'  \item{nuts1}{NUTS region, not hierarchical}
+#'  \item{nb_obs}{Frequency, number of companies}
+#'  \item{nb_obs_rnd}{Frequency rounded, number of companies}
+#'  \item{pizzas_tot}{turnover value in euros}
+#'  \item{pizzas_tot_abs}{turnover absolute value in euros}
+#'  \item{pizzas_max}{turnover max value in euros}
+#'  \item{is_secret_freq}{Boolean, TRUE if primary secret for frequency rule}
+#'  \item{is_secret_dom}{Boolean, TRUE if primary secret for dominance rule}
+#'  \item{is_secret_prim}{Boolean, TRUE if primary secret for any rule}
+#'
+#' }
+"datatest2"
+
+#' Companies data at individual level.
+#'
+#' @format A data.table with 9 786 rows and 12 variables:
+#' \describe{
+#'  \item{A10}{business sector, not hierarchical}
+#'  \item{A21}{business sector, not hierarchical but nested in A10}
+#'  \item{A88}{business sector, not hierarchical but nested in A21}
+#'  \item{CJ}{legal category, not hierarchical}
+#'  \item{TYPE}{type of distribution, not hierarchical}
+#'  \item{SIZE}{Number of employees (categorical), not hierarchical}
+#'  \item{NUTS1}{NUTS 1 level of European administrative regions, not hierarchical}
+#'  \item{NUTS2}{NUTS 2 level of European administrative regions, not hierarchical}
+#'  \item{NUTS3}{NUTS 3 level of European administrative regions, not hierarchical}
+#'  \item{WEIGHT}{Weight of the companies, numeric}
+#'  \item{TURNOVER}{Turnover, numeric}
+#'  \item{PRODUCTION}{Production, numeric}
+#' }
+"indiv_dt"
diff --git a/R/hrc.R b/R/hrc.R
@@ -8,8 +8,8 @@
 #' microdonnées.
 #'
 #' The function reconstructs the variable hierarchy from the levels
-#' present in the data. The variables in \code{vars_hrc} must be
-#' \strong{classified from the finest to the most aggregated}.
+#' present in the data. The variables in `vars_hrc` must be
+#' **classified from the finest to the most aggregated**.
 #'
 #' The relationship between each hierarchical level must be an application (in the
 #' mathematical sense of the term), i.e. each fine level must have a
@@ -22,17 +22,17 @@
 #'
 #' Missing values in the hierarchical variables will be
 #' imputed beforehand using another hierarchical variable (parameter
-#' \code{fill_na}). In ascending strategy (\code{"up"}), the variables are
+#' `fill_na`). In ascending strategy (`"up"`), the variables are traversed
 #' from the most aggregated to the most refined, and vice versa in the
-#' downward strategy (\code{"down"}).
+#' downward strategy (`"down"`).
 #'
-#' The parameter \code{compact} allows to create hierarchies with variable
+#' The parameter `compact` makes it possible to create hierarchies with variable
 #' depths. The idea is to cut the branches consisting of a single value
 #' repeated up to the maximum depth (see examples).\cr
 #'
 #' La fonction reconstitue la hiérarchie des variables à partir des niveaux
-#' présents dans les données. Les variables dans \code{vars_hrc} doivent être
-#' \strong{classées de la plus fine à la plus agrégée}.
+#' présents dans les données. Les variables dans `vars_hrc` doivent être
+#' **classées de la plus fine à la plus agrégée**.
 #'
 #' La relation entre chaque niveau hiérarchique doit être une application (au
 #' sens mathématique du terme), c'est-à-dire que chaque niveau fin doit avoir un
@@ -45,47 +45,47 @@
 #'
 #' Les valeurs manquantes présentes dans les variables hiérarchiques seront
 #' préalablement imputées à l'aide d'une autre variable hiérarchique (paramètre
-#' \code{fill_na}). En stratégie ascendante (\code{"up"}), les variables sont
+#' `fill_na`). En stratégie ascendante (`"up"`), les variables sont
 #' parcourues de la plus agrégée à la plus fine, et inversement en stratégie
-#' descendante (\code{"down"}).
+#' descendante (`"down"`).
 #'
-#' Le paramètre \code{compact} permet de créer des hiérarchies à profondeurs
+#' Le paramètre `compact` permet de créer des hiérarchies à profondeurs
 #' variables. L'idée est de couper les branches constituées d'une seule valeur
 #' répétée jusqu'à la profondeur maximale (voir exemples).
 #'
 #' @inheritParams micro_asc_rda
-#' @param vars_hrc \strong{[mandatory]} vector of variable names
+#' @param vars_hrc vector of variable names
 #' constituting the hierarchy, from the finest to the most aggregated level.\cr
-#' (\strong{[obligatoire]} vecteur des noms des variables
+#' (vecteur des noms des variables
 #'   constituant la hiérarchie, du niveau le plus fin au niveau le plus agrégé.)
 #' @param hrc_filename name and location of the produced hrc file. If not
 #' filled, a temporary file.\cr
 #' (nom et emplacement du fichier hrc produit. Si non renseigné, un fichier temporaire.)
 #' @param fill_na fill in any missing values, using an other variable :
 #' \itemize{
-#' \item{\code{"up"} (default) : hierarchical variable of the level level
+#' \item{`"up"` (default) : hierarchical variable of the level
 #' immediately above}
-#' \item{\code{"down"} : hierarchical variable of the level immediately
+#' \item{`"down"` : hierarchical variable of the level immediately
+#' below}
 #' }\cr
 #' (remplissage d'éventuelles valeurs manquantes, à l'aide d'une
 #'   autre variable :\itemize{
-#'     \item{\code{"up"} (défaut) : variable hiérarchique de niveau
+#'     \item{`"up"` (défaut) : variable hiérarchique de niveau
 #'        immédiatement supérieur}
-#'     \item{\code{"down"} : variable hiérarchique de niveau immédiatement
+#'     \item{`"down"` : variable hiérarchique de niveau immédiatement
 #'        inférieur}
 #'    })
 #' @param compact to prune branches repeating a single value to the
-#' lowest level of depth (\code{TRUE} by default).\cr
+#' lowest level of depth (`TRUE` by default).\cr
 #' (pour élaguer les branches répétant une unique valeur jusqu'au
-#'   plus bas niveau de profondeur (\code{TRUE} par défaut).)
-#' @param hierlevels if only one variable is specified in \code{vars_hrc},
+#'   plus bas niveau de profondeur (`TRUE` par défaut).)
+#' @param hierlevels if only one variable is specified in `vars_hrc`,
 #' allows to generate the hierarchy according to the position of the characters in the
-#' string. For example, \code{hierlevels = "2 3"} to build a
+#' string. For example, `hierlevels = "2 3"` to build a
 #' hierarchy from a common code.\cr
-#' (si une seule variable est spécifiée dans \code{vars_hrc},
+#' (si une seule variable est spécifiée dans `vars_hrc`,
 #'   permet de générer la hiérarchie selon la position des caractères dans la
-#'   chaîne. Par exemple, \code{hierlevels = "2 3"} pour construire une
+#'   chaîne. Par exemple, `hierlevels = "2 3"` pour construire une
 #'   hiérarchie département-commune à partir d'un code commune.)
 #'
 #' @return The name of the hrc file (useful in the case of a temporary file with
@@ -446,7 +446,8 @@ df_hierlevels <- function(var_hrc, hierlevels) {
   }
 
   lev <- strsplit(hierlevels, " +")[[1]]
-  lev <- as.integer(lev) %>% `[`(. != 0)
+  lev <- as.integer(lev)
+  lev <- lev[lev != 0]
   if (sum(lev) != n1) {
     stop("la somme de hierlevels doit etre egale au nombre de caracteres")
   }