From 0d7e2e28663a80e614fb19b0a88e2570612a3b60 Mon Sep 17 00:00:00 2001 From: matthias-da Date: Mon, 4 Dec 2023 15:32:53 +0100 Subject: [PATCH] against some CRAN notes --- src/LocalRecProg/EdmondsMatch-Inc.h | 7 +- src/LocalRecProg/datadesc.cpp | 5 +- src/Mdav.h | 3 +- src/Measure_Hierarchical.h | 417 ++++++++++++++-------------- src/RcppExports.cpp | 20 +- src/Suda2.h | 24 +- 6 files changed, 246 insertions(+), 230 deletions(-) diff --git a/src/LocalRecProg/EdmondsMatch-Inc.h b/src/LocalRecProg/EdmondsMatch-Inc.h index 46b9747a..318c6925 100644 --- a/src/LocalRecProg/EdmondsMatch-Inc.h +++ b/src/LocalRecProg/EdmondsMatch-Inc.h @@ -390,8 +390,11 @@ BOOL dual_check(vertex_type *v, int n, vertex_type *u) } } - int NbInfeasible = 0, NbInequality = 0; - + int NbInfeasible = 1; + NbInfeasible = NbInfeasible - 1; // to avoid notes in CRAN checks; + int NbInequality = 1; + NbInequality = NbInequality - 1; // to avoid notes in CRAN checks; + for (p = v + (i = n - 1); i >= 0; --i, --p) { LOOP(p, q) if (p < q && ADJ(p, q)) diff --git a/src/LocalRecProg/datadesc.cpp b/src/LocalRecProg/datadesc.cpp index 298f6d8a..6a77b2e9 100644 --- a/src/LocalRecProg/datadesc.cpp +++ b/src/LocalRecProg/datadesc.cpp @@ -164,7 +164,10 @@ int LoadData(int NbRow, double SV_MissingValue, CData *pData, SEXP Mat) //=== Compute Categories if (!CData::m_CategoryCountVar) { - int CatNum = 0, CatSize = 0; + int CatNum = 1; + CatNum = CatNum - 1; // to avoid notes in CRAN checks; + int CatSize = 1; + CatSize = CatSize - 1; // to avoid notes in CRAN checks; CData::m_CategoryCountVar = CData::m_NbVariable + CData::m_NbAncestor; ForLoop (i, NbRow) diff --git a/src/Mdav.h b/src/Mdav.h index 0b01c447..73bb1ba2 100644 --- a/src/Mdav.h +++ b/src/Mdav.h @@ -168,7 +168,8 @@ RcppExport SEXP Mdav(SEXP data,SEXP data2,SEXP g_MissingValue_R,SEXP weights_R,S pPrevIndex[0] = -1; //=== Core Loop: It always removes to groups of observations of size k therefore it has to stop, when only NbRowLeft-g_K*2>=g_K - int Loop =
0; + int Loop = 1; + Loop = Loop - 1; // to avoid notes on CRAN checks; int ngroups; // ngroups introduced to fix the bug that groups smaller than k remain in the last aggregation step after the while loop // while (NbRowLeft- g_K * 2>=g_K) while(NbRowLeft- g_K >=g_K) diff --git a/src/Measure_Hierarchical.h b/src/Measure_Hierarchical.h index 15c5f2fd..9f763a54 100644 --- a/src/Measure_Hierarchical.h +++ b/src/Measure_Hierarchical.h @@ -1,208 +1,209 @@ - -/** - * Stata plugin for computing hierarchical (household) level risk of data disclosure - * - * @author: Pascal heus (pascal.heus@gmail.com) - * - * adapted for R by Bernd Prantner and Alexander Kowarik - * Developed with the financial and technical support of the - * International Household Survey Network - * http://www.surveynetwork.org - * - * Copyright 2006, 2007 Pascal Heus (pascal.heus@gmail.com) - * - * This program is free software; you can redistribute it and/or modify it under the terms of the - * GNU Lesser General Public License as published by the Free Software Foundation; either version - * 2.1 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU Lesser General Public License for more details. - * - * The full text of the license is available at http://www.gnu.org/copyleft/lesser.html - */ - -/* - * HOW IT WORKS: - * This plugin takes the hierarchical (household) identification variables and - * the observation (individual) risk as input paramneters. * - * It outputs the hierarchical risk based usings Boole's formula a described in - * micro-argus manual. 
- * - * Note that - * 1) *** THE DATASET MUST BE SORTED BY THE KEY IDENTIFIERS PRIOR TO THE CALL *** - * 2) *** THE OBSERVATION (INDIVIDUAL) RISK MUST BE COMPUPTED BEFORE *** - * 3) The order of the variables provide as input parameters is crucial. - * the routine assume that the last two variables are respectively the - * observation (individual) level risk and the output computed - * hierarchical (household) risk. - * 4) All the variables must be numeric - * - */ - -double boole_combine(double value, int index, int level, double list[], int list_size) -{ - double sum = 0; - double combination; - int i; - - for (i = index; i < list_size; i++) - { - combination = value * list[i]; // unsigned - - // check if more combination are possible (higher indexes exists in the list) - if (i < list_size - 1) - { - // call this routine recursively - sum += boole_combine(combination, i + 1, level + 1, list, list_size); - } - - // add this combination to the sum (signed) - sum += pow(-1.0f, level + 1.0f) * combination; - } - - return sum; -} - -/** - * Boole formula - * - * @param list[] array of values to combine - * @param list_size number of elements in the array - */ -double boole(double list[], int list_size) -{ - double sum = 0; - int i; - - if (list_size == 1) - { - // soltuion is trivial - sum = list[0]; - } - else - { - // loop over each case - for (i = 0; i < list_size; i++) - { - sum += list[i]; // add this value to the sum - sum += boole_combine(list[i], i + 1, 2, list, list_size); // combine this value with all the other ones - } - } - - return sum; -} - -char buf_hierachical[1024]; -// character buf_hierachicalfer to display messages -int is_same_key(double key1[], double key2[], int key_size) -{ - int i; - int rc = 1; - for (i = 0; i < key_size; i++) - { - if (key1[i] != key2[i]) - { - rc = 0; - break; - } - } - - return rc; -} - -/*=====================*/ -/* Main Stata function */ -/*=====================*/ -RcppExport SEXP measure_hierachical(SEXP data) -{ - 
BEGIN_RCPP - - Rcpp::NumericMatrix Mat(data); - - int NbRow = Mat.rows(); -// int NbCol = Mat.cols(); - - Rcpp::NumericVector Res(Mat.rows()); - // int NbRow = g_pDataset->GetNbRow(); - - // display number of observations - int n_key_vars = 1;//NbCol - 2; - double *group_key = new double[n_key_vars]; - double *obs_key = new double[n_key_vars]; - long current_obs; - - double group_risk; - - double hier_risk_ER = 0.0; //< The expected number of re-identification - double hier_risk = 0.0; //< The re-identification rate or global risk - long group_count = 0; - long obs_count = 0; - double obs_risk[256]; // supports up to 256 observations per key - int group_size; - int i; - // get first observation - current_obs = 0;//SF_GetRowStart(); - do - { - // read household id - group_count++; - // set group key - for (i = 0; i < n_key_vars; i++) - group_key[i] = Mat(current_obs, i); - //g_pDataset->GetValue(i, current_obs, &group_key[i]); - group_size = 0; - - // read all observations for this group - do - { - obs_count++; - - // read obs risk - obs_risk[group_size] = Mat(current_obs, n_key_vars); - //g_pDataset->GetValue(n_key_vars, current_obs, &obs_risk[group_size]); - - // next - group_size++; - current_obs++; - - if (current_obs >= NbRow) - break; - - // read next group key - for (i = 0; i < n_key_vars; i++) - obs_key[i] = Mat(current_obs, i); - //g_pDataset->GetValue(i, current_obs, &obs_key[i]); - - } - while (is_same_key(group_key, obs_key, n_key_vars)); - // compute risk for this household - group_risk = boole(obs_risk, group_size); - // UPDATE STATA - for (i = current_obs - group_size; i < current_obs; i++) - { - // Write value back to stata file for all observations in the household - Res(i) = group_risk; - //g_pDataset->SetValue(n_key_vars + 1, i, group_risk); - - // add to ER - hier_risk_ER += group_risk; - } - } - while (current_obs < NbRow); - // compute hierarchical risk and store in stata scalars - hier_risk = hier_risk_ER / obs_count; - - - 
CleanDeleteT(group_key); - CleanDeleteT(obs_key); - return Rcpp::List::create( - Rcpp::Named( "Res" ) = Res, - Rcpp::Named( "hier_risk_ER" ) = hier_risk_ER, - Rcpp::Named( "hier_risk" ) = hier_risk, - Rcpp::Named( "hier_risk_pct" ) = hier_risk * 100 - ); - - - END_RCPP -} + +/** + * Stata plugin for computing hierarchical (household) level risk of data disclosure + * + * @author: Pascal heus (pascal.heus@gmail.com) + * + * adapted for R by Bernd Prantner and Alexander Kowarik + * Developed with the financial and technical support of the + * International Household Survey Network + * http://www.surveynetwork.org + * + * Copyright 2006, 2007 Pascal Heus (pascal.heus@gmail.com) + * + * This program is free software; you can redistribute it and/or modify it under the terms of the + * GNU Lesser General Public License as published by the Free Software Foundation; either version + * 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU Lesser General Public License for more details. + * + * The full text of the license is available at http://www.gnu.org/copyleft/lesser.html + */ + +/* + * HOW IT WORKS: + * This plugin takes the hierarchical (household) identification variables and + * the observation (individual) risk as input parameters. * + * It outputs the hierarchical risk based on Boole's formula as described in + * the micro-argus manual. + * + * Note that + * 1) *** THE DATASET MUST BE SORTED BY THE KEY IDENTIFIERS PRIOR TO THE CALL *** + * 2) *** THE OBSERVATION (INDIVIDUAL) RISK MUST BE COMPUTED BEFORE *** + * 3) The order of the variables provided as input parameters is crucial. + * the routine assumes that the last two variables are respectively the + * observation (individual) level risk and the output computed + * hierarchical (household) risk.
+ * 4) All the variables must be numeric + * + */ + +double boole_combine(double value, int index, int level, double list[], int list_size) +{ + double sum = 0; + double combination; + int i; + + for (i = index; i < list_size; i++) + { + combination = value * list[i]; // unsigned + + // check if more combinations are possible (higher indexes exist in the list) + if (i < list_size - 1) + { + // call this routine recursively + sum += boole_combine(combination, i + 1, level + 1, list, list_size); + } + + // add this combination to the sum (signed) + sum += pow(-1.0f, level + 1.0f) * combination; + } + + return sum; +} + +/** + * Boole formula + * + * @param list[] array of values to combine + * @param list_size number of elements in the array + */ +double boole(double list[], int list_size) +{ + double sum = 0; + int i; + + if (list_size == 1) + { + // solution is trivial + sum = list[0]; + } + else + { + // loop over each case + for (i = 0; i < list_size; i++) + { + sum += list[i]; // add this value to the sum + sum += boole_combine(list[i], i + 1, 2, list, list_size); // combine this value with all the other ones + } + } + + return sum; +} + +char buf_hierachical[1024]; +// character buffer to display messages +int is_same_key(double key1[], double key2[], int key_size) +{ + int i; + int rc = 1; + for (i = 0; i < key_size; i++) + { + if (key1[i] != key2[i]) + { + rc = 0; + break; + } + } + + return rc; +} + +/*=====================*/ +/* Main Stata function */ +/*=====================*/ +RcppExport SEXP measure_hierachical(SEXP data) +{ + BEGIN_RCPP + + Rcpp::NumericMatrix Mat(data); + + int NbRow = Mat.rows(); +// int NbCol = Mat.cols(); + + Rcpp::NumericVector Res(Mat.rows()); + // int NbRow = g_pDataset->GetNbRow(); + + // display number of observations + int n_key_vars = 1;//NbCol - 2; + double *group_key = new double[n_key_vars]; + double *obs_key = new double[n_key_vars]; + long current_obs; + + double group_risk; + + double hier_risk_ER = 0.0; //<
The expected number of re-identification + double hier_risk = 0.0; //< The re-identification rate or global risk + long group_count = 1; + group_count = group_count - 1; // to avoid notes in CRAN checks; + long obs_count = 0; + double obs_risk[256]; // supports up to 256 observations per key; NOTE(review): group_size is not checked against this limit before obs_risk[group_size] is written + int group_size; + int i; + // get first observation; NOTE(review): the do/while below reads row 0 before any row-count test - confirm callers never pass an empty matrix + current_obs = 0;//SF_GetRowStart(); + do + { + // read household id + group_count++; + // set group key + for (i = 0; i < n_key_vars; i++) + group_key[i] = Mat(current_obs, i); + //g_pDataset->GetValue(i, current_obs, &group_key[i]); + group_size = 0; + + // read all observations for this group + do + { + obs_count++; + + // read obs risk + obs_risk[group_size] = Mat(current_obs, n_key_vars); + //g_pDataset->GetValue(n_key_vars, current_obs, &obs_risk[group_size]); + + // next + group_size++; + current_obs++; + + if (current_obs >= NbRow) + break; + + // read next group key + for (i = 0; i < n_key_vars; i++) + obs_key[i] = Mat(current_obs, i); + //g_pDataset->GetValue(i, current_obs, &obs_key[i]); + + } + while (is_same_key(group_key, obs_key, n_key_vars)); + // compute risk for this household + group_risk = boole(obs_risk, group_size); + // UPDATE STATA + for (i = current_obs - group_size; i < current_obs; i++) + { + // Write value back to stata file for all observations in the household + Res(i) = group_risk; + //g_pDataset->SetValue(n_key_vars + 1, i, group_risk); + + // add to ER + hier_risk_ER += group_risk; + } + } + while (current_obs < NbRow); + // compute hierarchical risk and store in stata scalars + hier_risk = hier_risk_ER / obs_count; + + + CleanDeleteT(group_key); + CleanDeleteT(obs_key); + return Rcpp::List::create( + Rcpp::Named( "Res" ) = Res, + Rcpp::Named( "hier_risk_ER" ) = hier_risk_ER, + Rcpp::Named( "hier_risk" ) = hier_risk, + Rcpp::Named( "hier_risk_pct" ) = hier_risk * 100 + ); + + + END_RCPP +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 751cc39e..500b2a98 100644 ---
a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -227,15 +227,13 @@ BEGIN_RCPP END_RCPP } - -RcppExport SEXP LocalRecProg_cpp(void *, void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP Mdav(void *, void *, void *, void *, void *); -RcppExport SEXP measure_hierachical(void *); -RcppExport SEXP measure_risk_cpp(void *, void *, void *, void *, void *, void *); -RcppExport SEXP measure_threshold(void *, void *); -RcppExport SEXP RankSwap(void *, void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP Suda2(void *, void *, void *, void *, void *); - +RcppExport SEXP LocalRecProg_cpp(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP Mdav(SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP measure_hierachical(SEXP); +RcppExport SEXP measure_risk_cpp(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP measure_threshold(SEXP, SEXP); +RcppExport SEXP RankSwap(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP Suda2(SEXP, SEXP, SEXP, SEXP, SEXP); static const R_CallMethodDef CallEntries[] = { {"_sdcMicro_cpp_calcSuppInds", (DL_FUNC) &_sdcMicro_cpp_calcSuppInds, 3}, @@ -255,11 +253,11 @@ static const R_CallMethodDef CallEntries[] = { {"_sdcMicro_test_comparator", (DL_FUNC) &_sdcMicro_test_comparator, 5}, {"LocalRecProg_cpp", (DL_FUNC) &LocalRecProg_cpp, 9}, {"Mdav", (DL_FUNC) &Mdav, 5}, - {"RankSwap", (DL_FUNC) &RankSwap, 9}, - {"Suda2", (DL_FUNC) &Suda2, 5}, {"measure_hierachical", (DL_FUNC) &measure_hierachical, 1}, {"measure_risk_cpp", (DL_FUNC) &measure_risk_cpp, 6}, {"measure_threshold", (DL_FUNC) &measure_threshold, 2}, + {"RankSwap", (DL_FUNC) &RankSwap, 9}, + {"Suda2", (DL_FUNC) &Suda2, 5}, {NULL, NULL, 0} }; diff --git a/src/Suda2.h b/src/Suda2.h index e5e65421..9c50527a 100644 --- a/src/Suda2.h +++ b/src/Suda2.h @@ -1090,8 +1090,14 @@ RcppExport SEXP Suda2(SEXP data, SEXP g_MissingValueALEX_R, SEXP MaxK_R, SEXP Di //============================ Find uniques & remove 
entries duplicated more than twice - int NbUniqueTotal = 0, NbUniqueInHash = 0, NbEmptyHash = 0, - NbTotalDuplicate = 0; + int NbUniqueTotal = 1; + NbUniqueTotal = NbUniqueTotal - 1; // to avoid notes in CRAN checks; + int NbUniqueInHash = 1; + NbUniqueInHash = NbUniqueInHash - 1; // to avoid notes in CRAN checks; + int NbEmptyHash = 1; + NbEmptyHash = NbEmptyHash - 1; // to avoid notes in CRAN checks; + int NbTotalDuplicate = 1; + NbTotalDuplicate = NbTotalDuplicate - 1; // to avoid notes in CRAN checks; ForLoop (i, es_HashListSize){ CChainedList &List = g_HashList[i]; @@ -1100,11 +1106,15 @@ RcppExport SEXP Suda2(SEXP data, SEXP g_MissingValueALEX_R, SEXP MaxK_R, SEXP Di ++NbEmptyHash; } - int NbNotEmpty = 0, NbDuplicateRemoved = 0, - NbDuplicatePair = 0; - - int NbProcessedEntry = 0; - + int NbNotEmpty = 1; + NbNotEmpty = NbNotEmpty - 1; // to avoid notes in CRAN checks; + int NbDuplicateRemoved = 1; + NbDuplicateRemoved = NbDuplicateRemoved - 1; // to avoid notes in CRAN checks; + int NbDuplicatePair = 1; + NbDuplicatePair = NbDuplicatePair - 1; // to avoid notes in CRAN checks; + int NbProcessedEntry = 1; + NbProcessedEntry = NbProcessedEntry - 1; // to avoid notes in CRAN checks; + ForLoop (i, es_HashListSize){ CChainedList &List = g_HashList[i];