From 312f88230ec108eea0b35175bb8ce19303aa2b5e Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Fri, 7 Jun 2024 15:53:43 -0400 Subject: [PATCH] Move create_cat_table function to tables module. --- tableone/tableone.py | 54 +++++++++++--------------------------------- tableone/tables.py | 50 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 41 deletions(-) diff --git a/tableone/tableone.py b/tableone/tableone.py index 4874644..f6c1d86 100644 --- a/tableone/tableone.py +++ b/tableone/tableone.py @@ -337,7 +337,19 @@ def __init__(self, data: pd.DataFrame, # create continuous and categorical tables if self._categorical: - self.cat_table = self._create_cat_table(data, self._overall) + self.cat_table = self.tables.create_cat_table(data, + self._overall, + self.cat_describe, + self._categorical, + self._pval, + self._pval_adjust, + self._htest_table, + self._smd, + self.smd_table, + self._groupby, + self.cat_describe_all) + + if self._continuous: self.cont_table = self.tables.create_cont_table(data, @@ -522,46 +534,6 @@ def _t1_summary(self, x: pd.Series) -> str: f = '{{:.{}f}} ({{:.{}f}})'.format(n, n) return f.format(np.nanmean(x.values), self.statistics._std(x, self._ddof)) # type: ignore - def _create_cat_table(self, data, overall): - """ - Create table one for categorical data. - - Returns - ---------- - table : pandas DataFrame - A table summarising the categorical variables. - """ - table = self.cat_describe['t1_summary'].copy() - - # add the total count of null values across all levels - isnull = data[self._categorical].isnull().sum().to_frame( - name='Missing') - isnull.index = isnull.index.rename('variable') - try: - table = table.join(isnull) - # if columns form a CategoricalIndex, need to convert to string first - except TypeError: - table.columns = table.columns.astype(str) - table = table.join(isnull) - - # add pval column - if self._pval and self._pval_adjust: - table = table.join(self._htest_table[['P-Value (adjusted)', - 'Test']]) - elif self._pval: - table = table.join(self._htest_table[['P-Value', 'Test']]) - - # add standardized mean difference (SMD) column/s - if self._smd: - table = table.join(self.smd_table) - - # join the overall column if needed - if self._groupby and overall: - table = table.join(pd.concat([self.cat_describe_all['t1_summary']. - Overall], axis=1, keys=["Overall"])) - - return table - def _create_tableone(self, data): """ Create table 1 by combining the continuous and categorical tables. diff --git a/tableone/tables.py b/tableone/tables.py index 608cc4d..7135136 100644 --- a/tableone/tables.py +++ b/tableone/tables.py @@ -439,3 +439,53 @@ def create_cont_table(self, Overall], axis=1, keys=["Overall"])) return table + + def create_cat_table(self, + data, + overall, + cat_describe, + categorical, + pval, + pval_adjust, + htest_table, + smd, + smd_table, + groupby, + cat_describe_all): + """ + Create table one for categorical data. + + Returns + ---------- + table : pandas DataFrame + A table summarising the categorical variables. + """ + table = cat_describe['t1_summary'].copy() + + # add the total count of null values across all levels + isnull = data[categorical].isnull().sum().to_frame(name='Missing') + isnull.index = isnull.index.rename('variable') + + try: + table = table.join(isnull) + # if columns form a CategoricalIndex, need to convert to string first + except TypeError: + table.columns = table.columns.astype(str) + table = table.join(isnull) + + # add pval column + if pval and pval_adjust: + table = table.join(htest_table[['P-Value (adjusted)', 'Test']]) + elif pval: + table = table.join(htest_table[['P-Value', 'Test']]) + + # add standardized mean difference (SMD) column/s + if smd: + table = table.join(smd_table) + + # join the overall column if needed + if groupby and overall: + table = table.join(pd.concat([cat_describe_all['t1_summary'].Overall], + axis=1, keys=["Overall"])) + + return table