diff --git a/tableone/tableone.py b/tableone/tableone.py
index 5486e7c..2a5c835 100644
--- a/tableone/tableone.py
+++ b/tableone/tableone.py
@@ -12,7 +12,8 @@
 from tableone.deprecations import deprecated_parameter
 from tableone.preprocessors import ensure_list, detect_categorical, order_categorical, get_groups
 from tableone.statistics import Statistics
-from tableone.validators import DataValidator, InputValidator, InputError
+from tableone.tables import Tables
+from tableone.validators import DataValidator, InputValidator
 
 
 def load_dataset(name: str) -> pd.DataFrame:
@@ -263,55 +264,105 @@ def __init__(self, data: pd.DataFrame,
         self._row_percent = row_percent
         self._smd = smd
         self._sort = sort
-        self.statistics = Statistics()
+        self.statistics = Statistics()  # TODO: remove this after migrating to tables.py
         self._tukey_test = tukey_test
         self._warnings = {}  # display notes and warnings below the table
 
         self._groupbylvls = get_groups(data, self._groupby, self._order, self._reserved_columns)
 
+        # Intermediate tables
+        self.tables = Tables()
+        self._htest_table = None
+        self.cat_describe_all = None
+        self.cont_describe_all = None
+        self.cat_describe = None
+        self.cont_describe = None
+        self.smd_table = None
+        self.cat_table = None
+        self.cont_table = None
+
         # forgive me jraffa
         if self._pval:
-            self._htest_table = self._create_htest_table(data)
-
-        # correct for multiple testing
-        if self._pval and self._pval_adjust:
-            alpha = 0.05
-            adjusted = self.statistics.multipletests(self._htest_table['P-Value'],
-                                                     alpha=alpha,
-                                                     method=self._pval_adjust)
-            self._htest_table['P-Value (adjusted)'] = adjusted[1]
-            self._htest_table['adjust method'] = self._pval_adjust
+            self._htest_table = self.tables.create_htest_table(data, self._continuous, self._categorical,
+                                                               self._nonnormal, self._groupby,
+                                                               self._groupbylvls, self._htest,
+                                                               self._pval, self._pval_adjust)
 
         # create overall tables if required
-        if self._categorical and self._groupby and overall:
-            self.cat_describe_all = self._create_cat_describe(data=data,
-                                                              groupby=None,
-                                                              groupbylvls=['Overall'])
-
-        if self._continuous and self._groupby and overall:
-            self.cont_describe_all = self._create_cont_describe(data=data,
-                                                                groupby=None)
+        if self._categorical and self._groupby and self._overall:
+            self.cat_describe_all = self.tables.create_cat_describe(data,
+                                                                    self._categorical,
+                                                                    self._decimals,
+                                                                    self._row_percent,
+                                                                    groupby=None,
+                                                                    groupbylvls=['Overall'])
+
+        if self._continuous and self._groupby and self._overall:
+            self.cont_describe_all = self.tables.create_cont_describe(data,
+                                                                      self._ddof,
+                                                                      self._t1_summary,
+                                                                      self._dip_test,
+                                                                      self._tukey_test,
+                                                                      self._normal_test,
+                                                                      self._continuous,
+                                                                      groupby=None)
 
         # create descriptive tables
         if self._categorical:
-            self.cat_describe = self._create_cat_describe(data=data,
-                                                          groupby=self._groupby,
-                                                          groupbylvls=self._groupbylvls)
+            self.cat_describe = self.tables.create_cat_describe(data,
+                                                                self._categorical,
+                                                                self._decimals,
+                                                                self._row_percent,
+                                                                groupby=self._groupby,
+                                                                groupbylvls=self._groupbylvls)
 
         if self._continuous:
-            self.cont_describe = self._create_cont_describe(data=data,
-                                                            groupby=self._groupby)
+            self.cont_describe = self.tables.create_cont_describe(data,
+                                                                  self._ddof,
+                                                                  self._t1_summary,
+                                                                  self._dip_test,
+                                                                  self._tukey_test,
+                                                                  self._normal_test,
+                                                                  self._continuous,
+                                                                  groupby=self._groupby)
 
         # compute standardized mean differences
         if self._smd:
-            self.smd_table = self._create_smd_table(data)
+            self.smd_table = self.tables.create_smd_table(data,
+                                                          self._groupbylvls,
+                                                          self._continuous,
+                                                          self._categorical,
+                                                          self.cont_describe,
+                                                          self.cat_describe)
 
         # create continuous and categorical tables
         if self._categorical:
-            self.cat_table = self._create_cat_table(data, overall)
+            self.cat_table = self.tables.create_cat_table(data,
+                                                          self._overall,
+                                                          self.cat_describe,
+                                                          self._categorical,
+                                                          self._pval,
+                                                          self._pval_adjust,
+                                                          self._htest_table,
+                                                          self._smd,
+                                                          self.smd_table,
+                                                          self._groupby,
+                                                          self.cat_describe_all)
+
         if self._continuous:
-            self.cont_table = self._create_cont_table(data, overall)
+            self.cont_table = self.tables.create_cont_table(data,
+                                                            self._overall,
+                                                            self.cont_describe,
+                                                            self.cont_describe_all,
+                                                            self._continuous,
+                                                            self._pval,
+                                                            self._pval_adjust,
+                                                            self._htest_table,
+                                                            self._smd,
+                                                            self.smd_table,
+                                                            self._groupby)
 
         # combine continuous variables and categorical variables into table 1
         self.tableone = self._create_tableone(data)
@@ -395,44 +446,45 @@ def _generate_remarks(self, newline='\n') -> str:
         Generate a series of remarks that the user should consider
         when interpreting the summary statistics.
         """
-        # generate warnings for continuous variables
-        if self._continuous and self._tukey_test:
-            # highlight far outliers
-            outlier_mask = self.cont_describe.far_outliers > 1
-            outlier_vars = list(self.cont_describe.far_outliers[outlier_mask].
+        if self.cont_describe is not None:
+            # generate warnings for continuous variables
+            if self._continuous and self._tukey_test:
+                # highlight far outliers
+                outlier_mask = self.cont_describe.far_outliers > 1
+                outlier_vars = list(self.cont_describe.far_outliers[outlier_mask].
+                                    dropna(how='all').index)
+                if outlier_vars:
+                    self._warnings["""Tukey test indicates far outliers
+                                   in"""] = outlier_vars
+
+            if self._continuous and self._dip_test:
+                # highlight possible multimodal distributions using hartigan's dip
+                # test -1 values indicate NaN
+                modal_mask = ((self.cont_describe.hartigan_dip >= 0) &
+                              (self.cont_describe.hartigan_dip <= 0.05))
+                modal_vars = list(self.cont_describe.hartigan_dip[modal_mask].
                                   dropna(how='all').index)
-        if outlier_vars:
-            self._warnings["""Tukey test indicates far outliers
-                           in"""] = outlier_vars
-
-        if self._continuous and self._dip_test:
-            # highlight possible multimodal distributions using hartigan's dip
-            # test -1 values indicate NaN
-            modal_mask = ((self.cont_describe.hartigan_dip >= 0) &
-                          (self.cont_describe.hartigan_dip <= 0.05))
-            modal_vars = list(self.cont_describe.hartigan_dip[modal_mask].
-                              dropna(how='all').index)
-            if modal_vars:
-                self._warnings["""Hartigan's Dip Test reports possible
-                               multimodal distributions for"""] = modal_vars
-
-        if self._continuous and self._normal_test:
-            # highlight non normal distributions
-            # -1 values indicate NaN
-            modal_mask = ((self.cont_describe.normality >= 0) &
-                          (self.cont_describe.normality <= 0.001))
-            modal_vars = list(self.cont_describe.normality[modal_mask].
-                              dropna(how='all').index)
-            if modal_vars:
-                self._warnings["""Normality test reports non-normal
-                               distributions for"""] = modal_vars
-
-        # create the warning string
-        msg = '{}'.format(newline)
-        for n, k in enumerate(sorted(self._warnings)):
-            msg += '[{}] {}: {}.{}'.format(n+1, k,
-                                           ', '.join(self._warnings[k]),
-                                           newline)
+                if modal_vars:
+                    self._warnings["""Hartigan's Dip Test reports possible
+                                   multimodal distributions for"""] = modal_vars
+
+            if self._continuous and self._normal_test:
+                # highlight non normal distributions
+                # -1 values indicate NaN
+                modal_mask = ((self.cont_describe.normality >= 0) &
+                              (self.cont_describe.normality <= 0.001))
+                modal_vars = list(self.cont_describe.normality[modal_mask].
+                                  dropna(how='all').index)
+                if modal_vars:
+                    self._warnings["""Normality test reports non-normal
+                                   distributions for"""] = modal_vars
+
+            # create the warning string
+            msg = '{}'.format(newline)
+            for n, k in enumerate(sorted(self._warnings)):
+                msg += '[{}] {}: {}.{}'.format(n+1, k, ', '.join(self._warnings[k]), newline)
+        else:
+            msg = ""
 
         return msg
 
@@ -483,422 +535,6 @@ def _t1_summary(self, x: pd.Series) -> str:
             f = '{{:.{}f}} ({{:.{}f}})'.format(n, n)
         return f.format(np.nanmean(x.values), self.statistics._std(x, self._ddof))  # type: ignore
 
-    def _create_cont_describe(self,
-                              data: pd.DataFrame,
-                              groupby: Optional[str] = None) -> pd.DataFrame:
-        """
-        Describe the continuous data.
-
-        Parameters
-        ----------
-        data : pandas DataFrame
-            The input dataset.
-
-        Returns
-        ----------
-        df_cont : pandas DataFrame
-            Summarise the continuous variables.
-        """
-        # wrapper for std with ddof
-        def std(x):
-            return self.statistics._std(x, self._ddof)
-
-        aggfuncs = ['count', 'mean', 'median', std,
-                    self.statistics._q25, self.statistics._q75,
-                    'min', 'max', self._t1_summary]
-
-        if self._dip_test:
-            aggfuncs.append(self.statistics._hartigan_dip)
-
-        if self._tukey_test:
-            aggfuncs.append(self.statistics._outliers)
-            aggfuncs.append(self.statistics._far_outliers)
-
-        if self._normal_test:
-            aggfuncs.append(self.statistics._normality)
-
-        # coerce continuous data to numeric
-        cont_data = data[self._continuous].apply(pd.to_numeric,
-                                                 errors='coerce')
-        # check all data in each continuous column is numeric
-        bad_cols = cont_data.count() != data[self._continuous].count()
-        bad_cols = cont_data.columns[bad_cols]
-        if len(bad_cols) > 0:
-            msg = ("The following continuous column(s) have "
-                   "non-numeric values: {variables}. Either specify the "
-                   "column(s) as categorical or remove the "
-                   "non-numeric values.").format(variables=bad_cols.values)
-            raise InputError(msg)
-
-        # check for coerced column containing all NaN to warn user
-        for column in cont_data.columns[cont_data.count() == 0]:
-            self._non_continuous_warning(column)
-
-        if groupby:
-            # add the groupby column back
-            cont_data = cont_data.merge(data[[groupby]],
-                                        left_index=True,
-                                        right_index=True)
-
-            # group and aggregate data
-            df_cont = pd.pivot_table(cont_data,
-                                     columns=[groupby],
-                                     aggfunc=aggfuncs)
-        else:
-            # if no groupby, just add single group column
-            df_cont = cont_data.apply(aggfuncs).T  # type: ignore
-            df_cont.columns.name = 'Overall'
-            df_cont.columns = pd.MultiIndex.from_product([df_cont.columns,
-                                                          ['Overall']])
-
-        df_cont.index = df_cont.index.rename('variable')
-
-        # remove prefix underscore from column names (e.g. _std -> std)
-        agg_rename = df_cont.columns.levels[0]  # type: ignore
-        agg_rename = [x[1:] if x[0] == '_' else x for x in agg_rename]
-        df_cont.columns = df_cont.columns.set_levels(agg_rename, level=0)  # type: ignore
-
-        return df_cont
-
-    def _format_cat(self, row, col) -> str:
-        """
-        Format values to n decimal places.
-        """
-        var = row.name[0]
-        if var in self._decimals:
-            n = self._decimals[var]  # type: ignore
-        else:
-            n = 1
-        f = '{{:.{}f}}'.format(n)
-        return f.format(row[col])
-
-    def _create_cat_describe(self, data: pd.DataFrame,
-                             groupby: Optional[str] = None,
-                             groupbylvls: Optional[list] = None) -> pd.DataFrame:
-        """
-        Describe the categorical data.
-
-        Parameters
-        ----------
-        data : pandas DataFrame
-            The input dataset.
-        groupby : Str
-            Variable to group by.
-        groupbylvls : List
-            List of levels in the groupby variable.
-
-        Returns
-        ----------
-        df_cat : pandas DataFrame
-            Summarise the categorical variables.
-        """
-        group_dict = {}
-
-        cat_slice = data[self._categorical].copy()
-
-        for g in groupbylvls:  # type: ignore
-            if groupby:
-                df = cat_slice.loc[data[groupby] == g, self._categorical]
-            else:
-                df = cat_slice.copy()
-
-            # create n column and null count column
-            # must be done before converting values to strings
-            ct = df.count().to_frame(name='n')
-            ct.index.name = 'variable'
-            nulls = df.isnull().sum().to_frame(name='Missing')
-            nulls.index.name = 'variable'
-
-            # Convert to str to handle int converted to boolean in the index.
-            # Also avoid nans.
-            for column in df.columns:
-                df[column] = [str(row) if not pd.isnull(row)
-                              else None for row in df[column].values]
-                cat_slice[column] = [str(row) if not pd.isnull(row)
-                                     else None for row
-                                     in cat_slice[column].values]
-
-            # create a dataframe with freq, proportion
-            df = df.melt().groupby(['variable',
-                                    'value']).size().to_frame(name='freq')
-
-            df['percent'] = df['freq'].div(df.groupby(level=0).freq.sum(),
-                                           level=0).astype(float) * 100
-
-            # add row percent
-            df['percent_row'] = df['freq'].div(cat_slice[self._categorical]
-                                               .melt()
-                                               .groupby(['variable', 'value'])
-                                               .size()) * 100
-
-            # set number of decimal places for percent
-            if isinstance(self._decimals, int):
-                n = self._decimals
-                f = '{{:.{}f}}'.format(n)
-                df['percent_str'] = df['percent'].astype(float).map(f.format)
-                df['percent_row_str'] = df['percent_row'].astype(float).map(
-                    f.format)
-            elif isinstance(self._decimals, dict):
-                df.loc[:, 'percent_str'] = df.apply(self._format_cat, axis=1,
-                                                    args=['percent'])
-                df.loc[:, 'percent_row_str'] = df.apply(self._format_cat,
-                                                        axis=1,
-                                                        args=['percent_row'])
-            else:
-                n = 1
-                f = '{{:.{}f}}'.format(n)
-                df['percent_str'] = df['percent'].astype(float).map(f.format)
-                df['percent_row_str'] = df['percent_row'].astype(float).map(
-                    f.format)
-
-            # join count column
-            df = df.join(ct)
-
-            # only save null count to the first category for each variable
-            # do this by extracting the first category from the df row index
-            levels = df.reset_index()[['variable',
-                                       'value']].groupby('variable').first()
-            # add this category to the nulls table
-            nulls = nulls.join(levels)
-            nulls = nulls.set_index('value', append=True)
-            # join nulls to categorical
-            df = df.join(nulls)
-
-            # add summary column
-            if self._row_percent:
-                df['t1_summary'] = (df.freq.map(str) + ' ('
-                                    + df.percent_row_str.map(str)+')')
-            else:
-                df['t1_summary'] = (df.freq.map(str) + ' ('
-                                    + df.percent_str.map(str)+')')
-
-            # add to dictionary
-            group_dict[g] = df
-
-        df_cat = pd.concat(group_dict, axis=1)
-        # ensure the groups are the 2nd level of the column index
-        if df_cat.columns.nlevels > 1:
-            df_cat = df_cat.swaplevel(0, 1, axis=1).sort_index(axis=1, level=0)
-
-        return df_cat
-
-    def _create_htest_table(self, data: pd.DataFrame) -> pd.DataFrame:
-        """
-        Create a table containing P-Values for significance tests. Add features
-        of the distributions and the P-Values to the dataframe.
-
-        Parameters
-        ----------
-        data : pandas DataFrame
-            The input dataset.
-
-        Returns
-        ----------
-        df : pandas DataFrame
-            A table containing the P-Values, test name, etc.
-        """
-        # list features of the variable e.g. matched, paired, n_expected
-        df = pd.DataFrame(index=self._continuous+self._categorical,
-                          columns=['continuous', 'nonnormal',
-                                   'min_observed', 'P-Value', 'Test'])
-
-        df.index = df.index.rename('variable')
-        df['continuous'] = np.where(df.index.isin(self._continuous),
-                                    True, False)
-
-        df['nonnormal'] = np.where(df.index.isin(self._nonnormal),
-                                   True, False)
-
-        # list values for each variable, grouped by groupby levels
-        for v in df.index:
-            is_continuous = df.loc[v]['continuous']
-            is_categorical = ~df.loc[v]['continuous']
-            is_normal = ~df.loc[v]['nonnormal']
-
-            # if continuous, group data into list of lists
-            if is_continuous:
-                catlevels = None
-                grouped_data = {}
-                for s in self._groupbylvls:
-                    lvl_data = data.loc[data[self._groupby] == s, v]
-                    # coerce to numeric and drop non-numeric data
-                    lvl_data = lvl_data.apply(pd.to_numeric,
-                                              errors='coerce').dropna()
-                    # append to overall group data
-                    grouped_data[s] = lvl_data.values
-                min_observed = min([len(x) for x in grouped_data.values()])
-            # if categorical, create contingency table
-            elif is_categorical:
-                catlevels = sorted(data[v].astype('category').cat.categories)
-                cross_tab = pd.crosstab(data[self._groupby].
-                                        rename('_groupby_var_'), data[v])
-                min_observed = cross_tab.sum(axis=1).min()
-                grouped_data = cross_tab.T.to_dict('list')
-
-            # minimum number of observations across all levels
-            df.loc[v, 'min_observed'] = min_observed  # type: ignore
-
-            # compute pvalues
-            warning_msg = None
-            (df.loc[v, 'P-Value'],
-             df.loc[v, 'Test'],
-             warning_msg) = self.statistics._p_test(v, grouped_data,  # type: ignore
-                                                    is_continuous, is_categorical, is_normal,  # type: ignore
-                                                    min_observed, catlevels, self._htest)  # type: ignore
-            if warning_msg:
-                try:
-                    self._warnings[warning_msg].append(v)
-                except KeyError:
-                    self._warnings[warning_msg] = [v]
-
-        return df
-
-    def _create_smd_table(self, data: pd.DataFrame) -> pd.DataFrame:
-        """
-        Create a table containing pairwise Standardized Mean Differences
-        (SMDs).
-
-        Parameters
-        ----------
-        data : pandas DataFrame
-            The input dataset.
-
-        Returns
-        ----------
-        df : pandas DataFrame
-            A table containing pairwise standardized mean differences
-            (SMDs).
- """ - # create the SMD table - permutations = [sorted((x, y), - key=lambda f: self._groupbylvls.index(f)) - for x in self._groupbylvls - for y in self._groupbylvls if x is not y] - - p_set = set(tuple(x) for x in permutations) - - colname = 'SMD ({0},{1})' - columns = [colname.format(x[0], x[1]) for x in p_set] - df = pd.DataFrame(index=self._continuous+self._categorical, - columns=columns) - df.index = df.index.rename('variable') - - for p in p_set: - try: - for v in self.cont_describe.index: - smd, _ = self.statistics._cont_smd( - mean1=self.cont_describe['mean'][p[0]].loc[v], - mean2=self.cont_describe['mean'][p[1]].loc[v], - sd1=self.cont_describe['std'][p[0]].loc[v], - sd2=self.cont_describe['std'][p[1]].loc[v], - n1=self.cont_describe['count'][p[0]].loc[v], - n2=self.cont_describe['count'][p[1]].loc[v], - unbiased=False) - df.loc[v, colname.format(p[0], p[1])] = smd - except AttributeError: - pass - - try: - for v, _ in self.cat_describe.groupby(level=0): - smd, _ = self.statistics._cat_smd( - prop1=self.cat_describe.loc[[v]]['percent'][p[0]] - .values/100, - prop2=self.cat_describe.loc[[v]]['percent'][p[1]] - .values/100, - n1=self.cat_describe.loc[[v]]['freq'][p[0]].sum(), - n2=self.cat_describe.loc[[v]]['freq'][p[1]].sum(), - unbiased=False) - df.loc[v, colname.format(p[0], p[1])] = smd # type: ignore - except AttributeError: - pass - - return df - - def _create_cont_table(self, data, overall) -> pd.DataFrame: - """ - Create tableone for continuous data. - - Returns - ---------- - table : pandas DataFrame - A table summarising the continuous variables. - """ - # remove the t1_summary level - table = self.cont_describe[['t1_summary']].copy() - table.columns = table.columns.droplevel(level=0) - - # add a column of null counts as 1-count() from previous function - nulltable = data[self._continuous].isnull().sum().to_frame( - name='Missing') - try: - table = table.join(nulltable) - # if columns form a CategoricalIndex, need to convert to string first - except TypeError: - table.columns = table.columns.astype(str) - table = table.join(nulltable) - - # add an empty value column, for joining with cat table - table['value'] = '' - table = table.set_index([table.index, 'value']) # type: ignore - - # add pval column - if self._pval and self._pval_adjust: - table = table.join(self._htest_table[['P-Value (adjusted)', - 'Test']]) - elif self._pval: - table = table.join(self._htest_table[['P-Value', 'Test']]) - - # add standardized mean difference (SMD) column/s - if self._smd: - table = table.join(self.smd_table) - - # join the overall column if needed - if self._groupby and overall: - table = table.join(pd.concat([self.cont_describe_all['t1_summary']. - Overall], axis=1, keys=["Overall"])) - - return table - - def _create_cat_table(self, data, overall): - """ - Create table one for categorical data. - - Returns - ---------- - table : pandas DataFrame - A table summarising the categorical variables. 
- """ - table = self.cat_describe['t1_summary'].copy() - - # add the total count of null values across all levels - isnull = data[self._categorical].isnull().sum().to_frame( - name='Missing') - isnull.index = isnull.index.rename('variable') - try: - table = table.join(isnull) - # if columns form a CategoricalIndex, need to convert to string first - except TypeError: - table.columns = table.columns.astype(str) - table = table.join(isnull) - - # add pval column - if self._pval and self._pval_adjust: - table = table.join(self._htest_table[['P-Value (adjusted)', - 'Test']]) - elif self._pval: - table = table.join(self._htest_table[['P-Value', 'Test']]) - - # add standardized mean difference (SMD) column/s - if self._smd: - table = table.join(self.smd_table) - - # join the overall column if needed - if self._groupby and overall: - table = table.join(pd.concat([self.cat_describe_all['t1_summary']. - Overall], axis=1, keys=["Overall"])) - - return table - def _create_tableone(self, data): """ Create table 1 by combining the continuous and categorical tables. @@ -926,7 +562,7 @@ def _create_tableone(self, data): # sort the table rows sort_columns = ['Missing', 'P-Value', 'P-Value (adjusted)', 'Test'] - if self._smd: + if self._smd and self.smd_table is not None: sort_columns = sort_columns + list(self.smd_table.columns) if self._sort and isinstance(self._sort, bool): @@ -976,7 +612,7 @@ def _create_tableone(self, data): table.loc[asterisk_mask, 'P-Value'] = table['P-Value'][asterisk_mask].astype(str)+"*" # type: ignore # round smd columns and convert to string - if self._smd: + if self._smd and self.smd_table is not None: for c in list(self.smd_table.columns): table[c] = table[c].apply('{:.3f}'.format).astype(str) table.loc[table[c] == '0.000', c] = '<0.001' @@ -1045,7 +681,7 @@ def _create_tableone(self, data): table = table.reindex(orig_idx) # drop the rows > the limit - table = table.drop(new_idx_array[limit:]) + table = table.drop(new_idx_array[limit:]) # type: ignore # insert n row n_row = pd.DataFrame(columns=['variable', 'value', 'Missing']) @@ -1071,7 +707,7 @@ def _create_tableone(self, data): dupe_mask = table.groupby(level=[0]).cumcount().ne(0) # type: ignore dupe_columns = ['Missing'] optional_columns = ['P-Value', 'P-Value (adjusted)', 'Test'] - if self._smd: + if self._smd and self.smd_table is not None: optional_columns = optional_columns + list(self.smd_table.columns) for col in optional_columns: if col in table.columns.values: @@ -1185,12 +821,6 @@ def _create_row_labels(self) -> dict: return labels - # warnings - def _non_continuous_warning(self, c): - msg = ("'{}' has all non-numeric values. Consider including " - "it in the list of categorical variables.").format(c) - warnings.warn(msg, RuntimeWarning, stacklevel=2) - # Allow TableOne to be called as a function. # Refactor this out at some point! diff --git a/tableone/tables.py b/tableone/tables.py new file mode 100644 index 0000000..7135136 --- /dev/null +++ b/tableone/tables.py @@ -0,0 +1,491 @@ +from typing import Optional +import warnings + +import numpy as np +import pandas as pd + +from tableone.statistics import Statistics +from tableone.validators import InputError + + +class Tables: + """ + Create and store intermediate tables used to create Table 1. + + Usage: + + self.tables = Tables() + self.tables._create_htest_table() + self.tables._htest_table + """ + def __init__(self): + """ + Initialize the Tables class. 
+ """ + self.statistics = Statistics() + + def create_htest_table(self, data: pd.DataFrame, + continuous, + categorical, + nonnormal, + groupby, + groupbylvls, + htest, + pval, + pval_adjust) -> pd.DataFrame: + """ + Create a table containing P-Values for significance tests. Add features + of the distributions and the P-Values to the dataframe. + + Parameters + ---------- + data : pandas DataFrame + The input dataset. + + Returns + ---------- + df : pandas DataFrame + A table containing the P-Values, test name, etc. + """ + # list features of the variable e.g. matched, paired, n_expected + df = pd.DataFrame(index=continuous+categorical, + columns=['continuous', 'nonnormal', + 'min_observed', 'P-Value', 'Test']) + + df.index = df.index.rename('variable') + + df['continuous'] = np.where(df.index.isin(continuous), True, False) + df['nonnormal'] = np.where(df.index.isin(nonnormal), True, False) + + # list values for each variable, grouped by groupby levels + for v in df.index: + is_continuous = df.loc[v]['continuous'] + is_categorical = ~df.loc[v]['continuous'] + is_normal = ~df.loc[v]['nonnormal'] + + # if continuous, group data into list of lists + if is_continuous: + catlevels = None + grouped_data = {} + for s in groupbylvls: + lvl_data = data.loc[data[groupby] == s, v] + # coerce to numeric and drop non-numeric data + lvl_data = lvl_data.apply(pd.to_numeric, + errors='coerce').dropna() + # append to overall group data + grouped_data[s] = lvl_data.values + min_observed = min([len(x) for x in grouped_data.values()]) + # if categorical, create contingency table + elif is_categorical: + catlevels = sorted(data[v].astype('category').cat.categories) + cross_tab = pd.crosstab(data[groupby].rename('_groupby_var_'), data[v]) + min_observed = cross_tab.sum(axis=1).min() + grouped_data = cross_tab.T.to_dict('list') + + # minimum number of observations across all levels + df.loc[v, 'min_observed'] = min_observed # type: ignore + + # compute pvalues + warning_msg = None + (df.loc[v, 'P-Value'], + df.loc[v, 'Test'], + warning_msg) = self.statistics._p_test(v, grouped_data, is_continuous, is_categorical, # type: ignore + is_normal, min_observed, catlevels, htest) # type: ignore + + # TODO: Improve method for handling these warnings. + # Write to logfile? + # + # if warning_msg: + # try: + # self._warnings[warning_msg].append(v) + # except KeyError: + # self._warnings[warning_msg] = [v] + + # correct for multiple testing + if pval and pval_adjust: + adjusted = self.statistics.multipletests(df['P-Value'], + alpha=0.05, + method=pval_adjust) + df['P-Value (adjusted)'] = adjusted[1] + df['adjust method'] = pval_adjust + + return df + + def create_smd_table(self, + data: pd.DataFrame, + groupbylvls, + continuous, + categorical, + cont_describe, + cat_describe) -> pd.DataFrame: + """ + Create a table containing pairwise Standardized Mean Differences + (SMDs). + + Parameters + ---------- + data : pandas DataFrame + The input dataset. + + Returns + ---------- + df : pandas DataFrame + A table containing pairwise standardized mean differences + (SMDs). 
+ """ + # create the SMD table + permutations = [sorted((x, y), + key=lambda f: groupbylvls.index(f)) + for x in groupbylvls + for y in groupbylvls if x is not y] + + p_set = set(tuple(x) for x in permutations) + + colname = 'SMD ({0},{1})' + columns = [colname.format(x[0], x[1]) for x in p_set] + df = pd.DataFrame(index=continuous+categorical, columns=columns) + df.index = df.index.rename('variable') + + for p in p_set: + try: + for v in cont_describe.index: + smd, _ = self.statistics._cont_smd( + mean1=cont_describe['mean'][p[0]].loc[v], + mean2=cont_describe['mean'][p[1]].loc[v], + sd1=cont_describe['std'][p[0]].loc[v], + sd2=cont_describe['std'][p[1]].loc[v], + n1=cont_describe['count'][p[0]].loc[v], + n2=cont_describe['count'][p[1]].loc[v], + unbiased=False) + df.loc[v, colname.format(p[0], p[1])] = smd + except AttributeError: + pass + + try: + for v, _ in cat_describe.groupby(level=0): + smd, _ = self.statistics._cat_smd( + prop1=cat_describe.loc[[v]]['percent'][p[0]].values/100, + prop2=cat_describe.loc[[v]]['percent'][p[1]].values/100, + n1=cat_describe.loc[[v]]['freq'][p[0]].sum(), + n2=cat_describe.loc[[v]]['freq'][p[1]].sum(), + unbiased=False) + df.loc[v, colname.format(p[0], p[1])] = smd # type: ignore + except AttributeError: + pass + + return df + + def format_cat(self, row, col, decimals) -> str: + """ + Format values to n decimal places. + """ + var = row.name[0] + if var in decimals: + n = decimals[var] # type: ignore + else: + n = 1 + f = '{{:.{}f}}'.format(n) + + return f.format(row[col]) + + def create_cat_describe(self, + data: pd.DataFrame, + categorical, + decimals, + row_percent, + groupby: Optional[str] = None, + groupbylvls: Optional[list] = None + ) -> pd.DataFrame: + """ + Describe the categorical data. + + Parameters + ---------- + data : pandas DataFrame + The input dataset. + groupby : Str + Variable to group by. + groupbylvls : List + List of levels in the groupby variable. + + Returns + ---------- + df_cat : pandas DataFrame + Summarise the categorical variables. + """ + group_dict = {} + + cat_slice = data[categorical].copy() + + for g in groupbylvls: # type: ignore + if groupby: + df = cat_slice.loc[data[groupby] == g, categorical] + else: + df = cat_slice.copy() + + # create n column and null count column + # must be done before converting values to strings + ct = df.count().to_frame(name='n') + ct.index.name = 'variable' + nulls = df.isnull().sum().to_frame(name='Missing') + nulls.index.name = 'variable' + + # Convert to str to handle int converted to boolean in the index. + # Also avoid nans. 
+            for column in df.columns:
+                df[column] = [str(row) if not pd.isnull(row)
+                              else None for row in df[column].values]
+                cat_slice[column] = [str(row) if not pd.isnull(row)
+                                     else None for row
+                                     in cat_slice[column].values]
+
+            # create a dataframe with freq, proportion
+            df = df.melt().groupby(['variable',
+                                    'value']).size().to_frame(name='freq')
+
+            df['percent'] = df['freq'].div(df.groupby(level=0).freq.sum(),
+                                           level=0).astype(float) * 100
+
+            # add row percent
+            df['percent_row'] = df['freq'].div(cat_slice[categorical]
+                                               .melt()
+                                               .groupby(['variable', 'value'])
+                                               .size()) * 100
+
+            # set number of decimal places for percent
+            if isinstance(decimals, int):
+                n = decimals
+                f = '{{:.{}f}}'.format(n)
+                df['percent_str'] = df['percent'].astype(float).map(f.format)
+                df['percent_row_str'] = df['percent_row'].astype(float).map(
+                    f.format)
+            elif isinstance(decimals, dict):
+                df.loc[:, 'percent_str'] = df.apply(self.format_cat, axis=1,
+                                                    args=['percent', decimals])
+                df.loc[:, 'percent_row_str'] = df.apply(self.format_cat, axis=1,
+                                                        args=['percent_row', decimals])
+            else:
+                n = 1
+                f = '{{:.{}f}}'.format(n)
+                df['percent_str'] = df['percent'].astype(float).map(f.format)
+                df['percent_row_str'] = df['percent_row'].astype(float).map(
+                    f.format)
+
+            # join count column
+            df = df.join(ct)
+
+            # only save null count to the first category for each variable
+            # do this by extracting the first category from the df row index
+            levels = df.reset_index()[['variable',
+                                       'value']].groupby('variable').first()
+            # add this category to the nulls table
+            nulls = nulls.join(levels)
+            nulls = nulls.set_index('value', append=True)
+            # join nulls to categorical
+            df = df.join(nulls)
+
+            # add summary column
+            if row_percent:
+                df['t1_summary'] = (df.freq.map(str) + ' ('
+                                    + df.percent_row_str.map(str)+')')
+            else:
+                df['t1_summary'] = (df.freq.map(str) + ' ('
+                                    + df.percent_str.map(str)+')')
+
+            # add to dictionary
+            group_dict[g] = df
+
+        df_cat = pd.concat(group_dict, axis=1)
+        # ensure the groups are the 2nd level of the column index
+        if df_cat.columns.nlevels > 1:
+            df_cat = df_cat.swaplevel(0, 1, axis=1).sort_index(axis=1, level=0)
+
+        return df_cat
+
+    def create_cont_describe(self,
+                             data: pd.DataFrame,
+                             ddof,
+                             t1_summary,
+                             dip_test,
+                             tukey_test,
+                             normal_test,
+                             continuous,
+                             groupby: Optional[str] = None) -> pd.DataFrame:
+        """
+        Describe the continuous data.
+
+        Parameters
+        ----------
+        data : pandas DataFrame
+            The input dataset.
+
+        Returns
+        ----------
+        df_cont : pandas DataFrame
+            Summarise the continuous variables.
+        """
+        # wrapper for std with ddof
+        def std(x):
+            return self.statistics._std(x, ddof)
+
+        aggfuncs = ['count', 'mean', 'median', std,
+                    self.statistics._q25, self.statistics._q75,
+                    'min', 'max', t1_summary]
+
+        if dip_test:
+            aggfuncs.append(self.statistics._hartigan_dip)
+
+        if tukey_test:
+            aggfuncs.append(self.statistics._outliers)
+            aggfuncs.append(self.statistics._far_outliers)
+
+        if normal_test:
+            aggfuncs.append(self.statistics._normality)
+
+        # coerce continuous data to numeric
+        cont_data = data[continuous].apply(pd.to_numeric, errors='coerce')
+        # check all data in each continuous column is numeric
+        bad_cols = cont_data.count() != data[continuous].count()
+        bad_cols = cont_data.columns[bad_cols]
+        if len(bad_cols) > 0:
+            msg = ("The following continuous column(s) have "
Either specify the " + "column(s) as categorical or remove the " + "non-numeric values.").format(variables=bad_cols.values) + raise InputError(msg) + + # check for coerced column containing all NaN to warn user + for column in cont_data.columns[cont_data.count() == 0]: + self._non_continuous_warning(column) + + if groupby: + # add the groupby column back + cont_data = cont_data.merge(data[[groupby]], left_index=True, right_index=True) + + # group and aggregate data + df_cont = pd.pivot_table(cont_data, columns=[groupby], aggfunc=aggfuncs) + else: + # if no groupby, just add single group column + df_cont = cont_data.apply(aggfuncs).T # type: ignore + df_cont.columns.name = 'Overall' + df_cont.columns = pd.MultiIndex.from_product([df_cont.columns, ['Overall']]) + + df_cont.index = df_cont.index.rename('variable') + + # remove prefix underscore from column names (e.g. _std -> std) + agg_rename = df_cont.columns.levels[0] # type: ignore + agg_rename = [x[1:] if x[0] == '_' else x for x in agg_rename] + df_cont.columns = df_cont.columns.set_levels(agg_rename, level=0) # type: ignore + + return df_cont + + # warnings + def _non_continuous_warning(self, c): + msg = ("'{}' has all non-numeric values. Consider including " + "it in the list of categorical variables.").format(c) + warnings.warn(msg, RuntimeWarning, stacklevel=2) + + def create_cont_table(self, + data, + overall, + cont_describe, + cont_describe_all, + continuous, + pval, + pval_adjust, + htest_table, + smd, + smd_table, + groupby + ) -> pd.DataFrame: + """ + Create tableone for continuous data. + + Returns + ---------- + table : pandas DataFrame + A table summarising the continuous variables. + """ + # remove the t1_summary level + table = cont_describe[['t1_summary']].copy() + table.columns = table.columns.droplevel(level=0) + + # add a column of null counts as 1-count() from previous function + nulltable = data[continuous].isnull().sum().to_frame(name='Missing') + try: + table = table.join(nulltable) + # if columns form a CategoricalIndex, need to convert to string first + except TypeError: + table.columns = table.columns.astype(str) + table = table.join(nulltable) + + # add an empty value column, for joining with cat table + table['value'] = '' + table = table.set_index([table.index, 'value']) # type: ignore + + # add pval column + if pval and pval_adjust: + table = table.join(htest_table[['P-Value (adjusted)', 'Test']]) + elif pval: + table = table.join(htest_table[['P-Value', 'Test']]) + + # add standardized mean difference (SMD) column/s + if smd: + table = table.join(smd_table) + + # join the overall column if needed + if groupby and overall: + table = table.join(pd.concat([cont_describe_all['t1_summary']. + Overall], axis=1, keys=["Overall"])) + + return table + + def create_cat_table(self, + data, + overall, + cat_describe, + categorical, + pval, + pval_adjust, + htest_table, + smd, + smd_table, + groupby, + cat_describe_all): + """ + Create table one for categorical data. + + Returns + ---------- + table : pandas DataFrame + A table summarising the categorical variables. 
+ """ + table = cat_describe['t1_summary'].copy() + + # add the total count of null values across all levels + isnull = data[categorical].isnull().sum().to_frame(name='Missing') + isnull.index = isnull.index.rename('variable') + + try: + table = table.join(isnull) + # if columns form a CategoricalIndex, need to convert to string first + except TypeError: + table.columns = table.columns.astype(str) + table = table.join(isnull) + + # add pval column + if pval and pval_adjust: + table = table.join(htest_table[['P-Value (adjusted)', 'Test']]) + elif pval: + table = table.join(htest_table[['P-Value', 'Test']]) + + # add standardized mean difference (SMD) column/s + if smd: + table = table.join(smd_table) + + # join the overall column if needed + if groupby and overall: + table = table.join(pd.concat([cat_describe_all['t1_summary'].Overall], + axis=1, keys=["Overall"])) + + return table