diff --git a/tests/execution_utils.py b/tests/execution_utils.py new file mode 100644 index 0000000..b0e251d --- /dev/null +++ b/tests/execution_utils.py @@ -0,0 +1,41 @@ +import base64 +import pickle + +import nbconvert +import nbformat +import pandas as pd + +from edvart.report import ReportBase +from edvart.report_sections.code_string_formatting import code_dedent +from edvart.report_sections.section_base import Section + + +def check_section_executes(section: Section, df: pd.DataFrame) -> None: + nb = nbformat.v4.new_notebook() + section_code_cells = [] + section.add_cells(section_code_cells, df) + + buffer = pickle.dumps(df, fix_imports=False) + buffer_base64 = base64.b85encode(buffer) + + unpickle_df = code_dedent( + f""" + import pickle + import base64 + + data = {buffer_base64} + df = pickle.loads(base64.b85decode(data), fix_imports=False)""" + ) + + all_imports = [ + *ReportBase._DEFAULT_IMPORTS, + *section.required_imports(), + ] + + nb["cells"] = [ + nbformat.v4.new_code_cell("\n".join(all_imports)), + nbformat.v4.new_code_cell(unpickle_df), + *section_code_cells, + ] + preprocessor = nbconvert.preprocessors.ExecutePreprocessor(timeout=60) + preprocessor.preprocess(nb) diff --git a/tests/test_bivariate_analysis.py b/tests/test_bivariate_analysis.py index 1aaccef..0f52775 100644 --- a/tests/test_bivariate_analysis.py +++ b/tests/test_bivariate_analysis.py @@ -1,6 +1,7 @@ import warnings from contextlib import redirect_stdout +import numpy as np import pandas as pd import pytest @@ -9,6 +10,7 @@ from edvart.report_sections.code_string_formatting import get_code from edvart.report_sections.section_base import Verbosity +from .execution_utils import check_section_executes from .pyarrow_utils import pyarrow_parameterize @@ -136,6 +138,8 @@ def test_code_export_verbosity_low(): assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(bivariate_section, df=get_test_df()) + def test_code_export_verbosity_low_with_subsections(): bivariate_section = bivariate_analysis.BivariateAnalysis( @@ -160,12 +164,26 @@ def test_code_export_verbosity_low_with_subsections(): assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(bivariate_section, df=get_test_df()) + def test_generated_code_verbosity_low_columns(): columns = [f"col{i}" for i in range(5)] columns_x = [f"col_x{i}" for i in range(6)] columns_y = [f"col_y{i}" for i in range(4)] columns_pairs = [(f"first{i}", f"second{i}") for i in range(8)] + columns_all = ( + columns + + columns_x + + columns_y + + [col_pair[0] for col_pair in columns_pairs] + + [col_pair[1] for col_pair in columns_pairs] + ) + test_df = pd.DataFrame( + data=np.random.rand(4, len(columns_all)), + columns=columns_all, + ) + bivariate_section = bivariate_analysis.BivariateAnalysis( columns=columns, columns_x=columns_x, @@ -188,6 +206,8 @@ def test_generated_code_verbosity_low_columns(): assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(bivariate_section, df=test_df) + def test_generated_code_verbosity_medium(): bivariate_section = bivariate_analysis.BivariateAnalysis( @@ -213,6 +233,8 @@ def test_generated_code_verbosity_medium(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(bivariate_section, df=get_test_df()) + def test_generated_code_verbosity_medium_columns_x_y(): columns_x = ["a", "b"] @@ -228,6 +250,10 @@ def test_generated_code_verbosity_medium_columns_x_y(): ], color_col="b", ) + test_df = pd.DataFrame( + columns=columns_x + columns_y, + data=np.random.rand(10, 4), + ) exported_cells = [] bivariate_section.add_cells(exported_cells, df=pd.DataFrame()) @@ -243,6 +269,8 @@ def test_generated_code_verbosity_medium_columns_x_y(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(bivariate_section, df=test_df) + def test_generated_code_verbosity_medium_columns_pairs(): columns_pairs = [("a", "b"), ("c", "d")] @@ -257,6 +285,10 @@ def test_generated_code_verbosity_medium_columns_pairs(): BivariateAnalysisSubsection.ContingencyTable, ], ) + test_df = pd.DataFrame( + columns=columns_x_correct + columns_y_correct, + data=np.random.rand(10, 4), + ) exported_cells = [] bivariate_section.add_cells(exported_cells, df=pd.DataFrame()) @@ -272,6 +304,8 @@ def test_generated_code_verbosity_medium_columns_pairs(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(bivariate_section, df=test_df) + def test_generated_code_verbosity_high(): bivariate_section = bivariate_analysis.BivariateAnalysis( @@ -311,6 +345,8 @@ def test_generated_code_verbosity_high(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(bivariate_section, df=get_test_df()) + def test_verbosity_low_different_subsection_verbosities(): bivariate_section = BivariateAnalysis( @@ -341,6 +377,8 @@ def test_verbosity_low_different_subsection_verbosities(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(bivariate_section, df=get_test_df()) + def test_imports_verbosity_low(): bivariate_section = BivariateAnalysis(verbosity=Verbosity.LOW) diff --git a/tests/test_group_analysis.py b/tests/test_group_analysis.py index 257dace..eeb6c67 100644 --- a/tests/test_group_analysis.py +++ b/tests/test_group_analysis.py @@ -21,6 +21,7 @@ ) from edvart.report_sections.section_base import Verbosity +from .execution_utils import check_section_executes from .pyarrow_utils import pyarrow_parameterize # Workaround to prevent multiple browser tabs opening with figures @@ -105,6 +106,8 @@ def test_code_export_verbosity_low(pyarrow_dtypes: bool): assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(group_section, df) + @pyarrow_parameterize def test_code_export_verbosity_medium(pyarrow_dtypes: bool): @@ -132,6 +135,8 @@ def test_code_export_verbosity_medium(pyarrow_dtypes: bool): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(group_section, df) + @pyarrow_parameterize def test_code_export_verbosity_high(pyarrow_dtypes: bool): @@ -187,6 +192,8 @@ def test_code_export_verbosity_high(pyarrow_dtypes: bool): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(group_section, df) + @pyarrow_parameterize def test_columns_parameter(pyarrow_dtypes: bool): diff --git a/tests/test_multivariate_analysis.py b/tests/test_multivariate_analysis.py index 459f65a..b90bacd 100644 --- a/tests/test_multivariate_analysis.py +++ b/tests/test_multivariate_analysis.py @@ -19,6 +19,7 @@ from edvart.report_sections.section_base import Verbosity from edvart.utils import select_numeric_columns +from .execution_utils import check_section_executes from .pyarrow_utils import pyarrow_parameterize @@ -147,6 +148,8 @@ def test_code_export_verbosity_low(pyarrow_dtypes: bool): assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(multivariate_section, df) + @pyarrow_parameterize def test_code_export_verbosity_low_with_subsections(pyarrow_dtypes: bool): @@ -186,6 +189,8 @@ def test_code_export_verbosity_low_with_subsections(pyarrow_dtypes: bool): assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(multivariate_section, df) + @pyarrow_parameterize def test_code_export_verbosity_medium_all_cols_valid(pyarrow_dtypes: bool): @@ -215,6 +220,8 @@ def test_code_export_verbosity_medium_all_cols_valid(pyarrow_dtypes: bool): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(multivariate_section, all_numeric_df) + @pyarrow_parameterize def test_generated_code_verbosity_1(pyarrow_dtypes: bool): @@ -252,6 +259,8 @@ def test_generated_code_verbosity_1(pyarrow_dtypes: bool): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(multivariate_section, df) + @pyarrow_parameterize def test_generated_code_verbosity_2(pyarrow_dtypes: bool): @@ -314,6 +323,8 @@ def test_generated_code_verbosity_2(pyarrow_dtypes: bool): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(multivariate_section, df) + @pyarrow_parameterize def test_verbosity_medium_non_categorical_col(pyarrow_dtypes: bool): @@ -337,6 +348,8 @@ def test_verbosity_medium_non_categorical_col(pyarrow_dtypes: bool): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(multivariate_section, random_df) + @pyarrow_parameterize def test_verbosity_low_different_subsection_verbosities(pyarrow_dtypes: bool): @@ -382,6 +395,8 @@ def test_verbosity_low_different_subsection_verbosities(pyarrow_dtypes: bool): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(multivariate_section, df) + def test_imports_verbosity_low(): multivariate_section = MultivariateAnalysis(verbosity=Verbosity.LOW) diff --git a/tests/test_overview_section.py b/tests/test_overview_section.py index 7fb3b44..a1ae9bf 100644 --- a/tests/test_overview_section.py +++ b/tests/test_overview_section.py @@ -20,6 +20,8 @@ from edvart.report_sections.dataset_overview import Overview, OverviewSubsection from edvart.report_sections.section_base import Verbosity +from .execution_utils import check_section_executes + def get_test_df() -> pd.DataFrame: test_df = pd.DataFrame(data=[[1.1, "a"], [2.2, "b"], [3.3, "c"]], columns=["A", "B"]) @@ -136,6 +138,8 @@ def test_code_export_verbosity_low(): # Test code equivalence assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(overview_section, df=get_test_df()) + def test_code_export_verbosity_low_with_subsections(): overview_section = Overview( @@ -158,6 +162,8 @@ def test_code_export_verbosity_low_with_subsections(): # Test code equivalence assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(overview_section, df=get_test_df()) + def test_code_export_verbosity_medium(): # Construct overview section @@ -192,6 +198,8 @@ def test_code_export_verbosity_medium(): for i in range(len(exported_code)): assert exported_code[i] == expected_code[i], "Exported code mismatch" + check_section_executes(overview_section, df=get_test_df()) + def test_code_export_verbosity_high(): # Construct overview section @@ -270,6 +278,8 @@ def test_code_export_verbosity_high(): for i in range(len(exported_code)): assert exported_code[i] == expected_code[i], "Exported code mismatch" + check_section_executes(overview_section, df=get_test_df()) + def test_verbosity_low_different_subsection_verbosities(): overview_section = Overview( @@ -303,6 +313,8 @@ def test_verbosity_low_different_subsection_verbosities(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(overview_section, df=get_test_df()) + def test_imports_verbosity_low(): overview_section = Overview(verbosity=Verbosity.LOW) diff --git a/tests/test_timeseries_analysis.py b/tests/test_timeseries_analysis.py index c01dc40..f536309 100644 --- a/tests/test_timeseries_analysis.py +++ b/tests/test_timeseries_analysis.py @@ -1,8 +1,10 @@ -import datetime import warnings from contextlib import redirect_stdout +from datetime import datetime +import numpy as np import pandas as pd +import plotly.io as pio import pytest import edvart @@ -15,6 +17,20 @@ TimeseriesAnalysisSubsection, ) +from .execution_utils import check_section_executes + +pio.renderers.default = "json" + + +def get_test_df() -> pd.DataFrame: + n_rows = 20 + columns = ["a", "b", "c"] + return pd.DataFrame( + index=[pd.Timestamp.now() + pd.Timedelta(minutes=i) for i in range(n_rows)], + data=np.random.rand(n_rows, len(columns)), + columns=columns, + ) + def test_default_config_verbosity(): timeseries_section = TimeseriesAnalysis() @@ -171,9 +187,10 @@ def test_ft_no_sampling_rate_error(): def test_code_export_verbosity_low(): ts_section = TimeseriesAnalysis(verbosity=Verbosity.LOW) + test_df = get_test_df() # Export code exported_cells = [] - ts_section.add_cells(exported_cells, df=pd.DataFrame()) + ts_section.add_cells(exported_cells, df=test_df) # Remove markdown and other cells and get code strings exported_code = [cell["source"] for cell in exported_cells if cell["cell_type"] == "code"] # Define expected code @@ -182,6 +199,8 @@ def test_code_export_verbosity_low(): assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(ts_section, test_df) + def test_code_export_verbosity_low_with_subsections(): ts_section = TimeseriesAnalysis( @@ -191,9 +210,10 @@ def test_code_export_verbosity_low_with_subsections(): ], verbosity=Verbosity.LOW, ) + test_df = get_test_df() # Export code exported_cells = [] - ts_section.add_cells(exported_cells, df=pd.DataFrame()) + ts_section.add_cells(exported_cells, df=get_test_df()) # Remove markdown and other cells and get code strings exported_code = [cell["source"] for cell in exported_cells if cell["cell_type"] == "code"] # Define expected code @@ -206,6 +226,8 @@ def test_code_export_verbosity_low_with_subsections(): assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(ts_section, test_df) + def test_code_export_verbosity_low_with_fft_stft(): ts_section = TimeseriesAnalysis( @@ -217,9 +239,10 @@ def test_code_export_verbosity_low_with_fft_stft(): sampling_rate=1, stft_window_size=1, ) + test_df = get_test_df() # Export code exported_cells = [] - ts_section.add_cells(exported_cells, df=pd.DataFrame()) + ts_section.add_cells(exported_cells, df=test_df) # Remove markdown and other cells and get code strings exported_code = [cell["source"] for cell in exported_cells if cell["cell_type"] == "code"] # Define expected code @@ -233,12 +256,15 @@ def test_code_export_verbosity_low_with_fft_stft(): assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(ts_section, test_df) + def test_generated_code_verbosity_medium(): ts_section = TimeseriesAnalysis(verbosity=Verbosity.MEDIUM) + test_df = get_test_df() exported_cells = [] - ts_section.add_cells(exported_cells, df=pd.DataFrame()) + ts_section.add_cells(exported_cells, df=test_df) exported_code = [cell["source"] for cell in exported_cells if cell["cell_type"] == "code"] expected_code = [ @@ -254,13 +280,15 @@ def test_generated_code_verbosity_medium(): assert len(expected_code) == len(exported_code) for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(ts_section, test_df) def test_generated_code_verbosity_high(): + test_df = get_test_df() ts_section = TimeseriesAnalysis(verbosity=Verbosity.HIGH, sampling_rate=1, stft_window_size=1) pairplot_cells = [] - ts_section.add_cells(pairplot_cells, df=pd.DataFrame()) + ts_section.add_cells(pairplot_cells, df=test_df) exported_code = [cell["source"] for cell in pairplot_cells if cell["cell_type"] == "code"] expected_code = [ @@ -323,8 +351,11 @@ def test_generated_code_verbosity_high(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(ts_section, test_df) + def test_verbosity_low_different_subsection_verbosities(): + test_df = get_test_df() ts_section = TimeseriesAnalysis( verbosity=Verbosity.LOW, subsections=[ @@ -343,7 +374,7 @@ def test_verbosity_low_different_subsection_verbosities(): ) ts_cells = [] - ts_section.add_cells(ts_cells, df=pd.DataFrame()) + ts_section.add_cells(ts_cells, df=test_df) exported_code = [cell["source"] for cell in ts_cells if cell["cell_type"] == "code"] expected_code = [ @@ -366,10 +397,14 @@ def test_verbosity_low_different_subsection_verbosities(): def test_boxplots_over_time_def(): - def month_func(x: datetime.datetime) -> str: + def month_func(x: datetime) -> str: return str(x.month) - boxplots_sub = BoxplotsOverTime(grouping_name="Month", grouping_function=month_func) + boxplots_sub = BoxplotsOverTime( + grouping_name="Month", + grouping_function=month_func, + grouping_function_imports=["from datetime import datetime"], + ) # Export code exported_cells = [] boxplots_sub.add_cells(exported_cells, df=pd.DataFrame()) @@ -385,6 +420,8 @@ def month_func(x: datetime.datetime) -> str: for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(boxplots_sub, get_test_df()) + def test_boxplots_over_time_lambda(): month_lambda = lambda x: x.month @@ -406,6 +443,8 @@ def test_boxplots_over_time_lambda(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(boxplots_sub, get_test_df()) + def test_imports_verbosity_low(): ts_section = TimeseriesAnalysis(verbosity=Verbosity.LOW) diff --git a/tests/test_univariate_analysis_section.py b/tests/test_univariate_analysis_section.py index 76e0ce8..f69a4d9 100644 --- a/tests/test_univariate_analysis_section.py +++ b/tests/test_univariate_analysis_section.py @@ -9,6 +9,7 @@ from edvart.report_sections.code_string_formatting import code_dedent, get_code from edvart.report_sections.section_base import Verbosity +from .execution_utils import check_section_executes from .pyarrow_utils import pyarrow_parameterize @@ -46,6 +47,8 @@ def test_code_export_verbosity_low(pyarrow_dtypes: bool): # Test code equivalence assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(univariate_section, test_df) + @pyarrow_parameterize def test_code_export_verbosity_medium(pyarrow_dtypes: bool): @@ -66,6 +69,8 @@ def test_code_export_verbosity_medium(pyarrow_dtypes: bool): for i in range(len(exported_code)): assert exported_code[i] == expected_code[i], "Exported code mismatch" + check_section_executes(univariate_section, test_df) + @pyarrow_parameterize def test_code_export_verbosity_high(pyarrow_dtypes: bool): @@ -117,6 +122,8 @@ def test_code_export_verbosity_high(pyarrow_dtypes: bool): for i in range(len(exported_code)): assert exported_code[i] == expected_code[i], "Exported code mismatch" + check_section_executes(univariate_section, test_df) + @pyarrow_parameterize def test_show(pyarrow_dtypes: bool):