def check_section_executes(section: Section, df: pd.DataFrame, *, timeout: int = 60) -> None:
    """Check that the code cells exported by ``section`` execute.

    A throwaway notebook is assembled from three parts: a cell with the
    imports, a cell that rebuilds ``df`` from an embedded pickle, and the cells
    produced by ``section.add_cells``. The notebook is then run with
    nbconvert's ``ExecutePreprocessor``.

    Parameters
    ----------
    section : Section
        Section whose exported cells are executed.
    df : pd.DataFrame
        Data passed to ``section.add_cells`` and made available to the
        executed cells under the name ``df``.
    timeout : int, default 60
        Forwarded to ``ExecutePreprocessor`` as its ``timeout`` argument.

    Raises
    ------
    Exception
        Whatever ``ExecutePreprocessor.preprocess`` raises is propagated
        unchanged (presumably a cell execution error when a cell fails;
        confirm against the nbconvert documentation).
    """
    section_code_cells = []
    section.add_cells(section_code_cells, df)

    # Notebook cells are plain source text, so the frame travels as a
    # base85-encoded pickle literal embedded in the generated code.
    # NOTE: unpickling is acceptable only because the payload is produced
    # right here; never point this at externally supplied data.
    payload = base64.b85encode(pickle.dumps(df, fix_imports=False))

    # `!r` yields the same b'...' literal as the implicit str() conversion but
    # does not emit BytesWarning when the interpreter runs with -b / -bb.
    unpickle_df = code_dedent(
        f"""
        import pickle
        import base64

        data = {payload!r}
        df = pickle.loads(base64.b85decode(data), fix_imports=False)"""
    )

    all_imports = [
        *ReportBase._DEFAULT_IMPORTS,
        *section.required_imports(),
    ]

    nb = nbformat.v4.new_notebook()
    nb["cells"] = [
        nbformat.v4.new_code_cell("\n".join(all_imports)),
        nbformat.v4.new_code_cell(unpickle_df),
        *section_code_cells,
    ]
    preprocessor = nbconvert.preprocessors.ExecutePreprocessor(timeout=timeout)
    preprocessor.preprocess(nb)
code mismatch" + check_section_executes(bivariate_section, df=get_test_df()) def test_code_export_verbosity_low_with_subsections(): @@ -155,6 +159,7 @@ def test_code_export_verbosity_low_with_subsections(): # Test code equivalence assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(bivariate_section, df=get_test_df()) def test_generated_code_verbosity_low_columns(): @@ -162,6 +167,18 @@ def test_generated_code_verbosity_low_columns(): columns_x = [f"col_x{i}" for i in range(6)] columns_y = [f"col_y{i}" for i in range(4)] columns_pairs = [(f"first{i}", f"second{i}") for i in range(8)] + columns_all = ( + columns + + columns_x + + columns_y + + [col_pair[0] for col_pair in columns_pairs] + + [col_pair[1] for col_pair in columns_pairs] + ) + test_df = pd.DataFrame( + data=np.random.rand(4, len(columns_all)), + columns=columns_all, + ) + bivariate_section = bivariate_analysis.BivariateAnalysis( columns=columns, columns_x=columns_x, @@ -184,6 +201,8 @@ def test_generated_code_verbosity_low_columns(): assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(bivariate_section, df=test_df) + def test_generated_code_verbosity_medium(): bivariate_section = bivariate_analysis.BivariateAnalysis( @@ -209,6 +228,8 @@ def test_generated_code_verbosity_medium(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(bivariate_section, df=get_test_df()) + def test_generated_code_verbosity_medium_columns_x_y(): columns_x = ["a", "b"] @@ -224,6 +245,10 @@ def test_generated_code_verbosity_medium_columns_x_y(): ], color_col="b", ) + test_df = pd.DataFrame( + columns=columns_x + columns_y, + data=np.random.rand(10, 4), + ) exported_cells = [] bivariate_section.add_cells(exported_cells, df=pd.DataFrame()) @@ -239,6 +264,8 @@ def 
test_generated_code_verbosity_medium_columns_x_y(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(bivariate_section, df=test_df) + def test_generated_code_verbosity_medium_columns_pairs(): columns_pairs = [("a", "b"), ("c", "d")] @@ -253,6 +280,10 @@ def test_generated_code_verbosity_medium_columns_pairs(): bivariate_analysis.BivariateAnalysis.BivariateAnalysisSubsection.ContingencyTable, ], ) + test_df = pd.DataFrame( + columns=columns_x_correct + columns_y_correct, + data=np.random.rand(10, 4), + ) exported_cells = [] bivariate_section.add_cells(exported_cells, df=pd.DataFrame()) @@ -268,6 +299,8 @@ def test_generated_code_verbosity_medium_columns_pairs(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(bivariate_section, df=test_df) + def test_generated_code_verbosity_high(): bivariate_section = bivariate_analysis.BivariateAnalysis( @@ -307,6 +340,8 @@ def test_generated_code_verbosity_high(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(bivariate_section, df=get_test_df()) + def test_verbosity_low_different_subsection_verbosities(): bivariate_section = BivariateAnalysis( @@ -337,6 +372,8 @@ def test_verbosity_low_different_subsection_verbosities(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(bivariate_section, df=get_test_df()) + def test_imports_verbosity_low(): bivariate_section = BivariateAnalysis(verbosity=Verbosity.LOW) diff --git a/tests/test_group_analysis.py b/tests/test_group_analysis.py index b0710e3..7b12679 100644 --- a/tests/test_group_analysis.py +++ b/tests/test_group_analysis.py @@ -21,6 +21,8 @@ 
) from edvart.report_sections.section_base import Verbosity +from .execution_utils import check_section_executes + # Workaround to prevent multiple browser tabs opening with figures plotly.io.renderers.default = "json" @@ -95,6 +97,8 @@ def test_code_export_verbosity_low(): assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(group_section, df) + def test_code_export_verbosity_medium(): df = get_test_df() @@ -120,6 +124,7 @@ def test_code_export_verbosity_medium(): assert len(expected_code) == len(exported_code) for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(group_section, df) def test_code_export_verbosity_high(): @@ -174,6 +179,7 @@ def test_code_export_verbosity_high(): assert len(expected_code) == len(exported_code) for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(group_section, df) def test_columns_parameter(): diff --git a/tests/test_multivariate_analysis.py b/tests/test_multivariate_analysis.py index 20ba3f2..44b587e 100644 --- a/tests/test_multivariate_analysis.py +++ b/tests/test_multivariate_analysis.py @@ -15,6 +15,8 @@ from edvart.report_sections.section_base import Verbosity from edvart.utils import select_numeric_columns +from .execution_utils import check_section_executes + def get_test_df() -> pd.DataFrame: test_df = pd.DataFrame( @@ -139,6 +141,8 @@ def test_code_export_verbosity_low(): assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(multivariate_section, df) + def test_code_export_verbosity_low_with_subsections(): subsec = multivariate_analysis.MultivariateAnalysis.MultivariateAnalysisSubsection @@ -177,6 +181,8 @@ def test_code_export_verbosity_low_with_subsections(): assert 
len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(multivariate_section, df) + def test_code_export_verbosity_medium_all_cols_valid(): all_numeric_df = pd.DataFrame( @@ -203,6 +209,8 @@ def test_code_export_verbosity_medium_all_cols_valid(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(multivariate_section, all_numeric_df) + def test_generated_code_verbosity_1(): multivariate_section = multivariate_analysis.MultivariateAnalysis(verbosity=Verbosity.MEDIUM) @@ -239,6 +247,8 @@ def test_generated_code_verbosity_1(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(multivariate_section, df) + def test_generated_code_verbosity_2(): df = get_test_df() @@ -300,6 +310,8 @@ def test_generated_code_verbosity_2(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(multivariate_section, df) + def test_verbosity_medium_non_categorical_col(): random_array = np.random.randint(low=1, high=40, size=(100, 3)) @@ -320,6 +332,8 @@ def test_verbosity_medium_non_categorical_col(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(multivariate_section, random_df) + def test_verbosity_low_different_subsection_verbosities(): subsections = [ @@ -364,6 +378,8 @@ def test_verbosity_low_different_subsection_verbosities(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(multivariate_section, df) + def test_imports_verbosity_low(): multivariate_section = 
MultivariateAnalysis(verbosity=Verbosity.LOW) diff --git a/tests/test_overview_section.py b/tests/test_overview_section.py index 1f92563..b9a8db6 100644 --- a/tests/test_overview_section.py +++ b/tests/test_overview_section.py @@ -20,6 +20,8 @@ from edvart.report_sections.dataset_overview import Overview from edvart.report_sections.section_base import Verbosity +from .execution_utils import check_section_executes + def get_test_df() -> pd.DataFrame: test_df = pd.DataFrame(data=[[1.1, "a"], [2.2, "b"], [3.3, "c"]], columns=["A", "B"]) @@ -136,6 +138,8 @@ def test_code_export_verbosity_low(): # Test code equivalence assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(overview_section, df=get_test_df()) + def test_code_export_verbosity_low_with_subsections(): overview_section = Overview( @@ -158,6 +162,8 @@ def test_code_export_verbosity_low_with_subsections(): # Test code equivalence assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(overview_section, df=get_test_df()) + def test_code_export_verbosity_medium(): # Construct overview section @@ -192,6 +198,8 @@ def test_code_export_verbosity_medium(): for i in range(len(exported_code)): assert exported_code[i] == expected_code[i], "Exported code mismatch" + check_section_executes(overview_section, df=get_test_df()) + def test_code_export_verbosity_high(): # Construct overview section @@ -270,6 +278,8 @@ def test_code_export_verbosity_high(): for i in range(len(exported_code)): assert exported_code[i] == expected_code[i], "Exported code mismatch" + check_section_executes(overview_section, df=get_test_df()) + def test_verbosity_low_different_subsection_verbosities(): overview_section = Overview( @@ -303,6 +313,8 @@ def test_verbosity_low_different_subsection_verbosities(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + 
def get_test_df() -> pd.DataFrame:
    """Return a random 20-row frame with columns ``a``, ``b``, ``c``.

    The index holds one timestamp per row: the current time at the moment the
    row's label is built, shifted forward by the row number in minutes. The
    values come from ``np.random.rand``.
    """
    column_names = ["a", "b", "c"]
    row_count = 20

    # One `now()` call per row, each offset by its row number in minutes.
    minute_offsets = (pd.Timedelta(minutes=row) for row in range(row_count))
    time_index = [pd.Timestamp.now() + offset for offset in minute_offsets]

    values = np.random.rand(row_count, len(column_names))
    return pd.DataFrame(data=values, index=time_index, columns=column_names)
test_code_export_verbosity_low_with_subsections(): ], verbosity=Verbosity.LOW, ) + test_df = get_test_df() # Export code exported_cells = [] - ts_section.add_cells(exported_cells, df=pd.DataFrame()) + ts_section.add_cells(exported_cells, df=get_test_df()) # Remove markdown and other cells and get code strings exported_code = [cell["source"] for cell in exported_cells if cell["cell_type"] == "code"] # Define expected code @@ -203,6 +223,7 @@ def test_code_export_verbosity_low_with_subsections(): # Test code equivalence assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(ts_section, test_df) def test_code_export_verbosity_low_with_fft_stft(): @@ -215,9 +236,10 @@ def test_code_export_verbosity_low_with_fft_stft(): sampling_rate=1, stft_window_size=1, ) + test_df = get_test_df() # Export code exported_cells = [] - ts_section.add_cells(exported_cells, df=pd.DataFrame()) + ts_section.add_cells(exported_cells, df=test_df) # Remove markdown and other cells and get code strings exported_code = [cell["source"] for cell in exported_cells if cell["cell_type"] == "code"] # Define expected code @@ -230,13 +252,15 @@ def test_code_export_verbosity_low_with_fft_stft(): # Test code equivalence assert len(exported_code) == 1 assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(ts_section, test_df) def test_generated_code_verbosity_medium(): ts_section = TimeseriesAnalysis(verbosity=Verbosity.MEDIUM) + test_df = get_test_df() exported_cells = [] - ts_section.add_cells(exported_cells, df=pd.DataFrame()) + ts_section.add_cells(exported_cells, df=test_df) exported_code = [cell["source"] for cell in exported_cells if cell["cell_type"] == "code"] expected_code = [ @@ -252,13 +276,15 @@ def test_generated_code_verbosity_medium(): assert len(expected_code) == len(exported_code) for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == 
exported_line, "Exported code mismatch" + check_section_executes(ts_section, test_df) def test_generated_code_verbosity_high(): + test_df = get_test_df() ts_section = TimeseriesAnalysis(verbosity=Verbosity.HIGH, sampling_rate=1, stft_window_size=1) pairplot_cells = [] - ts_section.add_cells(pairplot_cells, df=pd.DataFrame()) + ts_section.add_cells(pairplot_cells, df=test_df) exported_code = [cell["source"] for cell in pairplot_cells if cell["cell_type"] == "code"] expected_code = [ @@ -320,9 +346,11 @@ def test_generated_code_verbosity_high(): assert len(expected_code) == len(exported_code) for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(ts_section, test_df) def test_verbosity_low_different_subsection_verbosities(): + test_df = get_test_df() ts_section = TimeseriesAnalysis( verbosity=Verbosity.LOW, subsections=[ @@ -341,7 +369,7 @@ def test_verbosity_low_different_subsection_verbosities(): ) ts_cells = [] - ts_section.add_cells(ts_cells, df=pd.DataFrame()) + ts_section.add_cells(ts_cells, df=test_df) exported_code = [cell["source"] for cell in ts_cells if cell["cell_type"] == "code"] expected_code = [ @@ -364,10 +392,14 @@ def test_verbosity_low_different_subsection_verbosities(): def test_boxplots_over_time_def(): - def month_func(x: datetime.datetime) -> str: + def month_func(x: datetime) -> str: return str(x.month) - boxplots_sub = BoxplotsOverTime(grouping_name="Month", grouping_function=month_func) + boxplots_sub = BoxplotsOverTime( + grouping_name="Month", + grouping_function=month_func, + grouping_function_imports=["from datetime import datetime"], + ) # Export code exported_cells = [] boxplots_sub.add_cells(exported_cells, df=pd.DataFrame()) @@ -382,6 +414,7 @@ def month_func(x: datetime.datetime) -> str: assert len(expected_code) == len(exported_code) for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == 
exported_line, "Exported code mismatch" + check_section_executes(boxplots_sub, get_test_df()) def test_boxplots_over_time_lambda(): @@ -404,6 +437,8 @@ def test_boxplots_over_time_lambda(): for expected_line, exported_line in zip(expected_code, exported_code): assert expected_line == exported_line, "Exported code mismatch" + check_section_executes(boxplots_sub, get_test_df()) + def test_imports_verbosity_low(): ts_section = TimeseriesAnalysis(verbosity=Verbosity.LOW) diff --git a/tests/test_univariate_analysis_section.py b/tests/test_univariate_analysis_section.py index 3dd0b0f..9969b3f 100644 --- a/tests/test_univariate_analysis_section.py +++ b/tests/test_univariate_analysis_section.py @@ -9,6 +9,8 @@ from edvart.report_sections.code_string_formatting import code_dedent, get_code from edvart.report_sections.section_base import Verbosity +from .execution_utils import check_section_executes + def test_invalid_verbosity(): with pytest.raises(ValueError): @@ -35,6 +37,8 @@ def test_code_export_verbosity_low(): # Test code equivalence assert exported_code[0] == expected_code[0], "Exported code mismatch" + check_section_executes(univariate_section, test_df) + def test_code_export_verbosity_medium(): test_df = pd.DataFrame(data=[[1.9, "a"], [2.1, "b"], [3.3, "c"]], columns=["A", "B"]) @@ -54,6 +58,8 @@ def test_code_export_verbosity_medium(): for i in range(len(exported_code)): assert exported_code[i] == expected_code[i], "Exported code mismatch" + check_section_executes(univariate_section, test_df) + def test_code_export_verbosity_high(): test_df = pd.DataFrame(data=[[1.9, "a"], [2.1, "b"], [3.3, "c"]], columns=["A", "B"]) @@ -104,6 +110,8 @@ def test_code_export_verbosity_high(): for i in range(len(exported_code)): assert exported_code[i] == expected_code[i], "Exported code mismatch" + check_section_executes(univariate_section, test_df) + def test_show(): test_df = pd.DataFrame(data=[[1.9, "a"], [2.1, "b"], [3.3, "c"]], columns=["A", "B"])