Skip to content

Commit

Permalink
removed prints
Browse files: browse the repository at this point in the history
  • Loading branch information
drahc1R committed Aug 8, 2023
1 parent 188ba9f commit 0cac2bb
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 77 deletions.
2 changes: 0 additions & 2 deletions synthetic_data/generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ def _generate_uncorrelated_column_data(self, num_samples):

generator_name = col_.get("data_type", None)
generator_func = self.gen_funcs.get(generator_name, None)
print(generator_name)

if not generator_name:
logging.warning(
Expand Down Expand Up @@ -140,7 +139,6 @@ def _generate_uncorrelated_column_data(self, num_samples):
# edge cases for extracting data from profiler report.
if generator_name == "datetime":
col_["format"] = col_["statistics"].get("format", None)
print(col_["format"], "SHOULD NOT BE EMPTY")
col_["min"] = pd.to_datetime(
col_["statistics"].get("min", None), format=col_["format"][0]
)
Expand Down
150 changes: 75 additions & 75 deletions tests/test_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_synthesize_correlated_method(self, mock_make_data):


# @mock.patch("generate_uncorrelated_column_data.TabularGenerator", spec=TabularGenerator)
class TestDatasetGenerator(unittest.TestCase):
class TestGenerateUncorrelatedColumnData(unittest.TestCase):
# @staticmethod
# def setup_tabular_generator_mock(mock_generator):
# mock_DataLabeler = mock_generator.return_value
Expand Down Expand Up @@ -276,80 +276,80 @@ def test_generate_uncorrelated_column_data(
else:
self.assertEqual(call_args_list[key], expected_calls[j][key])

# def test_get_ordered_column_integration(self):
# columns_to_gen = [
# {
# "generator": "integer",
# "name": "int",
# "min_value": 4,
# "max_value": 88,
# "order": "ascending",
# },
# {
# "generator": "datetime",
# "name": "dat",
# "date_format_list": ["%Y-%m-%d"],
# "start_date": pd.Timestamp(2001, 12, 22),
# "end_date": pd.Timestamp(2022, 12, 22),
# "order": "ascending",
# },
# {
# "generator": "text",
# "name": "txt",
# "chars": ["0", "1"],
# "str_len_min": 2,
# "str_len_max": 5,
# "order": "ascending",
# },
# {
# "generator": "categorical",
# "name": "cat",
# "categories": ["X", "Y", "Z"],
# "probabilities": [0.1, 0.5, 0.4],
# "order": "ascending",
# },
# {
# "generator": "float",
# "name": "flo",
# "min_value": 3,
# "max_value": 10,
# "sig_figs": 3,
# "order": "ascending",
# },
# ]
# expected_data = [
# np.array([21, 23, 30, 36, 57, 60, 62, 70, 70, 87]),
# np.array(
# [
# "2003-12-27",
# "2005-11-23",
# "2007-03-10",
# "2008-12-17",
# "2011-04-02",
# "2014-07-16",
# "2015-12-26",
# "2016-02-07",
# "2021-10-01",
# "2021-11-24",
# ]
# ),
# np.array(
# ["00", "000", "0001", "01", "0100", "10", "10", "100", "1110", "1111"]
# ),
# np.array(["Y", "Y", "Y", "Y", "Y", "Y", "Z", "Z", "Z", "Z"]),
# np.array(
# [3.035, 3.477, 4.234, 4.812, 4.977, 5.131, 5.379, 5.488, 7.318, 7.4]
# ),
# ]
# expected_df = pd.DataFrame.from_dict(
# dict(zip(["int", "dat", "txt", "cat", "flo"], expected_data))
# )
# actual_df = dataset_generator.generate_dataset(
# self.rng,
# columns_to_generate=columns_to_gen,
# dataset_length=self.dataset_length,
# )
# np.testing.assert_array_equal(actual_df.values, expected_df.values)
def test_get_ordered_column_integration(self):
    """Check ascending-ordered output for one column of each generator type.

    One column spec is built per generator type (integer, datetime, text,
    categorical, float), each with ``"order": "ascending"``. The specs are
    passed to ``dataset_generator.generate_dataset`` together with
    ``self.rng`` and ``self.dataset_length``, and the values of the returned
    frame are compared against a fixed table of expected values.
    """
    integer_spec = {
        "generator": "integer",
        "name": "int",
        "min_value": 4,
        "max_value": 88,
        "order": "ascending",
    }
    datetime_spec = {
        "generator": "datetime",
        "name": "dat",
        "date_format_list": ["%Y-%m-%d"],
        "start_date": pd.Timestamp(2001, 12, 22),
        "end_date": pd.Timestamp(2022, 12, 22),
        "order": "ascending",
    }
    text_spec = {
        "generator": "text",
        "name": "txt",
        "chars": ["0", "1"],
        "str_len_min": 2,
        "str_len_max": 5,
        "order": "ascending",
    }
    categorical_spec = {
        "generator": "categorical",
        "name": "cat",
        "categories": ["X", "Y", "Z"],
        "probabilities": [0.1, 0.5, 0.4],
        "order": "ascending",
    }
    float_spec = {
        "generator": "float",
        "name": "flo",
        "min_value": 3,
        "max_value": 10,
        "sig_figs": 3,
        "order": "ascending",
    }
    column_specs = [
        integer_spec,
        datetime_spec,
        text_spec,
        categorical_spec,
        float_spec,
    ]

    # Expected values keyed by column name, in the same order as the specs.
    # NOTE(review): every column lists 10 values, so self.dataset_length is
    # presumably 10 and self.rng presumably seeded in setUp -- confirm.
    expected_by_name = {
        "int": np.array([21, 23, 30, 36, 57, 60, 62, 70, 70, 87]),
        "dat": np.array(
            [
                "2003-12-27",
                "2005-11-23",
                "2007-03-10",
                "2008-12-17",
                "2011-04-02",
                "2014-07-16",
                "2015-12-26",
                "2016-02-07",
                "2021-10-01",
                "2021-11-24",
            ]
        ),
        "txt": np.array(
            ["00", "000", "0001", "01", "0100", "10", "10", "100", "1110", "1111"]
        ),
        "cat": np.array(["Y", "Y", "Y", "Y", "Y", "Y", "Z", "Z", "Z", "Z"]),
        "flo": np.array(
            [3.035, 3.477, 4.234, 4.812, 4.977, 5.131, 5.379, 5.488, 7.318, 7.4]
        ),
    }
    expected_df = pd.DataFrame.from_dict(expected_by_name)

    actual_df = dataset_generator.generate_dataset(
        self.rng,
        columns_to_generate=column_specs,
        dataset_length=self.dataset_length,
    )

    np.testing.assert_array_equal(actual_df.values, expected_df.values)


# def test_generate_dataset_with_invalid_generator(self):
Expand Down

0 comments on commit 0cac2bb

Please sign in to comment.