From 4ec2bd6f2f4f7f7b82aff50bf2685da972efad79 Mon Sep 17 00:00:00 2001
From: Liz Smith <liz.smith@richmond.edu>
Date: Tue, 13 Jun 2023 15:50:09 -0500
Subject: [PATCH 01/15] New preset implementation and test (#867)

* memory optimization preset

ttrying again

ttrying again 3

ttrying again 4

accidentally pushed my updated makefile

* Wrote catch for invalid presets, wrote test for catch for invalid presets, debugged new optimization preset

* Forgot to run pre-commit, fixed those issues

* black doing weird things

* made preset validation more maintainable by moving it to the constructor and getting rid of preset list
---
 dataprofiler/profilers/profiler_options.py    | 29 +++++++++++++++++--
 .../profiler_options/test_profiler_presets.py | 23 +++++++++++++++
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/dataprofiler/profilers/profiler_options.py b/dataprofiler/profilers/profiler_options.py
index 76b3654e3..711c5f13d 100644
--- a/dataprofiler/profilers/profiler_options.py
+++ b/dataprofiler/profilers/profiler_options.py
@@ -6,6 +6,7 @@
 import copy
 import re
 import warnings
+from typing import Any
 
 from ..labelers.base_data_labeler import BaseDataLabeler
 
@@ -1557,7 +1558,8 @@ def __init__(self, presets: str = None) -> None:
         :ivar unstructured_options: option set for unstructured dataset profiling.
         :vartype unstructured_options: UnstructuredOptions
         :ivar presets: A pre-configured mapping of a string name to group of options:
-            "complete", "data_types", and "numeric_stats_disabled". Default: None
+            "complete", "data_types", "numeric_stats_disabled",
+            and "memory_optimization". Default: None
         :vartype presets: Optional[str]
         """
         self.structured_options = StructuredOptions()
@@ -1570,6 +1572,10 @@ def __init__(self, presets: str = None) -> None:
                 self._data_types_presets()
             elif self.presets == "numeric_stats_disabled":
                 self._numeric_stats_disabled_presets()
+            elif self.presets == "memory_optimization":
+                self._memory_optimization_presets()
+            else:
+                raise ValueError("The preset entered is not a valid preset.")
 
     def _complete_presets(self) -> None:
         self.set({"*.is_enabled": True})
@@ -1583,6 +1589,25 @@ def _numeric_stats_disabled_presets(self) -> None:
         self.set({"*.float.is_numeric_stats_enabled": False})
         self.set({"structured_options.text.is_numeric_stats_enabled": False})
 
+    def _memory_optimization_presets(self) -> None:
+        self.set({"structured_options.row_statistics.is_enabled": False})
+        self.set({"structured_options.multiprocess.is_enabled": False})
+        self.set({"structured_options.data_labeler.is_enabled": False})
+        self.set({"structured_options.datetime.is_enabled": False})
+        self.set({"structured_options.order.is_enabled": False})
+        self.set({"structured_options.chi2_homogeneity.is_enabled": False})
+        self.set({"structured_options.null_replication_metrics.is_enabled": False})
+        self.set({"unstructured_options.data_labeler.is_enabled": False})
+        self.set(
+            {
+                (
+                    "structured_options.category."
+                    "max_sample_size_to_check_stop_condition"
+                ): 5000
+            }
+        )
+        self.set({"structured_options.category.stop_condition_unique_value_ratio": 0.5})
+
     def _validate_helper(self, variable_path: str = "ProfilerOptions") -> list[str]:
         """
         Validate the options do not conflict and cause errors.
@@ -1620,7 +1645,7 @@ def _validate_helper(self, variable_path: str = "ProfilerOptions") -> list[str]:
 
         return errors
 
-    def set(self, options: dict[str, bool]) -> None:
+    def set(self, options: dict[str, Any]) -> None:
         """
         Overwrite BaseOption.set.
 
diff --git a/dataprofiler/tests/profilers/profiler_options/test_profiler_presets.py b/dataprofiler/tests/profilers/profiler_options/test_profiler_presets.py
index 8f8bc0d6b..b1daa0bd3 100644
--- a/dataprofiler/tests/profilers/profiler_options/test_profiler_presets.py
+++ b/dataprofiler/tests/profilers/profiler_options/test_profiler_presets.py
@@ -33,3 +33,26 @@ def test_profiler_preset_numeric_stats_disabled(self, *mocks):
         self.assertFalse(options.structured_options.null_replication_metrics.is_enabled)
         self.assertTrue(options.structured_options.category.is_enabled)
         self.assertTrue(options.structured_options.order.is_enabled)
+
+    def test_profiler_preset_memory_optimization(self, *mocks):
+        options = ProfilerOptions(presets="memory_optimization")
+        self.assertFalse(options.structured_options.row_statistics.is_enabled)
+        self.assertFalse(options.structured_options.multiprocess.is_enabled)
+        self.assertFalse(options.structured_options.data_labeler.is_enabled)
+        self.assertFalse(options.structured_options.datetime.is_enabled)
+        self.assertFalse(options.structured_options.order.is_enabled)
+        self.assertFalse(options.structured_options.chi2_homogeneity.is_enabled)
+        self.assertFalse(options.structured_options.null_replication_metrics.is_enabled)
+        self.assertFalse(options.unstructured_options.data_labeler.is_enabled)
+        self.assertEqual(
+            options.structured_options.category.max_sample_size_to_check_stop_condition,
+            5000,
+        )
+        self.assertEqual(
+            options.structured_options.category.stop_condition_unique_value_ratio, 0.5
+        )
+
+    def test_profiler_preset_failure(self, *mocks):
+        expected_error = "The preset entered is not a valid preset."
+        with self.assertRaisesRegex(ValueError, expected_error):
+            ProfilerOptions(presets="failing_preset")

From 0023ed5823f288d7df4ddb257ee40d190cc5f44c Mon Sep 17 00:00:00 2001
From: Richard Bann <87214439+drahc1R@users.noreply.github.com>
Date: Wed, 14 Jun 2023 13:09:32 -0400
Subject: [PATCH 02/15] RowStatisticsOptions: Add option (#865)

* RowStatisticsOptions: Add null row count

Added null_row_count as an option in RowStatisticsOptions. It toggles the functionality for row_has_null_ratio and row_is_null_ratio in _update_row_statistics.

* Unit test for RowStatisticOptions:

* Black formatting

* RowStatisticsOptions: Add null row count

Added null_row_count as an option in RowStatisticsOptions. It toggles the functionality for row_has_null_ratio and row_is_null_ratio in _update_row_statistics.

* Unit test for RowStatisticOptions:

* Black formatting

* added a unit test for RowStatisticsOptions

* Deleted test cases that were written in the wrong file

* updated testing for null_count toggle in _update_row_statistics

* removed the RowStatisticsOptions from test_profiler_options imports

* add line

* Created toggle option for null_count

* RowStatisticsOptions: Add implementation

* Revert "RowStatisticsOptions: Add implementation"

This reverts commit 2da6a938d78690203d6ced4d1d16edb73138494e.

* RowStatsticsOptions: Create option

* fixed pre-commit error

* Update dataprofiler/profilers/profiler_options.py

Co-authored-by: Taylor Turner <taylorfturner@gmail.com>

* Update dataprofiler/profilers/profiler_options.py

Co-authored-by: Taylor Turner <taylorfturner@gmail.com>

* fixed documentation

---------

Co-authored-by: Taylor Turner <taylorfturner@gmail.com>
---
 dataprofiler/profilers/profiler_options.py    | 19 +++++++++++++++++--
 .../test_row_statistics_options.py            |  2 +-
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/dataprofiler/profilers/profiler_options.py b/dataprofiler/profilers/profiler_options.py
index 711c5f13d..467649aa3 100644
--- a/dataprofiler/profilers/profiler_options.py
+++ b/dataprofiler/profilers/profiler_options.py
@@ -1071,15 +1071,25 @@ def _validate_helper(self, variable_path: str = "UniqueCountOptions") -> list[st
 class RowStatisticsOptions(BooleanOption):
     """For configuring options for row statistics."""
 
-    def __init__(self, is_enabled: bool = True, unique_count: bool = True) -> None:
+    def __init__(
+        self,
+        is_enabled: bool = True,
+        unique_count: bool = True,
+        null_count: bool = True,
+    ) -> None:
         """
         Initialize options for row statistics.
 
         :ivar is_enabled: boolean option to enable/disable.
         :vartype is_enabled: bool
+        :ivar unique_count: boolean option to enable/disable unique_count
+        :vartype unique_count: bool
+        ivar null_count: boolean option to enable/disable null_count
+        :vartype null_count: bool
         """
         BooleanOption.__init__(self, is_enabled=is_enabled)
         self.unique_count = UniqueCountOptions(is_enabled=unique_count)
+        self.null_count = BooleanOption(is_enabled=null_count)
 
     def _validate_helper(
         self, variable_path: str = "RowStatisticsOptions"
@@ -1095,9 +1105,14 @@ def _validate_helper(
         errors = super()._validate_helper(variable_path=variable_path)
         if not isinstance(self.unique_count, UniqueCountOptions):
             errors.append(
-                f"{variable_path}.full_hashing must be an UniqueCountOptions."
+                f"{variable_path}.unique_count must be an UniqueCountOptions."
             )
+
+        if not isinstance(self.null_count, BooleanOption):
+            errors.append(f"{variable_path}.null_count must be an BooleanOption.")
+
         errors += self.unique_count._validate_helper(variable_path + ".unique_counts")
+        errors += self.null_count._validate_helper(variable_path + ".null_count")
         return super()._validate_helper(variable_path)
 
 
diff --git a/dataprofiler/tests/profilers/profiler_options/test_row_statistics_options.py b/dataprofiler/tests/profilers/profiler_options/test_row_statistics_options.py
index b32ef4ee8..687888b1e 100644
--- a/dataprofiler/tests/profilers/profiler_options/test_row_statistics_options.py
+++ b/dataprofiler/tests/profilers/profiler_options/test_row_statistics_options.py
@@ -7,7 +7,7 @@
 class TestRowStatisticsOptions(TestBooleanOption):
 
     option_class = RowStatisticsOptions
-    keys = ["unique_count"]
+    keys = ["unique_count", "null_count"]
 
     def get_options(self, **params):
         options = RowStatisticsOptions()

From 7d9d995d6d6c35dec2ddbbfb4722aff6cf2d8144 Mon Sep 17 00:00:00 2001
From: Liz Smith <liz.smith@richmond.edu>
Date: Thu, 15 Jun 2023 14:51:20 -0500
Subject: [PATCH 03/15] Preset test updated w new names and different toggles
 (#880)

* memory optimization preset

ttrying again

ttrying again 3

ttrying again 4

accidentally pushed my updated makefile

* trying

* trying

* black doing weird things

* trying

* made preset validation more maintainable by moving it to the constructor and getting rid of preset list

* Update to open-source in prep for wrapper changes for mem op preset

* updated preset toggles and preset name (mem op -> large data)

* updated tests to match

* continued name and test and toggle updates

* fix comments
---
 dataprofiler/profilers/profiler_options.py      | 17 +++++------------
 .../profiler_options/test_profiler_presets.py   | 15 +++++----------
 2 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/dataprofiler/profilers/profiler_options.py b/dataprofiler/profilers/profiler_options.py
index 467649aa3..2b1f7b06a 100644
--- a/dataprofiler/profilers/profiler_options.py
+++ b/dataprofiler/profilers/profiler_options.py
@@ -1574,7 +1574,7 @@ def __init__(self, presets: str = None) -> None:
         :vartype unstructured_options: UnstructuredOptions
         :ivar presets: A pre-configured mapping of a string name to group of options:
             "complete", "data_types", "numeric_stats_disabled",
-            and "memory_optimization". Default: None
+            and "lower_memory_sketching". Default: None
         :vartype presets: Optional[str]
         """
         self.structured_options = StructuredOptions()
@@ -1587,8 +1587,8 @@ def __init__(self, presets: str = None) -> None:
                 self._data_types_presets()
             elif self.presets == "numeric_stats_disabled":
                 self._numeric_stats_disabled_presets()
-            elif self.presets == "memory_optimization":
-                self._memory_optimization_presets()
+            elif self.presets == "lower_memory_sketching":
+                self._lower_memory_sketching_presets()
             else:
                 raise ValueError("The preset entered is not a valid preset.")
 
@@ -1604,15 +1604,8 @@ def _numeric_stats_disabled_presets(self) -> None:
         self.set({"*.float.is_numeric_stats_enabled": False})
         self.set({"structured_options.text.is_numeric_stats_enabled": False})
 
-    def _memory_optimization_presets(self) -> None:
-        self.set({"structured_options.row_statistics.is_enabled": False})
-        self.set({"structured_options.multiprocess.is_enabled": False})
-        self.set({"structured_options.data_labeler.is_enabled": False})
-        self.set({"structured_options.datetime.is_enabled": False})
-        self.set({"structured_options.order.is_enabled": False})
-        self.set({"structured_options.chi2_homogeneity.is_enabled": False})
-        self.set({"structured_options.null_replication_metrics.is_enabled": False})
-        self.set({"unstructured_options.data_labeler.is_enabled": False})
+    def _lower_memory_sketching_presets(self) -> None:
+        self.set({"row_statistics.unique_count.hashing_method": "hll"})
         self.set(
             {
                 (
diff --git a/dataprofiler/tests/profilers/profiler_options/test_profiler_presets.py b/dataprofiler/tests/profilers/profiler_options/test_profiler_presets.py
index b1daa0bd3..545d9259c 100644
--- a/dataprofiler/tests/profilers/profiler_options/test_profiler_presets.py
+++ b/dataprofiler/tests/profilers/profiler_options/test_profiler_presets.py
@@ -34,16 +34,11 @@ def test_profiler_preset_numeric_stats_disabled(self, *mocks):
         self.assertTrue(options.structured_options.category.is_enabled)
         self.assertTrue(options.structured_options.order.is_enabled)
 
-    def test_profiler_preset_memory_optimization(self, *mocks):
-        options = ProfilerOptions(presets="memory_optimization")
-        self.assertFalse(options.structured_options.row_statistics.is_enabled)
-        self.assertFalse(options.structured_options.multiprocess.is_enabled)
-        self.assertFalse(options.structured_options.data_labeler.is_enabled)
-        self.assertFalse(options.structured_options.datetime.is_enabled)
-        self.assertFalse(options.structured_options.order.is_enabled)
-        self.assertFalse(options.structured_options.chi2_homogeneity.is_enabled)
-        self.assertFalse(options.structured_options.null_replication_metrics.is_enabled)
-        self.assertFalse(options.unstructured_options.data_labeler.is_enabled)
+    def test_profiler_preset_lower_memory_sketching(self, *mocks):
+        options = ProfilerOptions(presets="lower_memory_sketching")
+        self.assertEqual(
+            options.structured_options.row_statistics.unique_count.hashing_method, "hll"
+        )
         self.assertEqual(
             options.structured_options.category.max_sample_size_to_check_stop_condition,
             5000,

From 661a8d060b1ccfa5fdd0f9931eda317998516c18 Mon Sep 17 00:00:00 2001
From: Richard Bann <87214439+drahc1R@users.noreply.github.com>
Date: Thu, 15 Jun 2023 16:54:05 -0400
Subject: [PATCH 04/15] RowStatisticsOptions: Implementing option (#871)

* Implementing option

* Implementing option

* took out redundant if statement. added test case for when null_count is disabled.

* attempt to check for conflicts between profile merges

* added test to check if two profilers have null_count enabled before merging them together

* fixed typo and added a trycatch to prevent failing test

* No mocks needed. Fixed assertRaisesRegex error

* Changed variables names and added a new test to check for check the null_count when null_count is disabled.

* Changed name of test, moved tests to TestStructuredProfilerRowStatistics. Fixed position of if statement to prevent unnecessary code from running.

* added null_count test cases

* fixed indentation mistake

* fixed typo

* removed a useless commented a line

* Updated test name
---
 dataprofiler/profilers/profile_builder.py     | 100 +++++++++++-------
 .../tests/profilers/test_profile_builder.py   |  69 +++++++++++-
 2 files changed, 125 insertions(+), 44 deletions(-)

diff --git a/dataprofiler/profilers/profile_builder.py b/dataprofiler/profilers/profile_builder.py
index d64eaa175..f3757256b 100644
--- a/dataprofiler/profilers/profile_builder.py
+++ b/dataprofiler/profilers/profile_builder.py
@@ -1582,6 +1582,15 @@ def _add_error_checks(  # type: ignore[override]
                 "Attempting to merge two profiles with unique row "
                 "count option enabled on one profile but not the other."
             )
+        # Check null_count options
+        if (
+            self.options.row_statistics.null_count.is_enabled
+            != other.options.row_statistics.null_count.is_enabled
+        ):
+            raise ValueError(
+                "Attempting to merge two profiles with null row "
+                "count option enabled on one profile but not the other."
+            )
         # Check hashing_method options
         if (
             self.options.row_statistics.unique_count.hashing_method
@@ -1967,7 +1976,10 @@ def _get_unique_row_ratio(self) -> float | None:
 
     def _get_row_is_null_ratio(self) -> float | None:
         """Return whether row is null ratio."""
-        if not self.options.row_statistics.is_enabled:
+        if (
+            not self.options.row_statistics.is_enabled
+            or not self.options.row_statistics.null_count.is_enabled
+        ):
             return None
 
         if self._min_col_samples_used:
@@ -1976,7 +1988,10 @@ def _get_row_is_null_ratio(self) -> float | None:
 
     def _get_row_has_null_ratio(self) -> float | None:
         """Return whether row has null ratio."""
-        if not self.options.row_statistics.is_enabled:
+        if (
+            not self.options.row_statistics.is_enabled
+            or not self.options.row_statistics.null_count.is_enabled
+        ):
             return None
 
         if self._min_col_samples_used:
@@ -2051,48 +2066,51 @@ def _update_row_statistics(
                         self.hashed_row_object.add(record)
 
         # Calculate Null Column Count
-        null_rows = set()
-        null_in_row_count = set()
-        first_col_flag = True
-        for column in self._profile:
-            null_type_dict = column.null_types_index
-            null_row_indices = set()
-            if null_type_dict:
-                null_row_indices = set.union(*null_type_dict.values())
-
-            # If sample ids provided, only consider nulls in rows that
-            # were fully sampled
-            if sample_ids is not None:
-                # This is the amount (integer) indices were shifted by in the
-                # event of overlap
-                shift = column._index_shift
-                if shift is None:
-                    # Shift is None if index is str or if no overlap detected
-                    null_row_indices = null_row_indices.intersection(
-                        data.index[sample_ids[: self._min_sampled_from_batch]]
-                    )
+        if self.options.row_statistics.null_count.is_enabled:
+            null_rows = set()
+            null_in_row_count = set()
+            first_col_flag = True
+            for column in self._profile:
+                null_type_dict = column.null_types_index
+                null_row_indices = set()
+                if null_type_dict:
+                    null_row_indices = set.union(*null_type_dict.values())
+
+                # If sample ids provided, only consider nulls in rows that
+                # were fully sampled
+                if sample_ids is not None:
+                    # This is the amount (integer) indices were shifted by in the
+                    # event of overlap
+                    shift = column._index_shift
+                    if shift is None:
+                        # Shift is None if index is str or if no overlap detected
+                        null_row_indices = null_row_indices.intersection(
+                            data.index[sample_ids[: self._min_sampled_from_batch]]
+                        )
+                    else:
+                        # Only shift if index shift detected (must be ints)
+                        null_row_indices = null_row_indices.intersection(
+                            data.index[sample_ids[: self._min_sampled_from_batch]]
+                            + shift
+                        )
+
+                # Find the common null indices between the columns
+                if first_col_flag:
+                    null_rows = null_row_indices
+                    null_in_row_count = null_row_indices
+                    first_col_flag = False
                 else:
-                    # Only shift if index shift detected (must be ints)
-                    null_row_indices = null_row_indices.intersection(
-                        data.index[sample_ids[: self._min_sampled_from_batch]] + shift
-                    )
+                    null_rows = null_rows.intersection(null_row_indices)
+                    null_in_row_count = null_in_row_count.union(null_row_indices)
 
-            # Find the common null indices between the columns
-            if first_col_flag:
-                null_rows = null_row_indices
-                null_in_row_count = null_row_indices
-                first_col_flag = False
+            # If sample_ids provided,
+            # increment since that means only new data read
+            if sample_ids is not None:
+                self.row_has_null_count += len(null_in_row_count)
+                self.row_is_null_count += len(null_rows)
             else:
-                null_rows = null_rows.intersection(null_row_indices)
-                null_in_row_count = null_in_row_count.union(null_row_indices)
-
-        # If sample_ids provided, increment since that means only new data read
-        if sample_ids is not None:
-            self.row_has_null_count += len(null_in_row_count)
-            self.row_is_null_count += len(null_rows)
-        else:
-            self.row_has_null_count = len(null_in_row_count)
-            self.row_is_null_count = len(null_rows)
+                self.row_has_null_count = len(null_in_row_count)
+                self.row_is_null_count = len(null_rows)
 
     def _get_correlation(
         self, clean_samples: dict, batch_properties: dict
diff --git a/dataprofiler/tests/profilers/test_profile_builder.py b/dataprofiler/tests/profilers/test_profile_builder.py
index 3a5505192..d7dbd2a10 100644
--- a/dataprofiler/tests/profilers/test_profile_builder.py
+++ b/dataprofiler/tests/profilers/test_profile_builder.py
@@ -233,6 +233,8 @@ def test_add_profilers(self, *mocks):
         self.assertEqual(
             "<class 'pandas.core.frame.DataFrame'>", merged_profile.file_type
         )
+        self.assertTrue(merged_profile.options.row_statistics.null_count.is_enabled)
+        self.assertTrue(merged_profile.options.row_statistics.unique_count.is_enabled)
         self.assertEqual(2, merged_profile.row_has_null_count)
         self.assertEqual(2, merged_profile.row_is_null_count)
         self.assertEqual(7, merged_profile.total_samples)
@@ -3555,6 +3557,57 @@ def setUpClass(cls):
                 cls.data, len(cls.data), options=profiler_options_full
             )
 
+    def test_adding_profiles_of_mismatched_null_count_options(self):
+        profiler_options_null_count = ProfilerOptions()
+        profiler_options_null_count.set(
+            {
+                "*.is_enabled": False,
+                "row_statistics.*.is_enabled": True,
+                "row_statistics.null_count.is_enabled": True,
+            }
+        )
+        profiler_options_null_disabled = ProfilerOptions()
+        profiler_options_null_disabled.set(
+            {
+                "*.is_enabled": False,
+                "row_statistics.*.is_enabled": True,
+                "row_statistics.null_count.is_enabled": False,
+            }
+        )
+        data = pd.DataFrame([1, None, 3, 4, 5, None, 1])
+        with test_utils.mock_timeit():
+            profiler_w_null_count = dp.StructuredProfiler(
+                data[:2], options=profiler_options_null_count
+            )
+            profiler_w_disabled_null_count = dp.StructuredProfiler(
+                data[2:], options=profiler_options_null_disabled
+            )
+
+        with self.assertRaisesRegex(
+            ValueError,
+            "Attempting to merge two profiles with null row "
+            "count option enabled on one profile but not the other.",
+        ):
+            profiler_w_null_count + profiler_w_disabled_null_count
+
+    def test_profile_null_count_not_enabled(self):
+        profiler_options_null_disabled = ProfilerOptions()
+        profiler_options_null_disabled.set(
+            {
+                "*.is_enabled": False,
+                "row_statistics.*.is_enabled": True,
+                "row_statistics.null_count.is_enabled": False,
+            }
+        )
+        data = pd.DataFrame([1, None, 3, 4, 5, None, 1])
+        with test_utils.mock_timeit():
+            profiler_w_disabled_null_count = dp.StructuredProfiler(
+                data[2:], options=profiler_options_null_disabled
+            )
+
+        self.assertEqual(0, profiler_w_disabled_null_count.row_has_null_count)
+        self.assertEqual(0, profiler_w_disabled_null_count.row_is_null_count)
+
     def test_correct_rows_ingested(self):
         test_dict = {
             "1": ["nan", "null", None, None, ""],
@@ -3618,9 +3671,7 @@ def test_correct_null_row_counts(self):
     def test_row_is_null_ratio_row_stats_disabled(self):
         profiler_options_1 = ProfilerOptions()
         profiler_options_1.set(
-            {
-                "*.is_enabled": False,
-            }
+            {"*.is_enabled": False, "row_statistics.null_count.is_enabled": False}
         )
         profiler = StructuredProfiler(pd.DataFrame([]), options=profiler_options_1)
         self.assertIsNone(profiler._get_row_is_null_ratio())
@@ -4085,6 +4136,18 @@ def test_unique_row_ratio_empty_profiler(self):
         profiler = StructuredProfiler(pd.DataFrame([]), options=profiler_options)
         self.assertEqual(0, profiler._get_unique_row_ratio())
 
+    def test_null_count_empty_profiler(self):
+        profiler_options = ProfilerOptions()
+        profiler_options.set(
+            {
+                "*.is_enabled": False,
+                "row_statistics.null_count.is_enabled": False,
+            }
+        )
+        profiler = StructuredProfiler(pd.DataFrame([]), options=profiler_options)
+        self.assertIsNone(profiler._get_row_is_null_ratio())
+        self.assertIsNone(profiler._get_row_has_null_ratio())
+
     def test_correct_duplicate_row_count_full_row_hashing(self):
         self.assertEqual(15, len(self.trained_schema_full.hashed_row_object))
         self.assertEqual(20, self.trained_schema_full.total_samples)

From 4d2b6add147f8f07e97d5a3aaa7610aad8b948b3 Mon Sep 17 00:00:00 2001
From: clee1152 <chrislee011502@gmail.com>
Date: Tue, 27 Jun 2023 09:50:33 -0400
Subject: [PATCH 05/15] Implemented options for `num_quantiles` feature in
 `NumericStatsMixin`

---
 dataprofiler/profilers/numerical_column_stats.py |  6 ++++--
 dataprofiler/profilers/profiler_options.py       | 14 ++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/dataprofiler/profilers/numerical_column_stats.py b/dataprofiler/profilers/numerical_column_stats.py
index ae058464e..abb29c844 100644
--- a/dataprofiler/profilers/numerical_column_stats.py
+++ b/dataprofiler/profilers/numerical_column_stats.py
@@ -82,9 +82,11 @@ def __init__(self, options: NumericalOptions = None) -> None:
         self._mode_is_enabled: bool = True
         self.num_zeros: int = 0
         self.num_negatives: int = 0
+        self.num_quantiles: int = 1000 # By default, we use 1000 quantiles
         if options:
             self.bias_correction = options.bias_correction.is_enabled
             self._top_k_modes = options.mode.top_k_modes
+            self.num_quantiles = options.num_quantiles
             self._median_is_enabled = options.median.is_enabled
             self._median_abs_dev_is_enabled = options.median_abs_deviation.is_enabled
             self._mode_is_enabled = options.mode.is_enabled
@@ -111,9 +113,8 @@ def __init__(self, options: NumericalOptions = None) -> None:
                 "suggested_bin_count": self.min_histogram_bin,
                 "histogram": {"bin_counts": None, "bin_edges": None},
             }
-        num_quantiles: int = 1000  # TODO: add to options
         self.quantiles: list[float] | dict = {
-            bin_num: None for bin_num in range(num_quantiles - 1)
+            bin_num: None for bin_num in range(self.num_quantiles - 1)
         }
         self.__calculations = {
             "min": NumericStatsMixin._get_min,
@@ -383,6 +384,7 @@ def profile(self) -> dict:
             skewness=self.np_type_to_type(self.skewness),
             kurtosis=self.np_type_to_type(self.kurtosis),
             histogram=self._get_best_histogram_for_profile(),
+            num_quantiles=self.num_quantiles,
             quantiles=self.quantiles,
             median_abs_deviation=self.np_type_to_type(self.median_abs_deviation),
             num_zeros=self.np_type_to_type(self.num_zeros),
diff --git a/dataprofiler/profilers/profiler_options.py b/dataprofiler/profilers/profiler_options.py
index 2b1f7b06a..cecbf2484 100644
--- a/dataprofiler/profilers/profiler_options.py
+++ b/dataprofiler/profilers/profiler_options.py
@@ -352,6 +352,8 @@ def __init__(self) -> None:
         :vartype num_zeros: BooleanOption
         :ivar num_negatives: boolean option to enable/disable num_negatives
         :vartype num_negatives: BooleanOption
+        :ivar num_quantiles: boolean option to enable/disable num_quantiles
+        :vartype num_quantiles: BooleanOption
         :ivar is_numeric_stats_enabled: boolean to enable/disable all numeric
             stats
         :vartype is_numeric_stats_enabled: bool
@@ -367,6 +369,7 @@ def __init__(self) -> None:
         self.median_abs_deviation = BooleanOption(is_enabled=True)
         self.num_zeros = BooleanOption(is_enabled=True)
         self.num_negatives = BooleanOption(is_enabled=True)
+        self.num_quantiles = BooleanOption(is_enabled=True)
         self.histogram_and_quantiles = HistogramOption()
         # By default, we correct for bias
         self.bias_correction = BooleanOption(is_enabled=True)
@@ -396,6 +399,7 @@ def is_numeric_stats_enabled(self) -> bool:
             or self.histogram_and_quantiles.is_enabled
             or self.num_zeros.is_enabled
             or self.num_negatives.is_enabled
+            or self.num_quantiles.is_enabled
         ):
             return True
         return False
@@ -425,6 +429,7 @@ def is_numeric_stats_enabled(self, value: bool) -> None:
         self.num_zeros.is_enabled = value
         self.num_negatives.is_enabled = value
         self.histogram_and_quantiles.is_enabled = value
+        self.num_quantiles.is_enabled = value
 
     @property
     def properties(self) -> dict[str, BooleanOption]:
@@ -464,6 +469,7 @@ def _validate_helper(self, variable_path: str = "NumericalOptions") -> list[str]
             "bias_correction",
             "num_zeros",
             "num_negatives",
+            "num_quantiles",
         ]:
             if not isinstance(self.properties[item], BooleanOption):
                 errors.append(f"{variable_path}.{item} must be a BooleanOption.")
@@ -569,6 +575,8 @@ def __init__(self) -> None:
         :vartype num_zeros: BooleanOption
         :ivar num_negatives: boolean option to enable/disable num_negatives
         :vartype num_negatives: BooleanOption
+        :ivar num_quantiles: boolean option to enable/disable num_quantiles
+        :vartype num_quantiles: BooleanOption
         :ivar is_numeric_stats_enabled: boolean to enable/disable all numeric
             stats
         :vartype is_numeric_stats_enabled: bool
@@ -666,6 +674,8 @@ def __init__(self) -> None:
         :vartype num_zeros: BooleanOption
         :ivar num_negatives: boolean option to enable/disable num_negatives
         :vartype num_negatives: BooleanOption
+        :ivar num_quantiles: boolean option to enable/disable num_quantiles
+        :vartype num_quantiles: BooleanOption
         :ivar is_numeric_stats_enabled: boolean to enable/disable all numeric
             stats
         :vartype is_numeric_stats_enabled: bool
@@ -725,6 +735,8 @@ def __init__(self) -> None:
         :vartype num_zeros: BooleanOption
         :ivar num_negatives: boolean option to enable/disable num_negatives
         :vartype num_negatives: BooleanOption
+        :ivar num_quantiles: boolean option to enable/disable num_quantiles
+        :vartype num_quantiles: BooleanOption
         :ivar is_numeric_stats_enabled: boolean to enable/disable all numeric
             stats
         :vartype is_numeric_stats_enabled: bool
@@ -788,6 +800,7 @@ def is_numeric_stats_enabled(self) -> bool:
             or self.median.is_enabled
             or self.median_abs_deviation.is_enabled
             or self.histogram_and_quantiles.is_enabled
+            or self.num_quantiles.is_enabled
         ):
             return True
         return False
@@ -814,6 +827,7 @@ def is_numeric_stats_enabled(self, value: bool) -> None:
         self.kurtosis.is_enabled = value
         self.median_abs_deviation.is_enabled = value
         self.histogram_and_quantiles.is_enabled = value
+        self.num_quantiles.is_enabled = value
 
 
 class DateTimeOptions(BaseInspectorOptions):

From 47903d227b3789985139ffcdfbdba8a6ccdb31b4 Mon Sep 17 00:00:00 2001
From: clee1152 <chrislee011502@gmail.com>
Date: Tue, 27 Jun 2023 09:56:44 -0400
Subject: [PATCH 06/15] Implemented options for `num_quantiles.py`.

---
 .../profilers/numerical_column_stats.py       |  4 +-
 dataprofiler/profilers/profiler_options.py    | 50 +++++++++++++++++--
 2 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/dataprofiler/profilers/numerical_column_stats.py b/dataprofiler/profilers/numerical_column_stats.py
index abb29c844..627dc1fb0 100644
--- a/dataprofiler/profilers/numerical_column_stats.py
+++ b/dataprofiler/profilers/numerical_column_stats.py
@@ -82,11 +82,11 @@ def __init__(self, options: NumericalOptions = None) -> None:
         self._mode_is_enabled: bool = True
         self.num_zeros: int = 0
         self.num_negatives: int = 0
-        self.num_quantiles: int = 1000 # By default, we use 1000 quantiles
+        self.num_quantiles: int = 1000  # By default, we use 1000 quantiles
         if options:
             self.bias_correction = options.bias_correction.is_enabled
             self._top_k_modes = options.mode.top_k_modes
-            self.num_quantiles = options.num_quantiles
+            self.num_quantiles = options.num_quantiles.num_quantiles
             self._median_is_enabled = options.median.is_enabled
             self._median_abs_dev_is_enabled = options.median_abs_deviation.is_enabled
             self._mode_is_enabled = options.mode.is_enabled
diff --git a/dataprofiler/profilers/profiler_options.py b/dataprofiler/profilers/profiler_options.py
index cecbf2484..09d9f6736 100644
--- a/dataprofiler/profilers/profiler_options.py
+++ b/dataprofiler/profilers/profiler_options.py
@@ -269,6 +269,42 @@ def _validate_helper(self, variable_path: str = "ModeOption") -> list[str]:
         return errors
 
 
+class NumQuantilesOption(BooleanOption):
+    """For setting number of quantile options."""
+
+    def __init__(self, is_enabled: bool = True, num_quantiles: int = 1000) -> None:
+        """
+        Initialize options for number of quantiles.
+
+        :ivar is_enabled: boolean option to enable/disable the option.
+        :vartype is_enabled: bool
+        :ivar num_quantiles: the number of quantiles to bin the data.
+        :vartype num_quantiles: int
+        """
+        self.num_quantiles = num_quantiles
+        super().__init__(is_enabled=is_enabled)
+
+    def _validate_helper(self, variable_path: str = "NumQuantilesOption") -> list[str]:
+        """
+        Validate the options do not conflict and cause errors.
+
+        :param variable_path: current path to variable set.
+        :type variable_path: str
+        :return: list of errors (if raise_error is false)
+        :rtype: list(str)
+        """
+        errors = super()._validate_helper(variable_path=variable_path)
+
+        if self.num_quantiles is not None and (
+            not isinstance(self.num_quantiles, int) or self.num_quantiles < 1
+        ):
+            errors.append(
+                "{}.num_quantiles must be either None"
+                " or a positive integer".format(variable_path)
+            )
+        return errors
+
+
 class BaseInspectorOptions(BooleanOption):
     """For setting Base options."""
 
@@ -353,7 +389,8 @@ def __init__(self) -> None:
         :ivar num_negatives: boolean option to enable/disable num_negatives
         :vartype num_negatives: BooleanOption
         :ivar num_quantiles: boolean option to enable/disable num_quantiles
-        :vartype num_quantiles: BooleanOption
+            and set the number of quantiles
+        :vartype num_quantiles: NumQuantilesOption
         :ivar is_numeric_stats_enabled: boolean to enable/disable all numeric
             stats
         :vartype is_numeric_stats_enabled: bool
@@ -369,7 +406,7 @@ def __init__(self) -> None:
         self.median_abs_deviation = BooleanOption(is_enabled=True)
         self.num_zeros = BooleanOption(is_enabled=True)
         self.num_negatives = BooleanOption(is_enabled=True)
-        self.num_quantiles = BooleanOption(is_enabled=True)
+        self.num_quantiles = NumQuantilesOption(is_enabled=True)
         self.histogram_and_quantiles = HistogramOption()
         # By default, we correct for bias
         self.bias_correction = BooleanOption(is_enabled=True)
@@ -576,7 +613,8 @@ def __init__(self) -> None:
         :ivar num_negatives: boolean option to enable/disable num_negatives
         :vartype num_negatives: BooleanOption
         :ivar num_quantiles: boolean option to enable/disable num_quantiles
-        :vartype num_quantiles: BooleanOption
+            and set the number of quantiles
+        :vartype num_quantiles: NumQuantilesOption
         :ivar is_numeric_stats_enabled: boolean to enable/disable all numeric
             stats
         :vartype is_numeric_stats_enabled: bool
@@ -675,7 +713,8 @@ def __init__(self) -> None:
         :ivar num_negatives: boolean option to enable/disable num_negatives
         :vartype num_negatives: BooleanOption
         :ivar num_quantiles: boolean option to enable/disable num_quantiles
-        :vartype num_quantiles: BooleanOption
+            and set the number of quantiles
+        :vartype num_quantiles: NumQuantilesOption
         :ivar is_numeric_stats_enabled: boolean to enable/disable all numeric
             stats
         :vartype is_numeric_stats_enabled: bool
@@ -736,7 +775,8 @@ def __init__(self) -> None:
         :ivar num_negatives: boolean option to enable/disable num_negatives
         :vartype num_negatives: BooleanOption
         :ivar num_quantiles: boolean option to enable/disable num_quantiles
-        :vartype num_quantiles: BooleanOption
+            and set the number of quantiles
+        :vartype num_quantiles: NumQuantilesOption
         :ivar is_numeric_stats_enabled: boolean to enable/disable all numeric
             stats
         :vartype is_numeric_stats_enabled: bool

From 4045c58440ef51ff3917ce15faeef7a20a146a67 Mon Sep 17 00:00:00 2001
From: clee1152 <chrislee011502@gmail.com>
Date: Tue, 27 Jun 2023 09:57:39 -0400
Subject: [PATCH 07/15] Added tests for `num_quantiles`.

---
 .../test_numerical_options.py                 | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/dataprofiler/tests/profilers/profiler_options/test_numerical_options.py b/dataprofiler/tests/profilers/profiler_options/test_numerical_options.py
index 63ed6f04f..56664dc53 100644
--- a/dataprofiler/tests/profilers/profiler_options/test_numerical_options.py
+++ b/dataprofiler/tests/profilers/profiler_options/test_numerical_options.py
@@ -20,6 +20,7 @@ class TestNumericalOptions(TestBaseInspectorOptions):
         "median_abs_deviation",
         "num_zeros",
         "num_negatives",
+        "num_quantiles",
         "histogram_and_quantiles",
     ]
     numeric_keys = [
@@ -35,6 +36,7 @@ class TestNumericalOptions(TestBaseInspectorOptions):
         "histogram_and_quantiles",
         "num_zeros",
         "num_negatives",
+        "num_quantiles",
     ]
 
     def test_init(self):
@@ -123,6 +125,24 @@ def test_validate_helper(self):
         self.assertEqual([mode_error], options._validate_helper())
         options.set({"mode.top_k_modes": 5})
 
+        # Zero num_quantiles
+        options.set(
+            {"num_quantiles.is_enabled": True, "num_quantiles.num_quantiles": 0}
+        )
+        num_quantiles_error = (
+            "{}.num_quantiles.num_quantiles must be either None"
+            " or a positive integer".format(optpth)
+        )
+        self.assertEqual([num_quantiles_error], options._validate_helper())
+        # Negative num_quantiles
+        options.set({"num_quantiles.num_quantiles": -5})
+        num_quantiles_error = (
+            "{}.num_quantiles.num_quantiles must be either None"
+            " or a positive integer".format(optpth)
+        )
+        self.assertEqual([num_quantiles_error], options._validate_helper())
+        options.set({"num_quantiles.num_quantiles": 1000})
+
         # Disable Sum and Enable Variance
         options.set(
             {

From ea603d2c01bc59d240bb8c0b5c629c35f3f89a5c Mon Sep 17 00:00:00 2001
From: clee1152 <chrislee011502@gmail.com>
Date: Tue, 27 Jun 2023 16:48:59 -0400
Subject: [PATCH 08/15] Fixed tests for num_quantiles option in column
 profiles.

---
 dataprofiler/profilers/numerical_column_stats.py               | 1 -
 dataprofiler/tests/profilers/test_float_column_profile.py      | 1 +
 dataprofiler/tests/profilers/test_int_column_profile.py        | 3 +++
 .../tests/profilers/test_numeric_stats_mixin_profile.py        | 2 ++
 4 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/dataprofiler/profilers/numerical_column_stats.py b/dataprofiler/profilers/numerical_column_stats.py
index 627dc1fb0..a87447cf4 100644
--- a/dataprofiler/profilers/numerical_column_stats.py
+++ b/dataprofiler/profilers/numerical_column_stats.py
@@ -384,7 +384,6 @@ def profile(self) -> dict:
             skewness=self.np_type_to_type(self.skewness),
             kurtosis=self.np_type_to_type(self.kurtosis),
             histogram=self._get_best_histogram_for_profile(),
-            num_quantiles=self.num_quantiles,
             quantiles=self.quantiles,
             median_abs_deviation=self.np_type_to_type(self.median_abs_deviation),
             num_zeros=self.np_type_to_type(self.num_zeros),
diff --git a/dataprofiler/tests/profilers/test_float_column_profile.py b/dataprofiler/tests/profilers/test_float_column_profile.py
index 9c7db90ea..f2dd7b23e 100644
--- a/dataprofiler/tests/profilers/test_float_column_profile.py
+++ b/dataprofiler/tests/profilers/test_float_column_profile.py
@@ -32,6 +32,7 @@ def test_base_case(self):
         self.assertTrue(profiler.kurtosis is np.nan)
         self.assertTrue(profiler.stddev is np.nan)
         self.assertIsNone(profiler.histogram_selection)
+        self.assertEqual(profiler.num_quantiles, 1000)
         self.assertEqual(len(profiler.quantiles), 999)
         self.assertIsNone(profiler.data_type_ratio)
 
diff --git a/dataprofiler/tests/profilers/test_int_column_profile.py b/dataprofiler/tests/profilers/test_int_column_profile.py
index 50ca859cf..5a2a77540 100644
--- a/dataprofiler/tests/profilers/test_int_column_profile.py
+++ b/dataprofiler/tests/profilers/test_int_column_profile.py
@@ -1146,6 +1146,7 @@ def test_json_encode(self):
                     "_mode_is_enabled": True,
                     "num_zeros": 0,
                     "num_negatives": 0,
+                    "num_quantiles": 1000,
                     "histogram_methods": expected_historam_methods,
                     "_stored_histogram": {
                         "total_loss": 0,
@@ -1187,6 +1188,7 @@ def test_json_encode_after_update(self, time):
 
         int_options = IntOptions()
         int_options.histogram_and_quantiles.bin_count_or_method = 5
+        int_options.num_quantiles.num_quantiles = 4
         profiler = IntColumn("0", int_options)
 
         mocked_quantiles = [0.25, 0.50, 0.75]
@@ -1221,6 +1223,7 @@ def test_json_encode_after_update(self, time):
                     "_mode_is_enabled": True,
                     "num_zeros": 1,
                     "num_negatives": 0,
+                    "num_quantiles": 4,
                     "histogram_selection": None,
                     "user_set_histogram_bin": 5,
                     "bias_correction": True,
diff --git a/dataprofiler/tests/profilers/test_numeric_stats_mixin_profile.py b/dataprofiler/tests/profilers/test_numeric_stats_mixin_profile.py
index 6e4299db6..70214b20b 100644
--- a/dataprofiler/tests/profilers/test_numeric_stats_mixin_profile.py
+++ b/dataprofiler/tests/profilers/test_numeric_stats_mixin_profile.py
@@ -720,6 +720,7 @@ def test_profile(self):
         num_profiler.histogram_selection = "auto"
         num_profiler.histogram_methods["auto"]["histogram"] = mock_profile["histogram"]
         num_profiler.quantiles = mock_profile["quantiles"]
+        num_profiler.num_quantiles = 4
         num_profiler.times = mock_profile["times"]
 
         time_array = [float(i) for i in range(100, 0, -1)]
@@ -1195,6 +1196,7 @@ def test_json_encode(self):
                     "_mode_is_enabled": True,
                     "num_zeros": 0,
                     "num_negatives": 0,
+                    "num_quantiles": 1000,
                     "histogram_methods": expected_historam_methods,
                     "_stored_histogram": {
                         "total_loss": 0,

From 1c44351439cf8afe0868adbc3032a3427aae4a21 Mon Sep 17 00:00:00 2001
From: clee1152 <chrislee011502@gmail.com>
Date: Tue, 27 Jun 2023 21:18:04 -0400
Subject: [PATCH 09/15] Added scipy requirements to requirements.txt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index b4d420d4f..7c63ff126 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,7 +10,7 @@ fastavro>=1.0.0.post1
 python-snappy>=0.5.4
 charset-normalizer>=1.3.6
 psutil>=4.0.0
-scipy>=1.4.1
+scipy>=1.4.1,<1.11.0
 requests>=2.28.1
 networkx>=2.5.1
 typing-extensions>=3.10.0.2

From de8104c443ea5d1fcfcbcd619be4b5f8f5a045a4 Mon Sep 17 00:00:00 2001
From: clee1152 <chrislee011502@gmail.com>
Date: Wed, 28 Jun 2023 09:50:25 -0400
Subject: [PATCH 10/15] Added tests for num_quantiles option.

---
 dataprofiler/profilers/profiler_options.py    |   3 +-
 .../test_num_quantiles_option.py              | 142 ++++++++++++++++++
 .../test_numerical_options.py                 |  18 ---
 3 files changed, 143 insertions(+), 20 deletions(-)
 create mode 100644 dataprofiler/tests/profilers/profiler_options/test_num_quantiles_option.py

diff --git a/dataprofiler/profilers/profiler_options.py b/dataprofiler/profilers/profiler_options.py
index 09d9f6736..3ac96f9f4 100644
--- a/dataprofiler/profilers/profiler_options.py
+++ b/dataprofiler/profilers/profiler_options.py
@@ -299,8 +299,7 @@ def _validate_helper(self, variable_path: str = "NumQuantilesOption") -> list[st
             not isinstance(self.num_quantiles, int) or self.num_quantiles < 1
         ):
             errors.append(
-                "{}.num_quantiles must be either None"
-                " or a positive integer".format(variable_path)
+                "{}.num_quantiles must be " "a positive integer.".format(variable_path)
             )
         return errors
 
diff --git a/dataprofiler/tests/profilers/profiler_options/test_num_quantiles_option.py b/dataprofiler/tests/profilers/profiler_options/test_num_quantiles_option.py
new file mode 100644
index 000000000..6ad5ec327
--- /dev/null
+++ b/dataprofiler/tests/profilers/profiler_options/test_num_quantiles_option.py
@@ -0,0 +1,142 @@
+from dataprofiler.profilers.profiler_options import NumQuantilesOption
+from dataprofiler.tests.profilers.profiler_options.test_boolean_option import (
+    TestBooleanOption,
+)
+
+
+class TestNumQuantilesOption(TestBooleanOption):
+
+    option_class = NumQuantilesOption
+    keys = []
+
+    def test_init(self):
+        option = self.get_options()
+
+        self.assertDictEqual(
+            {"is_enabled": True, "num_quantiles": 1000}, option.properties
+        )
+        option = self.get_options(is_enabled=False)
+        self.assertDictEqual(
+            {"is_enabled": False, "num_quantiles": 1000}, option.properties
+        )
+        option = self.get_options(is_enabled=False, num_quantiles=50)
+        self.assertDictEqual(
+            {"is_enabled": False, "num_quantiles": 50}, option.properties
+        )
+
+    def test_set_helper(self):
+        super().test_set_helper
+        option = self.get_options()
+
+        # validate, variable path being passed
+        expected_error = (
+            "type object 'test.num_quantiles' has no " "attribute 'is_enabled'"
+        )
+        with self.assertRaisesRegex(AttributeError, expected_error):
+            option._set_helper({"num_quantiles.is_enabled": True}, "test")
+
+    def test_set(self):
+        super().test_set()
+        option = self.get_options()
+        option.set({"num_quantiles": 50})
+        self.assertDictEqual(
+            {"is_enabled": True, "num_quantiles": 50}, option.properties
+        )
+
+    def test_validate_helper(self):
+        super().test_validate_helper()
+
+        optpth = self.get_options_path()
+
+        # Default configuration
+        option = self.get_options(num_quantiles=1000)
+        self.assertEqual([], option._validate_helper())
+
+        # Valid configurations
+        option = self.get_options(num_quantiles=50)
+        self.assertEqual([], option._validate_helper())
+        option = self.get_options(num_quantiles=2000)
+        self.assertEqual([], option._validate_helper())
+        option = self.get_options(num_quantiles=1)
+        self.assertEqual([], option._validate_helper())
+
+        # Option num_quantiles
+        option = self.get_options(num_quantiles="Hello World")
+        expected_error = [f"{optpth}.num_quantiles must be a positive integer."]
+        self.assertSetEqual(set(expected_error), set(option._validate_helper()))
+
+        # Option num_quantiles cannot be a float, must be an int
+        option = self.get_options(num_quantiles=1.1)
+        expected_error = [f"{optpth}.num_quantiles must be a positive integer."]
+        self.assertSetEqual(set(expected_error), set(option._validate_helper()))
+
+        # Option num_quantiles must be a positive integer
+        option = self.get_options(num_quantiles=0)
+        expected_error = [f"{optpth}.num_quantiles must be a positive integer."]
+        self.assertSetEqual(set(expected_error), set(option._validate_helper()))
+
+        # Option num_quantiles must be a positive integer
+        option = self.get_options(num_quantiles=-5)
+        expected_error = [f"{optpth}.num_quantiles must be a positive integer."]
+        self.assertSetEqual(set(expected_error), set(option._validate_helper()))
+
+    def test_validate(self):
+        super().test_validate()
+
+        optpth = self.get_options_path()
+
+        # Default configuration
+        option = self.get_options(num_quantiles=1000)
+        self.assertEqual([], option._validate_helper())
+
+        # Valid configurations
+        option = self.get_options(num_quantiles=50)
+        self.assertEqual([], option._validate_helper())
+        option = self.get_options(num_quantiles=2000)
+        self.assertEqual([], option._validate_helper())
+        option = self.get_options(num_quantiles=1)
+        self.assertEqual([], option._validate_helper())
+
+        # Option num_quantiles cannot be a string, must be an int
+        option = self.get_options(num_quantiles="Hello World")
+        expected_error = (
+            "NumQuantilesOption.num_quantiles must be a " "positive integer"
+        )
+        with self.assertRaisesRegex(ValueError, expected_error):
+            option.validate()
+
+        # Option num_quantiles cannot be a float, must be an int
+        option = self.get_options(num_quantiles=1.1)
+        expected_error = (
+            "NumQuantilesOption.num_quantiles must be a " "positive integer"
+        )
+        with self.assertRaisesRegex(ValueError, expected_error):
+            option.validate()
+
+        # Option num_quantiles must be a positive integer
+        option = self.get_options(num_quantiles=0)
+        expected_error = (
+            "NumQuantilesOption.num_quantiles must be a " "positive integer"
+        )
+        with self.assertRaisesRegex(ValueError, expected_error):
+            option.validate()
+
+        # Option num_quantiles must be a positive integer
+        option = self.get_options(num_quantiles=-5)
+        expected_error = (
+            "NumQuantilesOption.num_quantiles must be a " "positive integer"
+        )
+        with self.assertRaisesRegex(ValueError, expected_error):
+            option.validate()
+
+    def test_eq(self):
+        super().test_eq()
+
+        options = self.get_options()
+        options2 = self.get_options()
+        options.num_quantiles = 30
+        self.assertNotEqual(options, options2)
+        options2.num_quantiles = 50
+        self.assertNotEqual(options, options2)
+        options2.num_quantiles = 30
+        self.assertEqual(options, options2)
diff --git a/dataprofiler/tests/profilers/profiler_options/test_numerical_options.py b/dataprofiler/tests/profilers/profiler_options/test_numerical_options.py
index 56664dc53..560c82db5 100644
--- a/dataprofiler/tests/profilers/profiler_options/test_numerical_options.py
+++ b/dataprofiler/tests/profilers/profiler_options/test_numerical_options.py
@@ -125,24 +125,6 @@ def test_validate_helper(self):
         self.assertEqual([mode_error], options._validate_helper())
         options.set({"mode.top_k_modes": 5})
 
-        # Zero num_quantiles
-        options.set(
-            {"num_quantiles.is_enabled": True, "num_quantiles.num_quantiles": 0}
-        )
-        num_quantiles_error = (
-            "{}.num_quantiles.num_quantiles must be either None"
-            " or a positive integer".format(optpth)
-        )
-        self.assertEqual([num_quantiles_error], options._validate_helper())
-        # Negative num_quantiles
-        options.set({"num_quantiles.num_quantiles": -5})
-        num_quantiles_error = (
-            "{}.num_quantiles.num_quantiles must be either None"
-            " or a positive integer".format(optpth)
-        )
-        self.assertEqual([num_quantiles_error], options._validate_helper())
-        options.set({"num_quantiles.num_quantiles": 1000})
-
         # Disable Sum and Enable Variance
         options.set(
             {

From 26554ae58a67a272314f433134635796af154747 Mon Sep 17 00:00:00 2001
From: clee1152 <chrislee011502@gmail.com>
Date: Wed, 28 Jun 2023 13:25:41 -0400
Subject: [PATCH 11/15] Update dataprofiler/profilers/profiler_options.py

Co-authored-by: Taylor Turner <taylorfturner@gmail.com>
---
 dataprofiler/profilers/profiler_options.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dataprofiler/profilers/profiler_options.py b/dataprofiler/profilers/profiler_options.py
index 3ac96f9f4..8e100f547 100644
--- a/dataprofiler/profilers/profiler_options.py
+++ b/dataprofiler/profilers/profiler_options.py
@@ -299,7 +299,7 @@ def _validate_helper(self, variable_path: str = "NumQuantilesOption") -> list[st
             not isinstance(self.num_quantiles, int) or self.num_quantiles < 1
         ):
             errors.append(
-                "{}.num_quantiles must be " "a positive integer.".format(variable_path)
+                "{}.num_quantiles must be a positive integer.".format(variable_path)
             )
         return errors
 

From 382f11c1ce2b0c8b75fb1eda295420a9d2a043c4 Mon Sep 17 00:00:00 2001
From: clee1152 <chrislee011502@gmail.com>
Date: Wed, 28 Jun 2023 13:26:33 -0400
Subject: [PATCH 12/15] Update
 dataprofiler/tests/profilers/profiler_options/test_num_quantiles_option.py

Co-authored-by: Taylor Turner <taylorfturner@gmail.com>
---
 .../profilers/profiler_options/test_num_quantiles_option.py     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dataprofiler/tests/profilers/profiler_options/test_num_quantiles_option.py b/dataprofiler/tests/profilers/profiler_options/test_num_quantiles_option.py
index 6ad5ec327..e2537f411 100644
--- a/dataprofiler/tests/profilers/profiler_options/test_num_quantiles_option.py
+++ b/dataprofiler/tests/profilers/profiler_options/test_num_quantiles_option.py
@@ -75,7 +75,7 @@ def test_validate_helper(self):
         expected_error = [f"{optpth}.num_quantiles must be a positive integer."]
         self.assertSetEqual(set(expected_error), set(option._validate_helper()))
 
-        # Option num_quantiles must be a positive integer
+        # Option num_quantiles cannot be a negative integer
         option = self.get_options(num_quantiles=-5)
         expected_error = [f"{optpth}.num_quantiles must be a positive integer."]
         self.assertSetEqual(set(expected_error), set(option._validate_helper()))

From 3a19c241427d17b6774934da6f2463a7212b0456 Mon Sep 17 00:00:00 2001
From: clee1152 <chrislee011502@gmail.com>
Date: Wed, 28 Jun 2023 13:26:40 -0400
Subject: [PATCH 13/15] Minor edits to tests

---
 .../test_num_quantiles_option.py              | 22 ++++++-------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/dataprofiler/tests/profilers/profiler_options/test_num_quantiles_option.py b/dataprofiler/tests/profilers/profiler_options/test_num_quantiles_option.py
index e2537f411..a59d81293 100644
--- a/dataprofiler/tests/profilers/profiler_options/test_num_quantiles_option.py
+++ b/dataprofiler/tests/profilers/profiler_options/test_num_quantiles_option.py
@@ -30,7 +30,7 @@ def test_set_helper(self):
 
         # validate, variable path being passed
         expected_error = (
-            "type object 'test.num_quantiles' has no " "attribute 'is_enabled'"
+            "type object 'test.num_quantiles' has no attribute 'is_enabled'"
         )
         with self.assertRaisesRegex(AttributeError, expected_error):
             option._set_helper({"num_quantiles.is_enabled": True}, "test")
@@ -70,7 +70,7 @@ def test_validate_helper(self):
         expected_error = [f"{optpth}.num_quantiles must be a positive integer."]
         self.assertSetEqual(set(expected_error), set(option._validate_helper()))
 
-        # Option num_quantiles must be a positive integer
+        # Option num_quantiles may not be zero, must be greater than one(1)
         option = self.get_options(num_quantiles=0)
         expected_error = [f"{optpth}.num_quantiles must be a positive integer."]
         self.assertSetEqual(set(expected_error), set(option._validate_helper()))
@@ -99,33 +99,25 @@ def test_validate(self):
 
         # Option num_quantiles cannot be a string, must be an int
         option = self.get_options(num_quantiles="Hello World")
-        expected_error = (
-            "NumQuantilesOption.num_quantiles must be a " "positive integer"
-        )
+        expected_error = "NumQuantilesOption.num_quantiles must be a positive integer"
         with self.assertRaisesRegex(ValueError, expected_error):
             option.validate()
 
         # Option num_quantiles cannot be a float, must be an int
         option = self.get_options(num_quantiles=1.1)
-        expected_error = (
-            "NumQuantilesOption.num_quantiles must be a " "positive integer"
-        )
+        expected_error = "NumQuantilesOption.num_quantiles must be a positive integer"
         with self.assertRaisesRegex(ValueError, expected_error):
             option.validate()
 
         # Option num_quantiles must be a positive integer
         option = self.get_options(num_quantiles=0)
-        expected_error = (
-            "NumQuantilesOption.num_quantiles must be a " "positive integer"
-        )
+        expected_error = "NumQuantilesOption.num_quantiles must be a positive integer"
         with self.assertRaisesRegex(ValueError, expected_error):
             option.validate()
 
-        # Option num_quantiles must be a positive integer
+        # Option num_quantiles cannot be a negative integer
         option = self.get_options(num_quantiles=-5)
-        expected_error = (
-            "NumQuantilesOption.num_quantiles must be a " "positive integer"
-        )
+        expected_error = "NumQuantilesOption.num_quantiles must be a positive integer"
         with self.assertRaisesRegex(ValueError, expected_error):
             option.validate()
 

From bc5f195773bc37bdf413ae4c077c44ce3a9de383 Mon Sep 17 00:00:00 2001
From: Christopher Lee <chrislee011502@gmail.com>
Date: Wed, 28 Jun 2023 14:09:36 -0400
Subject: [PATCH 14/15] Empty commit


From 48cbdf4a498a5b54dd7b0a3529227dec3e408e3c Mon Sep 17 00:00:00 2001
From: Christopher Lee <chrislee011502@gmail.com>
Date: Wed, 28 Jun 2023 14:13:38 -0400
Subject: [PATCH 15/15] Precommits

---
 dataprofiler/profilers/profiler_options.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/dataprofiler/profilers/profiler_options.py b/dataprofiler/profilers/profiler_options.py
index 8e100f547..7737ac435 100644
--- a/dataprofiler/profilers/profiler_options.py
+++ b/dataprofiler/profilers/profiler_options.py
@@ -298,9 +298,8 @@ def _validate_helper(self, variable_path: str = "NumQuantilesOption") -> list[st
         if self.num_quantiles is not None and (
             not isinstance(self.num_quantiles, int) or self.num_quantiles < 1
         ):
-            errors.append(
-                "{}.num_quantiles must be a positive integer.".format(variable_path)
-            )
+            errors.append(f"{variable_path}.num_quantiles must be a positive integer.")
+
         return errors