seperman · seperman · Aug 27, 2024 · May 14, 2024 · May 14, 2024 · May 17, 2024
diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
@@ -12,19 +12,28 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.8, 3.9, "3.10", "3.11", "3.12"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
         architecture: ["x64"]
-        include:
-          - python-version: "3.10"
-            numpy-version: "2.0.dev"
     steps:
     - uses: actions/checkout@v2
     - name: Setup Python ${{ matrix.python-version }} on ${{ matrix.architecture }}
       uses: actions/setup-python@v2
       with:
         python-version: ${{ matrix.python-version }}
         architecture: ${{ matrix.architecture }}
+    - name: Cache pip 3.8
+      if: matrix.python-version == 3.8
+      # actions/cache v1/v2 have been retired by GitHub; v4 accepts the same path/key/restore-keys inputs.
+      uses: actions/cache@v4
+      with:
+        # This path is specific to Ubuntu
+        path: ~/.cache/pip
+        # Look to see if there is a cache hit for the corresponding requirements file
+        key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}-${{ hashFiles('requirements-dev3.8.txt') }}
+        restore-keys: |
+          ${{ runner.os }}-pip-
+          ${{ runner.os }}-
     - name: Cache pip
+      if: matrix.python-version != 3.8
-      uses: actions/cache@v2
+      # actions/cache v1/v2 have been retired by GitHub; v4 accepts the same path/key/restore-keys inputs.
+      uses: actions/cache@v4
       with:
         # This path is specific to Ubuntu
@@ -40,28 +49,31 @@ jobs:
         # workaround for 3.12, SEE: https://github.com/pypa/setuptools/issues/3661#issuecomment-1813845177
         pip install --upgrade setuptools
     - name: Install dependencies
+      if: matrix.python-version != 3.8
       run: pip install -r requirements-dev.txt
-    - name: Install Numpy Dev
-      if: ${{ matrix.numpy-version }}
-      run: pip install -I --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple "numpy>=0.0.dev0" 
+    - name: Install dependencies
+      if: matrix.python-version == 3.8
+      run: pip install -r requirements-dev3.8.txt
     - name: Lint with flake8
-      if: matrix.python-version == 3.11
+      if: matrix.python-version == 3.12
       run: |
         # stop the build if there are Python syntax errors or undefined names
         flake8 deepdiff --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 deepdiff --count --exit-zero --max-complexity=26 --max-line-lengt=250 --statistics
+        # Spell the option out in full instead of relying on argparse prefix matching.
+        flake8 deepdiff --count --exit-zero --max-complexity=26 --max-line-length=250 --statistics
     - name: Test with pytest and get the coverage
-      if: matrix.python-version == 3.11
+      if: matrix.python-version == 3.12
       run: |
-        pytest --cov-report=xml --cov=deepdiff tests/ --runslow
+        pytest --benchmark-disable --cov-report=xml --cov=deepdiff tests/ --runslow
     - name: Test with pytest and no coverage report
-      if: matrix.python-version != 3.11
+      if: matrix.python-version != 3.12
       run: |
-        pytest
+        pytest --benchmark-disable
     - name: Upload coverage to Codecov
-      uses: codecov/codecov-action@v3
-      if: matrix.python-version == 3.11
+      uses: codecov/codecov-action@v4
+      if: matrix.python-version == 3.12
+      env:
+        CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
       with:
         file: ./coverage.xml
         token: ${{ secrets.CODECOV_TOKEN }}

diff --git a/CITATION.cff b/CITATION.cff
@@ -5,6 +5,6 @@ authors:
   given-names: "Sep"
   orcid: "https://orcid.org/0009-0009-5828-4345"
 title: "DeepDiff"
-version: 7.0.1
+version: 8.0.0
 date-released: 2024
 url: "https://github.com/seperman/deepdiff"
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# DeepDiff v 7.0.1
+# DeepDiff v 8.0.0
 
 ![Downloads](https://img.shields.io/pypi/dm/deepdiff.svg?style=flat)
 ![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat)
@@ -17,7 +17,7 @@
 
 Tested on Python 3.8+ and PyPy3.
 
-- **[Documentation](https://zepworks.com/deepdiff/7.0.1/)**
+- **[Documentation](https://zepworks.com/deepdiff/8.0.0/)**
 
 ## What is new?
 

diff --git a/deepdiff/__init__.py b/deepdiff/__init__.py
@@ -1,6 +1,6 @@
 """This module offers the DeepDiff, DeepSearch, grep, Delta and DeepHash classes."""
 # flake8: noqa
-__version__ = '7.0.1'
+__version__ = '8.0.0'
 import logging
 
 if __name__ == '__main__':

diff --git a/deepdiff/anyset.py b/deepdiff/anyset.py
@@ -1,6 +1,5 @@
-from ordered_set import OrderedSet
 from deepdiff.deephash import DeepHash
-from deepdiff.helper import dict_
+from deepdiff.helper import dict_, SetOrdered
 
 
 class AnySet:
@@ -11,7 +10,7 @@ class AnySet:
     However once the AnySet object is deleted, all those traces will be gone too.
     """
     def __init__(self, items=None):
-        self._set = OrderedSet()
+        self._set = SetOrdered()
         self._hashes = dict_()
         self._hash_to_objects = dict_()
         if items:

diff --git a/deepdiff/base.py b/deepdiff/base.py
@@ -1,5 +1,4 @@
-from ordered_set import OrderedSet
-from deepdiff.helper import strings, numbers
+from deepdiff.helper import strings, numbers, SetOrdered
 
 
 DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 12
@@ -31,18 +30,18 @@ def get_ignore_types_in_groups(self, ignore_type_in_groups,
 
         result = []
         for item_group in ignore_type_in_groups:
-            new_item_group = OrderedSet()
+            new_item_group = SetOrdered()
             for item in item_group:
                 item = type(item) if item is None or not isinstance(item, type) else item
                 new_item_group.add(item)
             result.append(new_item_group)
         ignore_type_in_groups = result
 
         if ignore_string_type_changes and self.strings not in ignore_type_in_groups:
-            ignore_type_in_groups.append(OrderedSet(self.strings))
+            ignore_type_in_groups.append(SetOrdered(self.strings))
 
         if ignore_numeric_type_changes and self.numbers not in ignore_type_in_groups:
-            ignore_type_in_groups.append(OrderedSet(self.numbers))
+            ignore_type_in_groups.append(SetOrdered(self.numbers))
 
         if not ignore_type_subclasses:
             # is_instance method needs tuples. When we look for subclasses, we need them to be tuples

diff --git a/deepdiff/commands.py b/deepdiff/commands.py
@@ -47,6 +47,7 @@ def cli():
 @click.option('--log-frequency-in-sec', required=False, default=0, type=int, show_default=True)
 @click.option('--max-passes', required=False, default=10000000, type=int, show_default=True)
 @click.option('--max_diffs', required=False, default=None, type=int, show_default=True)
+@click.option('--threshold-to-diff-deeper', required=False, default=0.33, type=float, show_default=False)
 @click.option('--number-format-notation', required=False, type=click.Choice(['f', 'e'], case_sensitive=True), show_default=True, default="f")
 @click.option('--progress-logger', required=False, type=click.Choice(['info', 'error'], case_sensitive=True), show_default=True, default="info")
 @click.option('--report-repetition', is_flag=True, show_default=True)

diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py
@@ -14,6 +14,17 @@
                              number_to_string, datetime_normalize, KEY_TO_VAL_STR, short_repr,
                              get_truncate_datetime, dict_, add_root_to_paths)
 from deepdiff.base import Base
+
+try:
+    import pandas
+except ImportError:
+    pandas = False
+
+try:
+    import polars
+except ImportError:
+    polars = False
+
 logger = logging.getLogger(__name__)
 
 UNPROCESSED_KEY = object()
@@ -139,6 +150,7 @@ def __init__(self,
                  ignore_numeric_type_changes=False,
                  ignore_type_subclasses=False,
                  ignore_string_case=False,
+                 use_enum_value=False,
                  exclude_obj_callback=None,
                  number_to_string_func=None,
                  ignore_private_variables=True,
@@ -154,7 +166,7 @@ def __init__(self,
                  "exclude_paths, include_paths, exclude_regex_paths, hasher, ignore_repetition, "
                  "number_format_notation, apply_hash, ignore_type_in_groups, ignore_string_type_changes, "
                  "ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case "
-                 "number_to_string_func, ignore_private_variables, parent "
+                 "number_to_string_func, ignore_private_variables, parent, use_enum_value, "
                  "encodings, ignore_encoding_errors") % ', '.join(kwargs.keys()))
         if isinstance(hashes, MutableMapping):
             self.hashes = hashes
@@ -170,6 +182,7 @@ def __init__(self,
         self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
         self.hasher = default_hasher if hasher is None else hasher
         self.hashes[UNPROCESSED_KEY] = []
+        self.use_enum_value = use_enum_value
 
         self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes)
         self.truncate_datetime = get_truncate_datetime(truncate_datetime)
@@ -206,10 +219,10 @@ def __init__(self,
     sha1hex = sha1hex
 
     def __getitem__(self, obj, extract_index=0):
-        return self._getitem(self.hashes, obj, extract_index=extract_index)
+        return self._getitem(self.hashes, obj, extract_index=extract_index, use_enum_value=self.use_enum_value)
 
     @staticmethod
-    def _getitem(hashes, obj, extract_index=0):
+    def _getitem(hashes, obj, extract_index=0, use_enum_value=False):
         """
         extract_index is zero for hash and 1 for count and None to get them both.
         To keep it backward compatible, we only get the hash by default so it is set to zero by default.
@@ -220,6 +233,8 @@ def _getitem(hashes, obj, extract_index=0):
             key = BoolObj.TRUE
         elif obj is False:
             key = BoolObj.FALSE
+        elif use_enum_value and isinstance(obj, Enum):
+            key = obj.value
 
         result_n_count = (None, 0)
 
@@ -256,14 +271,14 @@ def get(self, key, default=None, extract_index=0):
         return self.get_key(self.hashes, key, default=default, extract_index=extract_index)
 
     @staticmethod
-    def get_key(hashes, key, default=None, extract_index=0):
+    def get_key(hashes, key, default=None, extract_index=0, use_enum_value=False):
         """
         get_key method for the hashes dictionary.
         It can extract the hash for a given key that is already calculated when extract_index=0
         or the count of items that went to building the object when extract_index=1.
         """
         try:
-            result = DeepHash._getitem(hashes, key, extract_index=extract_index)
+            result = DeepHash._getitem(hashes, key, extract_index=extract_index, use_enum_value=use_enum_value)
         except KeyError:
             result = default
         return result
@@ -444,7 +459,6 @@ def _prep_path(self, obj):
         type_ = obj.__class__.__name__
         return KEY_TO_VAL_STR.format(type_, obj)
 
-
     def _prep_number(self, obj):
         type_ = "number" if self.ignore_numeric_type_changes else obj.__class__.__name__
         if self.significant_digits is not None:
@@ -475,12 +489,14 @@ def _prep_tuple(self, obj, parent, parents_ids):
         return result, counts
 
     def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
-        """The main diff method"""
+        """The main hash method"""
         counts = 1
 
         if isinstance(obj, bool):
             obj = self._prep_bool(obj)
             result = None
+        elif self.use_enum_value and isinstance(obj, Enum):
+            obj = obj.value
         else:
             result = not_hashed
         try:
@@ -523,6 +539,19 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
         elif isinstance(obj, tuple):
             result, counts = self._prep_tuple(obj=obj, parent=parent, parents_ids=parents_ids)
 
+        elif (pandas and isinstance(obj, pandas.DataFrame)):
+            def gen():
+                yield ('dtype', obj.dtypes)
+                yield ('index', obj.index)
+                yield from obj.items()  # which contains (column name, series tuples)
+            result, counts = self._prep_iterable(obj=gen(), parent=parent, parents_ids=parents_ids)
+        elif (polars and isinstance(obj, polars.DataFrame)):
+            def gen():
+                yield from obj.columns
+                yield from list(obj.schema.items())
+                yield from obj.rows()
+            result, counts = self._prep_iterable(obj=gen(), parent=parent, parents_ids=parents_ids)
+
         elif isinstance(obj, Iterable):
             result, counts = self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids)
 

diff --git a/deepdiff/delta.py b/deepdiff/delta.py
@@ -4,7 +4,6 @@
 from functools import partial, cmp_to_key
 from collections.abc import Mapping
 from copy import deepcopy
-from ordered_set import OrderedSet
 from deepdiff import DeepDiff
 from deepdiff.serialization import pickle_load, pickle_dump
 from deepdiff.helper import (
@@ -14,6 +13,7 @@
     Opcode, FlatDeltaRow, UnkownValueCode, FlatDataAction,
     OPCODE_TAG_TO_FLAT_DATA_ACTION,
     FLAT_DATA_ACTION_TO_OPCODE_TAG,
+    SetOrdered,
 )
 from deepdiff.path import (
     _path_to_elements, _get_nested_obj, _get_nested_obj_and_force,
@@ -744,7 +744,7 @@ def _do_ignore_order(self):
         """
         fixed_indexes = self.diff.get('iterable_items_added_at_indexes', dict_())
         remove_indexes = self.diff.get('iterable_items_removed_at_indexes', dict_())
-        paths = OrderedSet(fixed_indexes.keys()) | OrderedSet(remove_indexes.keys())
+        paths = SetOrdered(fixed_indexes.keys()) | SetOrdered(remove_indexes.keys())
         for path in paths:
             # In the case of ignore_order reports, we are pointing to the container object.
             # Thus we add a [0] to the elements so we can get the required objects and discard what we don't need.