From b769cae99838cabaaabdba4267cf365c96f56ca2 Mon Sep 17 00:00:00 2001 From: Bobby Morck Date: Thu, 6 Jul 2023 16:15:26 -0400 Subject: [PATCH 1/8] Add Ignore List Order Option to DeepHash --- deepdiff/deephash.py | 6 +++++- tests/test_hash.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index c93037d8..9547730a 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -144,6 +144,7 @@ def __init__(self, parent="root", encodings=None, ignore_encoding_errors=False, + ignore_list_order=True, **kwargs): if kwargs: raise ValueError( @@ -190,6 +191,7 @@ def __init__(self, self.ignore_private_variables = ignore_private_variables self.encodings = encodings self.ignore_encoding_errors = ignore_encoding_errors + self.ignore_list_order = ignore_list_order self._hash(obj, parent=parent, parents_ids=frozenset({get_id(obj)})) @@ -424,7 +426,9 @@ def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): '{}|{}'.format(i, v) for i, v in result.items() ] - result = sorted(map(str, result)) # making sure the result items are string and sorted so join command works. + result = map(str, result) # making sure the result items are string so join command works. 
+ if self.ignore_list_order: + result = sorted(result) result = ','.join(result) result = KEY_TO_VAL_STR.format(type(obj).__name__, result) diff --git a/tests/test_hash.py b/tests/test_hash.py index da94130d..f56be5c3 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -368,6 +368,21 @@ def test_same_sets_same_hash(self): t2_hash = DeepHashPrep(t2) assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] + + @pytest.mark.parametrize("list1, list2, ignore_list_order, is_equal", [ + ([1, 2], [2, 1], False, False), + ([1, 2], [2, 1], True, True), + ([1, 2, 3], [1, 3, 2], False, False), + ([1, [1, 2, 3]], [1, [3, 2, 1]], False, False), + ([1, [1, 2, 3]], [1, [3, 2, 1]], True, True), + ((1, 2), (2, 1), False, False), + ((1, 2), (2, 1), True, True), + ]) + def test_list_ignore_order(self, list1, list2, ignore_list_order, is_equal): + list1_hash = DeepHash(list1, ignore_list_order=ignore_list_order) + list2_hash = DeepHash(list2, ignore_list_order=ignore_list_order) + + assert is_equal == (list1_hash[list1] == list2_hash[list2]) @pytest.mark.parametrize("t1, t2, significant_digits, number_format_notation, result", [ ({0.012, 0.98}, {0.013, 0.99}, 1, "f", 'set:float:0.0,float:1.0'), From b2fcd658608ee924d1cdf9affdb811e947ed4b8f Mon Sep 17 00:00:00 2001 From: Bobby Morck Date: Wed, 12 Jul 2023 14:21:42 -0400 Subject: [PATCH 2/8] Update docs and rename to ignore_iterable_order --- deepdiff/deephash.py | 6 +++--- docs/deephash_doc.rst | 2 ++ tests/test_hash.py | 8 ++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index 9547730a..eb9b9f11 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -144,7 +144,7 @@ def __init__(self, parent="root", encodings=None, ignore_encoding_errors=False, - ignore_list_order=True, + ignore_iterable_order=True, **kwargs): if kwargs: raise ValueError( @@ -191,7 +191,7 @@ def __init__(self, self.ignore_private_variables = ignore_private_variables self.encodings = encodings 
self.ignore_encoding_errors = ignore_encoding_errors - self.ignore_list_order = ignore_list_order + self.ignore_iterable_order = ignore_iterable_order self._hash(obj, parent=parent, parents_ids=frozenset({get_id(obj)})) @@ -427,7 +427,7 @@ def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): ] result = map(str, result) # making sure the result items are string so join command works. - if self.ignore_list_order: + if self.ignore_iterable_order: result = sorted(result) result = ','.join(result) result = KEY_TO_VAL_STR.format(type(obj).__name__, result) diff --git a/docs/deephash_doc.rst b/docs/deephash_doc.rst index 82e8c361..a5aa9f1f 100644 --- a/docs/deephash_doc.rst +++ b/docs/deephash_doc.rst @@ -123,6 +123,8 @@ ignore_private_variables: Boolean, default = True ignore_encoding_errors: Boolean, default = False If you want to get away with UnicodeDecodeError without passing explicit character encodings, set this option to True. If you want to make sure the encoding is done properly, keep this as False and instead pass an explicit list of character encodings to be considered via the encodings parameter. +ignore_iterable_order: Boolean, default = True + If True, the order of items in an iterable does not affect the hash of the iterable. number_format_notation : string, default="f" number_format_notation is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point. "f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. 
diff --git a/tests/test_hash.py b/tests/test_hash.py index f56be5c3..bbf2c0ef 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -369,7 +369,7 @@ def test_same_sets_same_hash(self): assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] - @pytest.mark.parametrize("list1, list2, ignore_list_order, is_equal", [ + @pytest.mark.parametrize("list1, list2, ignore_iterable_order, is_equal", [ ([1, 2], [2, 1], False, False), ([1, 2], [2, 1], True, True), ([1, 2, 3], [1, 3, 2], False, False), @@ -378,9 +378,9 @@ def test_same_sets_same_hash(self): ((1, 2), (2, 1), False, False), ((1, 2), (2, 1), True, True), ]) - def test_list_ignore_order(self, list1, list2, ignore_list_order, is_equal): - list1_hash = DeepHash(list1, ignore_list_order=ignore_list_order) - list2_hash = DeepHash(list2, ignore_list_order=ignore_list_order) + def test_ignore_iterable_order(self, list1, list2, ignore_iterable_order, is_equal): + list1_hash = DeepHash(list1, ignore_iterable_order=ignore_iterable_order) + list2_hash = DeepHash(list2, ignore_iterable_order=ignore_iterable_order) assert is_equal == (list1_hash[list1] == list2_hash[list2]) From 1fc9a3ab7096e7f337039fb9b77c73bc928ee4e4 Mon Sep 17 00:00:00 2001 From: Robert Bo Davis Date: Tue, 18 Jul 2023 07:11:28 -0400 Subject: [PATCH 3/8] pyyaml to 6.0.1 to fix cython build problems --- requirements-cli.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-cli.txt b/requirements-cli.txt index ef515c8d..f487dc50 100644 --- a/requirements-cli.txt +++ b/requirements-cli.txt @@ -1,2 +1,2 @@ click==8.1.3 -pyyaml==6.0 +pyyaml==6.0.1 From 4196a30706b78cca4d1b56263838f4804d7ffb1c Mon Sep 17 00:00:00 2001 From: Chris Hamill Date: Tue, 15 Aug 2023 11:18:10 -0400 Subject: [PATCH 4/8] make DiffLevel iterable --- deepdiff/model.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/deepdiff/model.py b/deepdiff/model.py index 0d8d67e5..4b846b21 100644 --- a/deepdiff/model.py +++ b/deepdiff/model.py @@ -577,6 +577,10 @@ def 
__setattr__(self, key, value): else: self.__dict__[key] = value + def __iter__(self): + yield self.t1 + yield self.t2 + @property def repetition(self): return self.additional['repetition'] From 62b857feabaa6cfee9b1d1babbdf860efa95b90f Mon Sep 17 00:00:00 2001 From: Chris Hamill Date: Tue, 8 Aug 2023 23:00:42 -0400 Subject: [PATCH 5/8] generalize logic for diffing immutable objects, e.g. precompiled regex --- deepdiff/diff.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index aa85e84a..36cebb19 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -13,6 +13,7 @@ from math import isclose as is_close from collections.abc import Mapping, Iterable, Sequence from collections import defaultdict +from inspect import getmembers from itertools import zip_longest from ordered_set import OrderedSet from deepdiff.helper import (strings, bytes_type, numbers, uuids, datetimes, ListItemRemovedOrAdded, notpresent, @@ -415,20 +416,25 @@ def _diff_enum(self, level, parents_ids=frozenset(), local_tree=None): def _diff_obj(self, level, parents_ids=frozenset(), is_namedtuple=False, local_tree=None): """Difference of 2 objects""" + processing_error = False try: if is_namedtuple: t1 = level.t1._asdict() t2 = level.t2._asdict() - else: + elif all('__dict__' in dir(t) for t in level): t1 = detailed__dict__(level.t1, ignore_private_variables=self.ignore_private_variables) t2 = detailed__dict__(level.t2, ignore_private_variables=self.ignore_private_variables) - except AttributeError: - try: + elif all('__slots__' in dir(t) for t in level): t1 = self._dict_from_slots(level.t1) t2 = self._dict_from_slots(level.t2) - except AttributeError: - self._report_result('unprocessed', level, local_tree=local_tree) - return + else: + t1 = {k: v for k, v in getmembers(level.t1) if not callable(v)} + t2 = {k: v for k, v in getmembers(level.t2) if not callable(v)} + except AttributeError: + processing_error = True + if 
processing_error is True: + self._report_result('unprocessed', level, local_tree=local_tree) + return self._diff_dict( level, From 998a26c527c73ae1ce09d5102b1138294a86cdff Mon Sep 17 00:00:00 2001 From: Chris Hamill Date: Tue, 8 Aug 2023 23:19:51 -0400 Subject: [PATCH 6/8] add unit test for simple precompiled regex diffing --- tests/test_diff_text.py | 60 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index e0025648..be822fd2 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -2,6 +2,7 @@ import datetime import pytest import logging +import re import uuid from enum import Enum from typing import List @@ -551,6 +552,64 @@ class MyEnum(Enum): } assert ddiff == result + def test_precompiled_regex(self): + + pattern_1 = re.compile('foo') + pattern_2 = re.compile('foo') + pattern_3 = re.compile('foo', flags=re.I) + pattern_4 = re.compile('(foo)') + pattern_5 = re.compile('bar') + + # same object + ddiff = DeepDiff(pattern_1, pattern_1) + result = {} + assert ddiff == result + + # same pattern, different object + ddiff = DeepDiff(pattern_1, pattern_2) + result = {} + assert ddiff == result + + # same pattern, different flags + ddiff = DeepDiff(pattern_1, pattern_3) + result = { + 'values_changed': { + 'root.flags': { + 'new_value': 34, + 'old_value': 32, + }, + } + } + assert ddiff == result + + # same pattern, different groups + ddiff = DeepDiff(pattern_1, pattern_4) + result = { + 'values_changed': { + 'root.pattern': { + 'new_value': '(foo)', + 'old_value': 'foo', + }, + 'root.groups': { + 'new_value': 1, + 'old_value': 0, + }, + } + } + assert ddiff == result + + # different pattern + ddiff = DeepDiff(pattern_1, pattern_5) + result = { + 'values_changed': { + 'root.pattern': { + 'new_value': 'bar', + 'old_value': 'foo', + }, + } + } + assert ddiff == result + def test_custom_objects_change(self): t1 = CustomClass(1) t2 = CustomClass(2) @@ 
-1803,4 +1862,3 @@ class Bar(PydanticBaseModel): diff = DeepDiff(t1, t2) expected = {'values_changed': {'root.stuff[0].thing': {'new_value': 2, 'old_value': 1}}} assert expected == diff - From c86292b34bbb46f2ec344188436a4a9418e99352 Mon Sep 17 00:00:00 2001 From: Chris Hamill Date: Tue, 15 Aug 2023 17:55:34 -0400 Subject: [PATCH 7/8] fix if/elif branching otherwise bools get diffed twice --- deepdiff/diff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 36cebb19..7d935946 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -1533,7 +1533,7 @@ def _diff(self, level, parents_ids=frozenset(), _original_type=None, local_tree= if isinstance(level.t1, booleans): self._diff_booleans(level, local_tree=local_tree) - if isinstance(level.t1, strings): + elif isinstance(level.t1, strings): self._diff_str(level, local_tree=local_tree) elif isinstance(level.t1, datetimes): From 32ec1820e1b392585f8fa9503b836dfd9a7f081b Mon Sep 17 00:00:00 2001 From: Chris Hamill Date: Tue, 15 Aug 2023 11:20:47 -0400 Subject: [PATCH 8/8] tweak for consistent style --- deepdiff/diff.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 7d935946..94f290c5 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -880,7 +880,8 @@ def _diff_by_forming_pairs_and_comparing_one_by_one( x, y, child_relationship_class=child_relationship_class, - child_relationship_param=j) + child_relationship_param=j + ) self._diff(next_level, parents_ids_added, local_tree=local_tree) def _diff_ordered_iterable_by_difflib(