Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for bug when diffing two lists with ignore_order and providing compare_func #454

Merged
merged 1 commit into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion deepdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -1125,7 +1125,9 @@ def defaultdict_orderedset():
pre_calced_distances = self._precalculate_numpy_arrays_distance(
hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type)

if hashes_added and hashes_removed and self.iterable_compare_func and len(hashes_added) > 1 and len(hashes_removed) > 1:
if hashes_added and hashes_removed \
and self.iterable_compare_func \
and len(hashes_added) > 0 and len(hashes_removed) > 0:
pre_calced_distances = self._precalculate_distance_by_custom_compare_func(
hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type)

Expand Down
115 changes: 115 additions & 0 deletions tests/test_ignore_order.py
Original file line number Diff line number Diff line change
Expand Up @@ -1072,6 +1072,121 @@ def compare_func(x, y, level=None):
assert expected_with_compare_func == ddiff2
assert ddiff != ddiff2

def test_ignore_order_with_compare_func_with_one_each_hashes_added_hashes_removed(self):
"""
Scenario:
In this example which demonstrates the problem... We have two dictionaries containing lists for
individualNames. Each list contains exactly 2 elements. The effective change is that we are
replacing the 2nd element in the list.
NOTE: This is considered a REPLACEMENT of the second element and not an UPDATE of the element
because we are providing a custom compare_func which will determine matching elements based on
the value of the nameIdentifier field. If the custom compare_func is not used, then
deepdiff.diff will mistakenly treat the difference as being individual field updates for every
field in the second element of the list.

Intent:
Use our custom compare_func, since we have provided it.
We need to fall into self._precalculate_distance_by_custom_compare_func
To do this, we are proposing a change to deepdiff.diff line 1128:

Original:
if hashes_added and hashes_removed and self.iterable_compare_func and len(hashes_added) > 1 and len(hashes_removed) > 1:

Proposed/Updated:
if hashes_added and hashes_removed \
and self.iterable_compare_func \
and len(hashes_added) > 0 and len(hashes_removed) > 0:

NOTE: It is worth mentioning that deepdiff.diff line 1121, might also benefit by changing the length conditions
to evaluate for > 0 (rather than > 1).
"""

t1 = {
"individualNames": [
{
"firstName": "Johnathan",
"lastName": "Doe",
"prefix": "COLONEL",
"middleName": "A",
"primaryIndicator": True,
"professionalDesignation": "PHD",
"suffix": "SR",
"nameIdentifier": "00001"
},
{
"firstName": "John",
"lastName": "Doe",
"prefix": "",
"middleName": "",
"primaryIndicator": False,
"professionalDesignation": "",
"suffix": "SR",
"nameIdentifier": "00002"
}
]
}

t2 = {
"individualNames": [
{
"firstName": "Johnathan",
"lastName": "Doe",
"prefix": "COLONEL",
"middleName": "A",
"primaryIndicator": True,
"professionalDesignation": "PHD",
"suffix": "SR",
"nameIdentifier": "00001"
},
{
"firstName": "Johnny",
"lastName": "Doe",
"prefix": "",
"middleName": "A",
"primaryIndicator": False,
"professionalDesignation": "",
"suffix": "SR",
"nameIdentifier": "00003"
}
]
}
def compare_func(item1, item2, level=None):
print("*** inside compare ***")
it1_keys = item1.keys()

try:

# --- individualNames ---
if 'nameIdentifier' in it1_keys and 'lastName' in it1_keys:
match_result = item1['nameIdentifier'] == item2['nameIdentifier']
print("individualNames - matching result:", match_result)
return match_result
else:
print("Unknown list item...", "matching result:", item1 == item2)
return item1 == item2
except Exception:
raise CannotCompare() from None
# ---------------------------- End of nested function

actual_diff = DeepDiff(t1, t2, report_repetition=True,
ignore_order=True, iterable_compare_func=compare_func, cutoff_intersection_for_pairs=1)

old_invalid_diff = {
'values_changed': {"root['individualNames'][1]['firstName']": {'new_value': 'Johnny', 'old_value': 'John'},
"root['individualNames'][1]['middleName']": {'new_value': 'A', 'old_value': ''},
"root['individualNames'][1]['nameIdentifier']": {'new_value': '00003',
'old_value': '00002'}}}
new_expected_diff = {'iterable_item_added': {
"root['individualNames'][1]": {'firstName': 'Johnny', 'lastName': 'Doe', 'prefix': '', 'middleName': 'A',
'primaryIndicator': False, 'professionalDesignation': '', 'suffix': 'SR',
'nameIdentifier': '00003'}}, 'iterable_item_removed': {
"root['individualNames'][1]": {'firstName': 'John', 'lastName': 'Doe', 'prefix': '', 'middleName': '',
'primaryIndicator': False, 'professionalDesignation': '', 'suffix': 'SR',
'nameIdentifier': '00002'}}}

assert old_invalid_diff != actual_diff
assert new_expected_diff == actual_diff


class TestDynamicIgnoreOrder:
def test_ignore_order_func(self):
Expand Down
Loading