Skip to content

Commit

Permalink
Merge pull request biolab#6849 from markotoplak/deepcopy-outermost
Browse files Browse the repository at this point in the history
[FIX] Only deepcopy the .attributes for the outermost Table transformation
  • Loading branch information
JakaKokosar authored Jul 26, 2024
2 parents 4b68a84 + 4460fbb commit 09df730
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 11 deletions.
28 changes: 17 additions & 11 deletions Orange/data/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,16 @@ def from_table(cls, domain, source, row_indices=...):
:return: a new table
:rtype: Orange.data.Table
"""
if domain is source.domain:
table = cls.from_table_rows(source, row_indices)
# assure resulting domain is the instance passed on input
table.domain = domain
# since sparse flags are not considered when checking for
# domain equality, fix manually.
with table.unlocked_reference():
table = assure_domain_conversion_sparsity(table, source)
return table

new_cache = _thread_local.conversion_cache is None
try:
if new_cache:
Expand All @@ -801,15 +811,6 @@ def from_table(cls, domain, source, row_indices=...):
cached = _idcache_restore(_thread_local.conversion_cache, (domain, source))
if cached is not None:
return cached
if domain is source.domain:
table = cls.from_table_rows(source, row_indices)
# assure resulting domain is the instance passed on input
table.domain = domain
# since sparse flags are not considered when checking for
# domain equality, fix manually.
with table.unlocked_reference():
table = assure_domain_conversion_sparsity(table, source)
return table

# avoid boolean indices; also convert to slices if possible
row_indices = _optimize_indices(row_indices, len(source))
Expand All @@ -834,7 +835,9 @@ def from_table(cls, domain, source, row_indices=...):
self.W = source.W[row_indices]
self.name = getattr(source, 'name', '')
self.ids = source.ids[row_indices]
self.attributes = deepcopy(getattr(source, 'attributes', {}))
self.attributes = getattr(source, 'attributes', {})
if new_cache: # only deepcopy attributes for the outermost transformation
self.attributes = deepcopy(self.attributes)
_idcache_save(_thread_local.conversion_cache, (domain, source), self)
return self
finally:
Expand Down Expand Up @@ -879,6 +882,7 @@ def from_table_rows(cls, source, row_indices):
:return: a new table
:rtype: Orange.data.Table
"""
is_outermost_transformation = _thread_local.conversion_cache is None
self = cls()
self.domain = source.domain
with self.unlocked_reference():
Expand All @@ -892,7 +896,9 @@ def from_table_rows(cls, source, row_indices):
self.W = source.W[row_indices]
self.name = getattr(source, 'name', '')
self.ids = source.ids[row_indices]
self.attributes = deepcopy(getattr(source, 'attributes', {}))
self.attributes = getattr(source, 'attributes', {})
if is_outermost_transformation:
self.attributes = deepcopy(self.attributes)
return self

@classmethod
Expand Down
18 changes: 18 additions & 0 deletions Orange/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2068,6 +2068,24 @@ def test_attributes_copied(self):
# attributes dict of old table not be changed since new dist is a copy
self.assertDictEqual(self.table.attributes, {"A": "Test", "B": []})

def test_attributes_copied_once(self):
A = Mock()
A.__deepcopy__ = Mock()
self.table.attributes = {"A": A}

# a single direct transformation
self.table.from_table(self.table.domain, self.table)
self.assertEqual(1, A.__deepcopy__.call_count)
A.__deepcopy__.reset_mock()

# hierarchy of transformations
ndom = Domain([a.copy(compute_value=lambda x: x.transform(Domain([a])))
for a in self.table.domain.attributes])
self.table.from_table(ndom, self.table)
self.assertEqual(1, A.__deepcopy__.call_count)
# HISTORIC: before only the outermost transformation deepcopied the
# attributes, here were 23 calls to __deepcopy__ instead of 1


def isspecial(s):
return isinstance(s, slice) or s is Ellipsis
Expand Down

0 comments on commit 09df730

Please sign in to comment.