From eeae34d5c7bf07ec2c613adc4a40359188d6fcb1 Mon Sep 17 00:00:00 2001 From: Marko Toplak Date: Mon, 8 Jul 2024 10:50:49 +0200 Subject: [PATCH 1/5] Table.from_table: refactor: separate cache-independent part --- Orange/data/table.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/Orange/data/table.py b/Orange/data/table.py index beb46089305..d238c4d62c8 100644 --- a/Orange/data/table.py +++ b/Orange/data/table.py @@ -792,6 +792,16 @@ def from_table(cls, domain, source, row_indices=...): :return: a new table :rtype: Orange.data.Table """ + if domain is source.domain: + table = cls.from_table_rows(source, row_indices) + # assure resulting domain is the instance passed on input + table.domain = domain + # since sparse flags are not considered when checking for + # domain equality, fix manually. + with table.unlocked_reference(): + table = assure_domain_conversion_sparsity(table, source) + return table + new_cache = _thread_local.conversion_cache is None try: if new_cache: @@ -801,15 +811,6 @@ def from_table(cls, domain, source, row_indices=...): cached = _idcache_restore(_thread_local.conversion_cache, (domain, source)) if cached is not None: return cached - if domain is source.domain: - table = cls.from_table_rows(source, row_indices) - # assure resulting domain is the instance passed on input - table.domain = domain - # since sparse flags are not considered when checking for - # domain equality, fix manually. - with table.unlocked_reference(): - table = assure_domain_conversion_sparsity(table, source) - return table # avoid boolean indices; also convert to slices if possible row_indices = _optimize_indices(row_indices, len(source)) From 074ca1fd37adb8103a064382f885892670c544c5 Mon Sep 17 00:00:00 2001 From: Marko Toplak Date: Thu, 4 Jul 2024 16:07:51 +0200 Subject: [PATCH 2/5] Table: only deepcopy .attributes for the outermost transform --- Orange/data/table.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Orange/data/table.py b/Orange/data/table.py index d238c4d62c8..21fdb88b5c1 100644 --- a/Orange/data/table.py +++ b/Orange/data/table.py @@ -835,7 +835,8 @@ def from_table(cls, domain, source, row_indices=...): self.W = source.W[row_indices] self.name = getattr(source, 'name', '') self.ids = source.ids[row_indices] - self.attributes = deepcopy(getattr(source, 'attributes', {})) + if new_cache: # only deepcopy attributes for the outermost transformation + self.attributes = deepcopy(getattr(source, 'attributes', {})) _idcache_save(_thread_local.conversion_cache, (domain, source), self) return self finally: From 9bcbbcaf983b2e326a5b23d2d2375232a587df83 Mon Sep 17 00:00:00 2001 From: Marko Toplak Date: Mon, 8 Jul 2024 09:45:21 +0200 Subject: [PATCH 3/5] Table: single deepcopy of .attributes in from_table/from_table_rows mixes --- Orange/data/table.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Orange/data/table.py b/Orange/data/table.py index 21fdb88b5c1..076c84cd035 100644 --- a/Orange/data/table.py +++ b/Orange/data/table.py @@ -881,6 +881,7 @@ def from_table_rows(cls, source, row_indices): :return: a new table :rtype: Orange.data.Table """ + is_outermost_transformation = _thread_local.conversion_cache is None self = cls() self.domain = source.domain with self.unlocked_reference(): @@ -894,7 +895,8 @@ def from_table_rows(cls, source, row_indices): self.W = source.W[row_indices] self.name = getattr(source, 'name', '') self.ids = source.ids[row_indices] - self.attributes = deepcopy(getattr(source, 'attributes', {})) + if is_outermost_transformation: + self.attributes = deepcopy(getattr(source, 'attributes', {})) return self @classmethod From d7634aa3ae3aff8d55f8e034a98076e96e166e48 Mon Sep 17 00:00:00 2001 From: Marko Toplak Date: Mon, 8 Jul 2024 09:51:42 +0200 Subject: [PATCH 4/5] Table: single deepcopy of .attributes test --- Orange/tests/test_table.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Orange/tests/test_table.py b/Orange/tests/test_table.py index a9b237259ea..a86e27ba82a 100644 --- a/Orange/tests/test_table.py +++ b/Orange/tests/test_table.py @@ -2068,6 +2068,24 @@ def test_attributes_copied(self): # attributes dict of old table not be changed since new dist is a copy self.assertDictEqual(self.table.attributes, {"A": "Test", "B": []}) + def test_attributes_copied_once(self): + A = Mock() + A.__deepcopy__ = Mock() + self.table.attributes = {"A": A} + + # a single direct transformation + self.table.from_table(self.table.domain, self.table) + self.assertEqual(1, A.__deepcopy__.call_count) + A.__deepcopy__.reset_mock() + + # hierarchy of transformations + ndom = Domain([a.copy(compute_value=lambda x: x.transform(Domain([a]))) + for a in self.table.domain.attributes]) + self.table.from_table(ndom, self.table) + self.assertEqual(1, A.__deepcopy__.call_count) + # HISTORIC: before only the outermost transformation deepcopied the + # attributes, here were 23 calls to __deepcopy__ instead of 1 + def isspecial(s): return isinstance(s, slice) or s is Ellipsis From 4460fbb1ca5c0c010826cc96f5cab6fcd0af7fb9 Mon Sep 17 00:00:00 2001 From: Marko Toplak Date: Mon, 8 Jul 2024 11:09:38 +0200 Subject: [PATCH 5/5] table.attributes are always referenced in transformations --- Orange/data/table.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Orange/data/table.py b/Orange/data/table.py index 076c84cd035..c82f3e10271 100644 --- a/Orange/data/table.py +++ b/Orange/data/table.py @@ -835,8 +835,9 @@ def from_table(cls, domain, source, row_indices=...): self.W = source.W[row_indices] self.name = getattr(source, 'name', '') self.ids = source.ids[row_indices] + self.attributes = getattr(source, 'attributes', {}) if new_cache: # only deepcopy attributes for the outermost transformation - self.attributes = deepcopy(getattr(source, 'attributes', {})) + self.attributes = deepcopy(self.attributes) _idcache_save(_thread_local.conversion_cache, (domain, source), self) return self finally: @@ -895,8 +896,9 @@ def from_table_rows(cls, source, row_indices): self.W = source.W[row_indices] self.name = getattr(source, 'name', '') self.ids = source.ids[row_indices] + self.attributes = getattr(source, 'attributes', {}) if is_outermost_transformation: - self.attributes = deepcopy(getattr(source, 'attributes', {})) + self.attributes = deepcopy(self.attributes) return self @classmethod