diff --git a/CHANGELOG.md b/CHANGELOG.md index 24602c890..c73a4d965 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ - Fixed broken LaTeX PDF build of the docs. @oruebel (#669) - Fixed adding containers as a child to a parent container sometimes not marking the parent container as modified. @rly (#683) +- Fixed `to_hierarchical_dataframe` failing when a table contains a `VectorIndex` column as a regular data column. + @oruebel (#666) ## HDMF 3.1.1 (July 29, 2021) diff --git a/src/hdmf/common/hierarchicaltable.py b/src/hdmf/common/hierarchicaltable.py index b5ca4db08..8322d2d73 100644 --- a/src/hdmf/common/hierarchicaltable.py +++ b/src/hdmf/common/hierarchicaltable.py @@ -101,7 +101,7 @@ def to_hierarchical_dataframe(dynamic_table): names=('source_table', 'label')) # Case 2: Our DynamicTableRegion columns points to another table with a DynamicTableRegion, i.e., - # we need to recursively resolve more levels of the table hieararchy + # we need to recursively resolve more levels of the table hierarchy else: # First we need to recursively flatten the hierarchy by calling 'to_hierarchical_dataframe()' # (i.e., this function) on the target of our hierarchical column @@ -134,6 +134,24 @@ def to_hierarchical_dataframe(dynamic_table): hcol_hdf.index.names) columns = hcol_hdf.columns + # Check if the index contains any unhashable types. If a table contains a VectorIndex column + # (other than the DynamicTableRegion column) then "TypeError: unhashable type: 'list'" will + # occur when converting the index to pd.MultiIndex. To avoid this error, we next check if any + # of the columns in our index are of type list or np.ndarray + unhashable_index_cols = [] + if len(index) > 0: + unhashable_index_cols = [i for i, v in enumerate(index[0]) if isinstance(v, (list, np.ndarray))] + + # If we have any unhashable list or np.array objects in the index then update them to tuples. 
+ # Ideally we would detect this case when constructing the index, but it is easier to do this + # here and it should not be much more expensive, but it requires iterating over all rows again + if len(unhashable_index_cols) > 0: + for i, v in enumerate(index): + temp = list(v) + for ci in unhashable_index_cols: + temp[ci] = tuple(temp[ci]) + index[i] = tuple(temp) + # Construct the pandas dataframe with the hierarchical multi-index multi_index = pd.MultiIndex.from_tuples(index, names=index_names) out_df = pd.DataFrame(data=data, index=multi_index, columns=columns) diff --git a/tests/unit/common/test_linkedtables.py b/tests/unit/common/test_linkedtables.py index 48a9fd6a0..09680d4c2 100644 --- a/tests/unit/common/test_linkedtables.py +++ b/tests/unit/common/test_linkedtables.py @@ -438,6 +438,58 @@ def test_to_hierarchical_dataframe_indexed_dtr_on_last_level(self): self.assertListEqual(hier_df[('aligned_table', ('level0_0', 'tags'))].to_list(), [['tag1'], ['tag2'], ['tag2', 'tag1']]) + def test_to_hierarchical_dataframe_indexed_data_nparray(self): + # Test that we can convert a table that contains a VectorIndex column as regular data, + # i.e., it is not our DynamicTableRegion column that is indexed but a regular data column. + # In this test the data is defined as a numpy ndarray so that an ndarray is injected + # into the MultiIndex of the table. 
As a numpy array is not hashable this would normally + # create an error when creating the MultiIndex + # Parent table + dtr_p1 = DynamicTableRegion(name='l1', description='l1', data=np.arange(4), table=self.aligned_table) + vi_dtr_p1 = VectorIndex(name='sl1_index', data=[1, 2, 3], target=dtr_p1) + p1 = DynamicTable(name='parent_table', description='parent_table', + columns=[VectorData(name='p1', description='p1', data=np.arange(3)), dtr_p1, vi_dtr_p1]) + # Super-parent table + dtr_sp = DynamicTableRegion(name='sl1', description='sl1', data=np.arange(3), table=p1) + spt = DynamicTable(name='super_parent_table', description='super_parent_table', + columns=[VectorData(name='sp1', description='sp1', data=np.arange(3)), dtr_sp]) + spt.add_column(name='vic', description='vic', data=np.arange(9), index=[2, 4, 6]) + hier_df = to_hierarchical_dataframe(spt).reset_index() + expected_columns = [('super_parent_table', 'id'), ('super_parent_table', 'sp1'), ('super_parent_table', 'vic'), + ('parent_table', 'id'), ('parent_table', 'p1'), + ('aligned_table', 'id'), + ('aligned_table', ('aligned_table', 'a1')), ('aligned_table', ('level0_0', 'id')), + ('aligned_table', ('level0_0', 'tags')), ('aligned_table', ('level0_0', 'myid'))] + self.assertListEqual(hier_df.columns.to_list(), expected_columns) # make sure we have the right columns + self.assertListEqual(hier_df[('aligned_table', ('level0_0', 'tags'))].to_list(), + [['tag1'], ['tag2'], ['tag2', 'tag1']]) + + def test_to_hierarchical_dataframe_indexed_data_list(self): + # Test that we can convert a table that contains a VectorIndex column as regular data, + # i.e., it is not our DynamicTableRegion column that is indexed but a regular data column. + # In this test the data is defined as a list so that a list is injected + # into the MultiIndex of the table. 
As a list is not hashable this would normally + # create an error when creating the MultiIndex + # Parent table + dtr_p1 = DynamicTableRegion(name='l1', description='l1', data=np.arange(4), table=self.aligned_table) + vi_dtr_p1 = VectorIndex(name='sl1_index', data=[1, 2, 3], target=dtr_p1) + p1 = DynamicTable(name='parent_table', description='parent_table', + columns=[VectorData(name='p1', description='p1', data=np.arange(3)), dtr_p1, vi_dtr_p1]) + # Super-parent table + dtr_sp = DynamicTableRegion(name='sl1', description='sl1', data=np.arange(3), table=p1) + spt = DynamicTable(name='super_parent_table', description='super_parent_table', + columns=[VectorData(name='sp1', description='sp1', data=np.arange(3)), dtr_sp]) + spt.add_column(name='vic', description='vic', data=list(range(9)), index=list([2, 4, 6])) + hier_df = to_hierarchical_dataframe(spt).reset_index() + expected_columns = [('super_parent_table', 'id'), ('super_parent_table', 'sp1'), ('super_parent_table', 'vic'), + ('parent_table', 'id'), ('parent_table', 'p1'), + ('aligned_table', 'id'), + ('aligned_table', ('aligned_table', 'a1')), ('aligned_table', ('level0_0', 'id')), + ('aligned_table', ('level0_0', 'tags')), ('aligned_table', ('level0_0', 'myid'))] + self.assertListEqual(hier_df.columns.to_list(), expected_columns) # make sure we have the right columns + self.assertListEqual(hier_df[('aligned_table', ('level0_0', 'tags'))].to_list(), + [['tag1'], ['tag2'], ['tag2', 'tag1']]) + def test_to_hierarchical_dataframe_empty_tables(self): # Setup empty tables with the following hierarchy # super_parent_table ---> parent_table ---> child_table