Skip to content

Commit

Permalink
Add RegionMap.from_dataframe() and RegionMap.as_dict() methods (#32)
Browse files Browse the repository at this point in the history
* Add RegionMap.from_dataframe() method

* Convert nan to None and float to int when it is possible

* Add test for multiple root nodes

* Separate the dict creation from the RegionMap creation

* Move the dataframe_to_dict() to a protected free function

* Move _dataframe_to_dict to the end of the file

* Add RegionMap.as_dict() method

* Minor cleaning
  • Loading branch information
adrien-berchet authored Apr 3, 2024
1 parent 6522ae7 commit 1feef57
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 8 deletions.
3 changes: 2 additions & 1 deletion tests/data/region_map.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@
"graph_order": 3,
"st_level": null,
"hemisphere_id": 3,
"parent_structure_id": 567
"parent_structure_id": 567,
"children": []
}
]
}
Expand Down
51 changes: 44 additions & 7 deletions tests/test_region_map.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import json
import os
from unittest.mock import mock_open, patch

import pytest

import voxcell.region_map as test_module
from voxcell.exceptions import VoxcellError

TEST_RMAP = test_module.RegionMap.from_dict({
'id': -1,
DATA_PATH = os.path.join(os.path.dirname(__file__), 'data')

TEST_RMAP_DICT = {
'id': 0,
'name': 'root',
'fullname': 'The Root Node',
'children': [
Expand All @@ -20,6 +23,7 @@
'id': 2,
'name': 'B',
'fullname': 'Bb',
'children': [],
},
{
'id': 3,
Expand All @@ -30,12 +34,15 @@
'id': 4,
'name': 'B',
'fullname': 'bB',
'children': [],
}
]
}
]}
]
})
}

TEST_RMAP = test_module.RegionMap.from_dict(TEST_RMAP_DICT)


def test_find_basic():
Expand Down Expand Up @@ -156,6 +163,16 @@ def test_from_dict_duplicate_id():
})


def test_as_dict():
    """Check that ``RegionMap.as_dict()`` gives back the dict the map was built from."""
    # Hierarchy defined in memory at the top of this module
    assert TEST_RMAP.as_dict() == TEST_RMAP_DICT

    # Round trip of the hierarchy stored in the JSON fixture
    json_path = os.path.join(DATA_PATH, "region_map.json")
    with open(json_path) as fd:
        expected = json.load(fd)
    assert test_module.RegionMap.from_dict(expected).as_dict() == expected


def test_is_leaf_id():
assert TEST_RMAP.is_leaf_id(1) is False
assert TEST_RMAP.is_leaf_id(2) is True
Expand All @@ -165,23 +182,43 @@ def test_is_leaf_id():

def test_is_leaf_id_non_existing_id():
    """Querying an id that is absent from the hierarchy must raise a ``VoxcellError``."""
    # The root node of TEST_RMAP has the id 0, so 0 is an existing id and must not be
    # used here: only an id that is absent from the hierarchy is queried.
    with pytest.raises(VoxcellError):
        TEST_RMAP.is_leaf_id(9999)  # non-existing id


def test_as_dataframe():
    """Check the parent ids, attributes and children counts exported by ``as_dataframe()``."""
    df = TEST_RMAP.as_dataframe()

    # The root node has the id 0 and is flagged with a parent_id of -1
    assert df.loc[0].parent_id == -1
    assert df.loc[1].parent_id == 0
    assert df.loc[2].parent_id == 1
    assert df.loc[3].parent_id == 1
    assert df.loc[4].parent_id == 3

    # Node attributes are exported as columns
    assert df.loc[1]['name'] == 'A'
    assert df.loc[1]['fullname'] == 'aA'

    # Number of direct children of each node
    assert df.loc[0].children_count == 1
    assert df.loc[1].children_count == 2
    assert df.loc[2].children_count == 0
    assert df.loc[3].children_count == 1
    assert df.loc[4].children_count == 0


def test_from_dataframe():
    """Round-trip region maps through ``as_dataframe()`` and ``from_dataframe()``."""

    def assert_same_hierarchy(actual, expected):
        # Two region maps are considered identical when their internal tables match
        for attr in ("_data", "_parent", "_children"):
            assert getattr(actual, attr) == getattr(expected, attr)

    # Test with a simple RegionMap
    simple_df = TEST_RMAP.as_dataframe()
    assert_same_hierarchy(test_module.RegionMap.from_dataframe(simple_df), TEST_RMAP)

    # Test with more complex data
    json_path = os.path.join(DATA_PATH, "region_map.json")
    reference = test_module.RegionMap.load_json(json_path)
    rebuilt = test_module.RegionMap.from_dataframe(reference.as_dataframe())
    assert_same_hierarchy(rebuilt, reference)

    # Test with multiple root nodes: flagging a second node as root must be rejected
    two_roots_df = reference.as_dataframe()
    two_roots_df.loc[8, "parent_id"] = -1
    with pytest.raises(RuntimeError):
        test_module.RegionMap.from_dataframe(two_roots_df)
79 changes: 79 additions & 0 deletions voxcell/region_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import re

import numpy as np
import pandas as pd

from voxcell.exceptions import VoxcellError
Expand Down Expand Up @@ -142,6 +143,17 @@ def as_dataframe(self):
ret.loc[:, 'children_count'] = [len(self._children[_id]) for _id in ret.index.to_list()]
return ret

@classmethod
def from_dataframe(cls, hierarchy_df):
    """Build a region_map from a DataFrame describing the hierarchy.

    The DataFrame is first turned into a nested dict, which is then passed to
    ``from_dict()``.

    Note: the 'root' node should have a parent value of -1.

    Note: if it is possible to cast all non-null values of a column with float dtype to int,
    then it will be done.
    """
    hierarchy = _dataframe_to_dict(hierarchy_df)
    return cls.from_dict(hierarchy)

def _get(self, _id, attr):
"""Fetch attribute value for a given region ID."""
if _id not in self._data:
Expand Down Expand Up @@ -185,6 +197,28 @@ def include(data, parent_id):
include(copy.deepcopy(d), None)
return result

def as_dict(self):
    """Export the region_map as a nested dict.

    Each node is a deep copy of the data stored for its id, completed with a 'children'
    entry holding the list of its child nodes (an empty list for a leaf). The returned
    dict is the node whose parent is None.
    """
    # The root is the first node registered without any parent
    root_idx = next(
        (node_id for node_id, parent in self._parent.items() if parent is None),
        None,
    )

    def build_node(node_id):
        # Copy the stored data so that the exported dict can be modified freely
        node = copy.deepcopy(self._data[node_id])
        node["children"] = [build_node(child_id) for child_id in self._children[node_id]]
        return node

    return build_node(root_idx)

@classmethod
def load_json(cls, filepath):
"""Construct RegionMap from JSON file.
Expand All @@ -201,3 +235,48 @@ def load_json(cls, filepath):
content = content['msg'][0]

return cls.from_dict(content)


def _dataframe_to_dict(hierarchy_df):
"""Use a dataframe to create a dict that can then be used by RegionMap.from_dict()."""
nodes = hierarchy_df.to_dict(orient="index")
float_cols = hierarchy_df.dtypes.loc[hierarchy_df.dtypes == float].index.to_list()
dropna_float_cols = {
float_col: hierarchy_df[float_col].dropna()
for float_col in float_cols
}
float_int_cols = {
float_col
for float_col, col in dropna_float_cols.items()
if (col.astype(int) == col).all()
}
root_idx = None
for k, v in nodes.items():
v["id"] = k
v.pop("children_count", None)
parent_id = v.pop("parent_id", None)
for float_col in float_cols:
if float_col in v:
if np.isnan(v[float_col]):
v[float_col] = None
elif float_col in float_int_cols:
v[float_col] = int(v[float_col])
if parent_id == -1:
if root_idx is not None:
msg = (
f"Only one node can be the root node with parent_id == -1 but the node "
f"{root_idx} was already defined as root"
)
raise RuntimeError(msg)
root_idx = k
if "children" not in v:
v["children"] = []
continue
parent_node = nodes[parent_id]
if "children" not in parent_node:
parent_node["children"] = []
parent_node["children"].append(v)

# Here the root element is extracted since each element is referenced at both the root of
# the dict and in the children of another element
return nodes[root_idx]

0 comments on commit 1feef57

Please sign in to comment.