Skip to content

Commit

Permalink
Updated the assemble to allow append option
Browse files Browse the repository at this point in the history
  • Loading branch information
VisLab committed Nov 21, 2024
1 parent d1bb820 commit 56bea86
Show file tree
Hide file tree
Showing 16 changed files with 15,145 additions and 212 deletions.
5 changes: 2 additions & 3 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,8 @@ def get_as_xml_string(self, save_merged=True):
def get_as_dataframes(self, save_merged=False):
""" Get a dict of dataframes representing this file
save_merged: bool
If True, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.
Parameters:
save_merged (bool): If True, returns DFs as if merged with standard.
Returns:
dataframes(dict): a dict of dataframes you can load as a schema
Expand Down
3 changes: 2 additions & 1 deletion hed/schema/hed_schema_df_constants.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from hed.schema.hed_schema_constants import HedSectionKey
from hed.schema import hed_schema_constants

# Name of the column treated as the key when dataframes are merged
KEY_COLUMN_NAME = 'rdfs.label'

# Known tsv format suffixes
STRUCT_KEY = "Structure"
TAG_KEY = "Tag"
UNIT_KEY = "Unit"
Expand Down
41 changes: 41 additions & 0 deletions hed/schema/schema_io/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,47 @@
UNKNOWN_LIBRARY_VALUE = 0


def merge_dataframe_dicts(df_dict1, df_dict2, key_column=constants.KEY_COLUMN_NAME):
    """ Create a new dictionary of DataFrames where dict2 is merged into dict1.

    Does not validate contents or suffixes.

    Parameters:
        df_dict1 (dict of str: pd.DataFrame): dataframes to use as destination merge.
        df_dict2 (dict of str: pd.DataFrame): dataframes to use as a merge element.
        key_column (str): name of the column that is treated as the key when dataframes are merged.

    Returns:
        dict of str: pd.DataFrame: one entry per key found in either input. Keys present in both
            inputs map to a merged copy; keys present in only one input map to that input's
            dataframe object itself (not a copy). Keys appear in the order of df_dict1, followed
            by the keys that only df_dict2 has, in df_dict2's order.

    Raises:
        HedFileError: if, for a key present in both inputs, either dataframe is empty or lacks key_column.
    """
    # Start from dict1 so the result order is deterministic (a set union would not be).
    result_dict = dict(df_dict1)
    for key, df2 in df_dict2.items():
        if key in df_dict1:
            result_dict[key] = _merge_dataframes(df_dict1[key], df2, key_column)
        else:
            result_dict[key] = df2

    return result_dict


def _merge_dataframes(df1, df2, key_column):
    """ Return a copy of df1 extended with the columns that only df2 has.

    Rows are matched on key_column using a left merge, so rows whose key appears only in df2
    are not added. Columns that df1 already has are never overwritten by df2.

    Parameters:
        df1 (pd.DataFrame): destination dataframe; it is not modified.
        df2 (pd.DataFrame): dataframe supplying the additional columns.
        key_column (str): name of the column used to match rows.

    Returns:
        pd.DataFrame: new dataframe with df1's columns followed by the columns unique to df2.
            All missing values (including any already present in df1) are replaced by ''.

    Raises:
        HedFileError: if either dataframe is empty or does not have key_column.
    """
    if df1.empty or df2.empty or key_column not in df1.columns or key_column not in df2.columns:
        raise HedFileError(HedExceptions.BAD_COLUMN_NAMES,
                           f"Both dataframes to be merged must be non-empty and have a '{key_column}' column", "")

    new_columns = [col for col in df2.columns if col not in df1.columns and col != key_column]
    if new_columns:
        # Merge every new column in a single pass. Merging one column at a time re-copies the
        # frame per column and, when df2 repeats a key, pairs values taken from different df2 rows.
        merged = df1.merge(df2[[key_column] + new_columns], on=key_column, how='left')
    else:
        merged = df1.copy()

    # Fill missing values with ''
    return merged.fillna('')

def save_dataframes(base_filename, dataframe_dict):
""" Writes out the dataframes using the provided suffixes.
Expand Down
Loading

0 comments on commit 56bea86

Please sign in to comment.