Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor version updates of supporting libraries #1037

Merged
merged 2 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,8 @@ def get_as_xml_string(self, save_merged=True):
def get_as_dataframes(self, save_merged=False):
""" Get a dict of dataframes representing this file

save_merged: bool
If True, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.
Parameters:
save_merged (bool): If True, returns DFs as if merged with standard.

Returns:
dataframes(dict): a dict of dataframes you can load as a schema
Expand Down
3 changes: 2 additions & 1 deletion hed/schema/hed_schema_df_constants.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from hed.schema.hed_schema_constants import HedSectionKey
from hed.schema import hed_schema_constants

# Known tsv format suffixes
KEY_COLUMN_NAME = 'rdfs.label'

# Known tsv format suffixes
STRUCT_KEY = "Structure"
TAG_KEY = "Tag"
UNIT_KEY = "Unit"
Expand Down
41 changes: 41 additions & 0 deletions hed/schema/schema_io/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,47 @@
UNKNOWN_LIBRARY_VALUE = 0


def merge_dataframe_dicts(df_dict1, df_dict2, key_column=constants.KEY_COLUMN_NAME):
    """ Create a new dictionary of DataFrames where dict2 is merged into dict1.

    Does not validate contents or suffixes.

    Parameters:
        df_dict1(dict of str: pd.DataFrame): dataframes to use as destination merge.
        df_dict2(dict of str: pd.DataFrame): dataframes to use as a merge element.
        key_column(str): name of the column that is treated as the key when dataframes are merged

    Returns:
        dict of str: pd.DataFrame: a new dictionary holding every key found in either input.
            Keys present in both inputs map to the merged dataframe. Keys present in only one
            input map to that input's dataframe object itself (it is not copied).
            Keys appear in the order of df_dict1, followed by the keys found only in df_dict2.

    Raises:
        HedFileError: if a key is in both inputs and either of its dataframes is empty
            or lacks the key column (raised by _merge_dataframes).
    """
    result_dict = {}

    # Walk the input dictionaries directly rather than a set of their keys so that the
    # order of the result is the same on every run (set iteration order is not stable).
    for key, df1 in df_dict1.items():
        if key in df_dict2:
            result_dict[key] = _merge_dataframes(df1, df_dict2[key], key_column)
        else:
            result_dict[key] = df1

    # Anything left in df_dict2 has no counterpart in df_dict1 and is passed through.
    for key, df2 in df_dict2.items():
        if key not in df_dict1:
            result_dict[key] = df2

    return result_dict


def _merge_dataframes(df1, df2, key_column):
# Add columns from df2 that are not in df1, only for rows that are in df1

if df1.empty or df2.empty or key_column not in df1.columns or key_column not in df2.columns:
raise HedFileError(HedExceptions.BAD_COLUMN_NAMES,
f"Both dataframes to be merged must be non-empty had nave a '{key_column}' column", "")
df1 = df1.copy()
for col in df2.columns:
if col not in df1.columns and col != key_column:
df1 = df1.merge(df2[[key_column, col]], on=key_column, how='left')

# Fill missing values with ''
df1.fillna('', inplace=True)

return df1

def save_dataframes(base_filename, dataframe_dict):
""" Writes out the dataframes using the provided suffixes.

Expand Down
Loading