Skip to content

Commit

Permalink
Updating word cloud visualization in HED tag summary to have parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
VisLab committed Feb 19, 2024
1 parent 9766bea commit a91a4dd
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 59 deletions.
35 changes: 20 additions & 15 deletions hed/tools/remodeling/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def __init__(self, operation_list, data_root=None,
:raises ValueError:
- If any of the operations cannot be parsed correctly.
"""

self.data_root = data_root
self.backup_name = backup_name
self.backup_man = None
Expand All @@ -60,7 +60,6 @@ def get_summaries(self, file_formats=['.txt', '.json']):
Returns:
list: A list of dictionaries of summaries keyed to filenames.
"""

summary_list = []
Expand Down Expand Up @@ -101,9 +100,9 @@ def get_data_file(self, file_designator):
In this case, the corresponding backup file is read and returned.
- If a string is passed and there is no backup manager,
the data file corresponding to the file_designator is read and returned.
- If a Pandas DataFrame, return a copy.
- If a Pandas DataFrame, return a copy.
"""

if isinstance(file_designator, pd.DataFrame):
return file_designator.copy()
if self.backup_man:
Expand All @@ -126,7 +125,6 @@ def get_summary_save_dir(self):
:raises HedFileError:
- If this dispatcher does not have a data_root.
"""

if self.data_root:
Expand All @@ -143,7 +141,6 @@ def run_operations(self, file_path, sidecar=None, verbose=False):
Returns:
DataFrame: The processed dataframe.
"""

# string to functions
Expand Down Expand Up @@ -173,8 +170,8 @@ def save_summaries(self, save_formats=['.json', '.txt'], individual_summaries="s
- "consolidated" means that the overall summary and summaries of individual files are in one summary file.
- "individual" means that the summaries of individual files are in separate files.
- "none" means that only the overall summary is produced.
"""

if not save_formats:
return
if not summary_dir:
Expand All @@ -185,6 +182,15 @@ def save_summaries(self, save_formats=['.json', '.txt'], individual_summaries="s

@staticmethod
def parse_operations(operation_list):
""" Return a parsed list of remodeler operations.
Parameters:
operation_list (list): List of JSON remodeler operations.
Returns:
list: List of Python objects containing parsed remodeler operations.
"""

operations = []
for index, item in enumerate(operation_list):
new_operation = valid_operations[item["operation"]](item["parameters"])
Expand All @@ -197,24 +203,24 @@ def prep_data(df):
Parameters:
df (DataFrame) - The DataFrame to be processed.
"""

result = df.replace('n/a', np.nan)
# Comment in the next line if this behavior was actually needed, but I don't think it is.
# result = result.infer_objects(copy=False)
return result

@staticmethod
def post_proc_data(df):
""" Replace all nan entries with 'n/a' for BIDS compliance
""" Replace all nan entries with 'n/a' for BIDS compliance.
Parameters:
df (DataFrame): The DataFrame to be processed.
Returns:
DataFrame: DataFrame with the 'np.NAN replaced by 'n/a'
DataFrame: DataFrame with np.nan replaced by 'n/a'.
"""

dtypes = df.dtypes.to_dict()
for col_name, typ in dtypes.items():
if typ == 'category':
Expand All @@ -232,10 +238,9 @@ def errors_to_str(messages, title="", sep='\n'):
Returns:
str: Single string representing the messages.
"""
error_list = [0]*len(messages)

error_list = [0] * len(messages)
for index, message in enumerate(messages):
error_list[index] = f"Operation[{message.get('index', None)}] " + \
f"has error:{message.get('error_type', None)}" + \
Expand All @@ -255,8 +260,8 @@ def get_schema(hed_versions):
Returns:
HedSchema or HedSchemaGroup: Objects loaded from the hed_versions specification.
"""

if not hed_versions:
return None
elif isinstance(hed_versions, str) or isinstance(hed_versions, list):
Expand Down
83 changes: 71 additions & 12 deletions hed/tools/remodeling/operations/summarize_hed_tags_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,43 @@ class SummarizeHedTagsOp(BaseOp):
"type": "boolean"
},
"word_cloud": {
"type": "boolean"
"type": "object",
"properties": {
"height": {
"type": "integer"
},
"width": {
"type": "integer"
},
"prefer_horizontal": {
"type": "number"
},
"min_font_size": {
"type": "number"
},
"max_font_size": {
"type": "number"
},
"scale_adjustment": {
"type": "number"
},
"contour_width": {
"type": "number"
},
"contour_color": {
"type": "string"
},
"background_color": {
"type": "string"
},
"use_mask": {
"type": "boolean"
},
"mask_path": {
"type": "string"
}
},
"additionalProperties": False
},
},
"required": [
Expand Down Expand Up @@ -102,7 +138,26 @@ def __init__(self, parameters):
self.include_context = parameters.get('include_context', True)
self.replace_defs = parameters.get("replace_defs", True)
self.remove_types = parameters.get("remove_types", [])
self.word_cloud = parameters.get("word_cloud", False)
if "word_cloud" not in parameters:
self.word_cloud = None
else:
wc_params = parameters["word_cloud"]
self.word_cloud = {
"height": wc_params.get("height", 300),
"width": wc_params.get("width", 400),
"prefer_horizontal": wc_params.get("prefer_horizontal", 0.75),
"min_font_size": wc_params.get("min_font_size", 8),
"max_font_size": wc_params.get("max_font_size", 15),
"scale_adjustment": wc_params.get("scale_adjustment", 7),
"contour_width": wc_params.get("contour_width", 3),
"contour_color": wc_params.get("contour_color", 'black'),
"background_color": wc_params.get("background_color", None),
"use_mask": wc_params.get("use_mask", False),
"mask_path": wc_params.get("mask_path", None)
}
if self.word_cloud["use_mask"] and not self.word_cloud["mask_path"]:
self.word_cloud["mask_path"] = os.path.realpath(os.path.join(os.path.dirname(__file__),
'../../../resources/word_cloud_brain_mask.png'))

def do_op(self, dispatcher, df, name, sidecar=None):
""" Summarize the HED tags present in the dataset.
Expand Down Expand Up @@ -144,6 +199,7 @@ def __init__(self, sum_op):
sum_op (BaseOp): Operation associated with this summary.
"""

super().__init__(sum_op)
self.sum_op = sum_op

Expand Down Expand Up @@ -237,31 +293,34 @@ def save_visualizations(self, save_dir, file_formats=['.svg'], individual_summar
"""
if not self.sum_op.word_cloud:
return
else:
wc = self.sum_op.word_cloud
# summary = self.get_summary(individual_summaries='none')
summary = self.get_summary(individual_summaries='none')
overall_summary = summary.get("Dataset", {})
overall_summary = overall_summary.get("Overall summary", {})
specifics = overall_summary.get("Specifics", {})
word_dict = self.summary_to_dict(specifics)
width = 400
height = 300
mask_path = os.path.realpath(os.path.join(os.path.dirname(__file__),
'../../../resources/word_cloud_brain_mask.png'))
tag_wc = create_wordcloud(word_dict, mask_path=mask_path, width=width, height=height)
word_dict = self.summary_to_dict(specifics, scale_adjustment=wc["scale_adjustment"])

tag_wc = create_wordcloud(word_dict, mask_path=wc["mask_path"], width=wc["width"], height=wc["height"],
prefer_horizontal=wc["prefer_horizontal"], background_color=wc["background_color"],
min_font_size=wc["min_font_size"], max_font_size=wc["max_font_size"],
contour_width=wc["contour_width"], contour_color=wc["contour_color"])
svg_data = word_cloud_to_svg(tag_wc)
cloud_filename = os.path.realpath(os.path.join(save_dir, self.op.summary_name, '_word_cloud.svg'))
with open(cloud_filename, "w") as outfile:
outfile.writelines(svg_data)

@staticmethod
def summary_to_dict(specifics, transform=np.log10, adjustment=7):
def summary_to_dict(specifics, transform=np.log10, scale_adjustment=7):
"""Convert a HedTagSummary json specifics dict into the word cloud input format.
Parameters:
specifics(dict): Dictionary with keys "Main tags" and "Other tags".
transform(func): The function to transform the number of found tags.
Default log10
adjustment(int): Value added after transform.
scale_adjustment(int): Value added after transform.
Returns:
word_dict(dict): a dict of the words and their occurrence count.
Expand All @@ -278,10 +337,10 @@ def transform(x):
if tag == "Exclude tags":
continue
for tag_sub_dict in tag_sub_list:
word_dict[tag_sub_dict['tag']] = transform(tag_sub_dict['events']) + adjustment
word_dict[tag_sub_dict['tag']] = transform(tag_sub_dict['events']) + scale_adjustment
other_dict = specifics.get("Other tags", [])
for tag_sub_list in other_dict:
word_dict[tag_sub_list['tag']] = transform(tag_sub_list['events']) + adjustment
word_dict[tag_sub_list['tag']] = transform(tag_sub_list['events']) + scale_adjustment
return word_dict

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/visualization/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Visualization tools for HED. """

from .tag_word_cloud import create_wordcloud, summary_to_dict, word_cloud_to_svg
from .tag_word_cloud import create_wordcloud, word_cloud_to_svg

32 changes: 2 additions & 30 deletions hed/tools/visualization/tag_word_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400
kwargs.setdefault('color_func', default_color_func)
kwargs.setdefault('relative_scaling', 1)
kwargs.setdefault('max_font_size', height / 20)
kwargs.setdefault('min_font_size', 8)
kwargs.setdefault('min_font_size', 8),


wc = WordCloud(background_color=background_color, mask=mask_image,
width=width, height=height, mode="RGBA", **kwargs)
Expand All @@ -66,35 +67,6 @@ def word_cloud_to_svg(wc):
return svg_string


def summary_to_dict(summary, transform=np.log10, adjustment=5):
    """Convert a HedTagSummary JSON dict into the word cloud input format.

    Only the "Main tags" section found under "Overall summary" -> "Specifics"
    is read. Missing sections are treated as empty, so an empty summary
    produces an empty result.

    Parameters:
        summary (dict): The summary from a SummarizeHedTagsOp.
        transform (func): Function applied to each tag's event count (default log10).
            If None, the raw event count is used unchanged.
        adjustment (int): Value added after transform.

    Returns:
        dict: Maps each tag to its transformed event count plus adjustment.
            If a tag appears more than once, the last occurrence wins.

    :raises KeyError:
        - If a tag entry lacks the 'tag' or 'events' key.

    """
    # Use a separate local for the fallback rather than redefining the parameter.
    scale = transform if transform is not None else (lambda count: count)

    specifics = summary.get("Overall summary", {}).get("Specifics", {})
    main_tags = specifics.get("Main tags", {})

    word_dict = {}
    for tag_entries in main_tags.values():
        for entry in tag_entries:
            word_dict[entry['tag']] = scale(entry['events']) + adjustment
    return word_dict


def load_and_resize_mask(mask_path, width=None, height=None):
""" Load a mask image and resize it according to given dimensions.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def test_convert_summary_to_word_dict(self):
}
expected_output = {'tag1': 5, 'tag2': 3, 'tag3': 7}

word_dict = HedTagSummary.summary_to_dict(summary_json, transform=None, adjustment=0)
word_dict = HedTagSummary.summary_to_dict(summary_json, transform=None, scale_adjustment=0)
self.assertEqual(word_dict, expected_output)


Expand Down

0 comments on commit a91a4dd

Please sign in to comment.