From 35b4048f44412bb7e52669b29d08d5ba84552163 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Fri, 2 Feb 2024 14:18:40 -0600 Subject: [PATCH] Many corrections to the remodeling tools documentation --- docs/source/FileRemodelingTools.md | 88 ++++++++++++++++-------------- docs/source/HedMatlabTools.md | 5 ++ 2 files changed, 51 insertions(+), 42 deletions(-) diff --git a/docs/source/FileRemodelingTools.md b/docs/source/FileRemodelingTools.md index d9b0fbc..01fb569 100644 --- a/docs/source/FileRemodelingTools.md +++ b/docs/source/FileRemodelingTools.md @@ -711,8 +711,7 @@ from the data file if the columns exist. "operation": "remove_columns", "description": "Remove unwanted columns prior to analysis", "parameters": { - "remove_names": ["value", "sample"], - "ignore_missing": true + "remove_names": ["value", "sample"] } } ] @@ -829,7 +828,7 @@ based on column values. | ------------ | ---- | ----------- | | *column_name* | str | The name of the column to be factored.| | *factor_values* | list | Column values to be included as factors. | -| *factor_names* | list| Column names for created factors. | +| *factor_names* | list| (**Optional**) Column names for created factors. | ``` If *column_name* is not a column in the data file, a `ValueError` is raised. @@ -841,8 +840,8 @@ If a specified value is missing in a particular file, the corresponding factor c If *factor_names* is empty, the newly created columns are of the form *column_name.factor_value*. Otherwise, the newly created columns have names *factor_names*. -If *factor_names* is not empty, then *factor_values* must also be specified and -both lists must be of the same length. +If *factor_names* is not empty, then *factor_values* must also be specified +and both lists must be of the same length. 
(factor-column-example-anchor)= #### Factor column example @@ -906,9 +905,9 @@ The [**HED search guide**](./HedSearchGuide.md) tutorial discusses the HED searc | Parameter | Type | Description | | ------------ | ---- | ----------- | | *queries* | list | A list of HED query strings. | -| *query_names* | list | A list of names for the resulting factor columns generated by the queries. | -| *remove_types* | list | Structural HED tags to be removed (usually `Condition-variable` and `Task`). | -| *expand_context* | bool | (Optional) Expand the context and remove `Onse` and`Offset` tags before the query. | +| *query_names* | list | (**Optional**) A list of names for the factor columns generated by the queries. | +| *remove_types* | list | (**Optional**) Structural HED tags to be removed (usually `Condition-variable` and `Task`). | +| *expand_context* | bool | (**Optional**: default True) Expand the context and remove
`Onset` and `Offset` tags before the query. | ``` The *query_names* list, which must be empty or the same length as *queries*, @@ -916,7 +915,10 @@ contains the names of the factor columns produced by the search. If the *query_names* list is empty, the result columns are titled "query_1", "query_2", etc. -The *remove_types* and *expand_context* are not yet implemented, and hence ignored in the current release. +Most of the time the *remove_types* should be set to `["Condition-variable", "Task"]` and the effects of +the experimental design captured using the `factor_hed_type` operation. +If *expand_context* is set to *false*, the additional context provided by `Onset`, `Offset`, and `Duration` +is ignored. (factor-hed-tags-example-anchor)= #### Factor HED tags example @@ -936,7 +938,7 @@ The resulting factor columns are named *correct* and *incorrect*, respectively. "parameters": { "queries": ["correct-action", "incorrect-action"], "query_names": ["correct", "incorrect"], - "remove_types": [], + "remove_types": ["Condition-variable", "Task"], "expand_context": false } }] @@ -986,8 +988,10 @@ For additional information on how to encode experimental designs using HED, see | Parameter | Type | Description | | ------------ | ---- | ----------- | | *type_tag* | str | HED tag used to find the factors (most commonly *Condition-variable*).| -| *type_values* | list | Values to factor for the *type_tag*.
If empty, all values of that *type_tag* are used. | +| *type_values* | list | (**Optional**) Values to factor for the *type_tag*.
If omitted, all values of that *type_tag* are used. | ``` +The event context (as defined by onsets, offsets and durations) is always expanded and one-hot (0's and 1's) +encoding is used for the factors. (factor-hed-type-example-anchor)= #### Factor HED type example @@ -1006,8 +1010,7 @@ applies and 0's otherwise. "operation": "factor_hed_type", "description": "Factor based on the sex of the images being presented.", "parameters": { - "type_tag": "Condition-variable", - "type_values": [] + "type_tag": "Condition-variable" } }] ``` @@ -1047,9 +1050,9 @@ duration updated to encompass the temporal extent of the merged events. | ------------ | ---- | ----------- | | *column_name* | str | The name of the column which is the basis of the merge.| | *event_code* | str, int, float | The value in *column_name* that triggers the merge. | -| *match_columns* | list | Columns whose values must match to collapse events. | | *set_durations* | bool | If true, set durations based on merged events. | -| *ignore_missing* | bool | If true, missing *column_name* or *match_columns* do not raise an error. | +| *ignore_missing* | bool | If true, missing *column_name* or *match_columns* do not raise an error. | +| *match_columns* | list | (**Optional**) Columns whose values must match to collapse events. | ``` The first of the group of rows (each representing an event) to be merged is called the anchor @@ -1088,9 +1091,9 @@ have the same values to be merged into a single event. "parameters": { "column_name": "trial_type", "event_code": "succesful_stop", - "match_columns": ["stop_signal_delay", "response_hand", "sex"], "set_durations": true, - "ignore_missing": true + "ignore_missing": true, + "match_columns": ["stop_signal_delay", "response_hand", "sex"] } }] ``` @@ -1161,7 +1164,7 @@ Remapping can be used to convert the column containing these codes into one or m | *destination_columns* | list | A list of *n* names of the destination columns for the map. 
| | *map_list* | list | A list of mappings. Each element is a list of *m* source
column values followed by *n* destination values.
Mapping source values are treated as strings. | | *ignore_missing* | bool | If false, source column values not in the map generate "n/a"
destination values instead of errors. | -| *integer_sources* | list | [**Optional**] A list of source columns that are integers.
The *integer_sources* must be a subset of *source_columns*. | +| *integer_sources* | list | (**Optional**) A list of source columns that are integers.
The *integer_sources* must be a subset of *source_columns*. | ``` A column cannot be both a source and a destination, and all source columns must be present in the data files. @@ -1169,7 +1172,7 @@ New columns are created for destination columns that are missing from a data fil The *remap_columns* operation only works for columns containing strings or integers, as it is meant for remapping categorical codes. -You must specify the which source columns contain integers so that `n/a` values +You must specify which source columns contain integers so that `n/a` values can be handled appropriately. The *map_list* parameter specifies how each unique combination of values from the source @@ -1490,6 +1493,7 @@ The results of executing the previous *reorder_columns* transformation on the The *split_rows* operation is often used to convert event files from trial-level encoding to event-level encoding. +This operation is meant only for tabular files that have `onset` and `duration` columns. In **trial-level** encoding, all the events in a single trial (usually some variation of the cue-stimulus-response-feedback-ready sequence) @@ -1515,7 +1519,6 @@ In this case a trial consists of a sequence of multiple events. ``` - The *split_rows* operation requires an *anchor_column*, which could be an existing column or a new column to be appended to the data. The purpose of the *anchor_column* is to hold the codes for the new events. @@ -1651,7 +1654,7 @@ all summaries. | ------------ | ---- | ----------- | | *summary_name* | str | A unique name used to identify this summary.| | *summary_filename* | str | A unique file basename to use for saving this summary. | -| *append_timecode* | bool | (Optional) If True, append a time code to filename.
False is the default. | +| *append_timecode* | bool | (**Optional**: Default false) If true, append a time code to filename. | ``` (summarize-column-names-example-anchor)= @@ -1730,11 +1733,11 @@ The following table lists the parameters required for using the summary. | ------------ | ---- | ----------- | | *summary_name* | str | A unique name used to identify this summary.| | *summary_filename* | str | A unique file basename to use for saving this summary. | -| *skip_columns* | list | A list of column names to omit from the summary.| -| *value_columns* | list | A list of columns to omit the listing unique values. | -| *append_timecode* | bool | (Optional) If True, append a time code to filename.
False is the default.| -| *max_categorical* | int | (Optional) If given, the text summary shows top *max_categorical* values.
Otherwise the text summary displays all categorical values.| -| *values_per_line* | bool | (Optional) If given, the text summary displays this
number of values per line (default is 5).| +| *append_timecode* | bool | (**Optional**: Default false) If true, append a time code to filename. | +| *max_categorical* | int | (**Optional**: Default 50) If given, the text summary shows top *max_categorical* values.
Otherwise the text summary displays all categorical values.| +| *skip_columns* | list | (**Optional**) A list of column names to omit from the summary.| +| *value_columns* | list | (**Optional**) A list of columns for which to omit the listing of unique values. | +| *values_per_line* | int | (**Optional**: Default 5) If given, the text summary displays this
number of values per line.| ``` @@ -1866,10 +1869,11 @@ The following table lists the parameters required for using the summary. | ------------ | ---- | ----------- | | *summary_name* | str | A unique name used to identify this summary.| | *summary_filename* | str | A unique file basename to use for saving this summary. | -| *append_timecode* | bool | (Optional) If True, append a time code to filename.
False is the default.| +| *append_timecode* | bool | (**Optional**: Default false) If true, append a time code to filename. | ``` -The *summarize_definitions* is mainly meant for verifying consistency in unknown `Def-expand` tags. This comes up where you have an assembled dataset, but no longer have the definitions stored (or never created them to begin with). +The *summarize_definitions* is mainly meant for verifying consistency in unknown `Def-expand` tags. +This comes up where you have an assembled dataset, but no longer have the definitions stored (or never created them to begin with). (summarize-definitions-example-anchor)= @@ -2029,10 +2033,10 @@ The *summarize_hed_tags* operation has the two required parameters | *summary_name* | str | A unique name used to identify this summary.| | *summary_filename* | str | A unique file basename to use for saving this summary. | | *tags* | dict | Dictionary with category title keys and tags in that category as values. | -| *append_timecode* | bool | (Optional) If True, append a time code to filename.
False is the default.| -| *include_context* | bool | (Optional) If true, expand the event context to
account for onsets and offsets. | -| *replace_defs* | bool | (Optional) If true, the `Def` tags are replaced with the
contents of the definition (no `Def` or `Def-expand`). | -| *remove_types* | list | (Optional) A list of types (such as `Condition-variable` and `Task` to remove. | +| *append_timecode* | bool | (**Optional**: Default false) If true, append a time code to filename. | +| *include_context* | bool | (**Optional**: Default true) If true, expand the event context to
account for onsets and offsets. | +| *replace_defs* | bool | (**Optional**: Default true) If true, the `Def` tags are replaced with the
contents of the definition (no `Def` or `Def-expand`). | +| *remove_types* | list | (**Optional**) A list of types such as `Condition-variable` and `Task` to remove. | ``` The *tags* dictionary has keys that specify how the user wishes the tags @@ -2159,7 +2163,7 @@ This summary provides useful information about experimental design. | *summary_name* | str | A unique name used to identify this summary.| | *summary_filename* | str | A unique file basename to use for saving this summary. | | *type_tag* | str | Tag to produce a summary for (most often *condition-variable*).| -| *append_timecode* | bool | (Optional) If True, append a time code to filename.
False is the default.| +| *append_timecode* | bool | (**Optional**: Default false) If true, append a time code to filename.| ``` In addition to the two standard parameters (*summary_name* and *summary_filename*), the *type_tag* parameter is required. @@ -2251,8 +2255,8 @@ If *check_for_warnings* is false, the summary will not report warnings. | ------------ | ---- | ----------- | | *summary_name* | str | A unique name used to identify this summary.| | *summary_filename* | str | A unique file basename to use for saving this summary. | -| *append_timecode* | bool | (Optional) If True, append a time code to filename.
False is the default.| -| *check_for_warnings* | bool | (Optional) If true, warnings are reported in addition to errors.
False is the default.| +| *append_timecode* | bool | (**Optional**: Default false) If true, append a time code to filename. | +| *check_for_warnings* | bool | (**Optional**: Default false) If true, warnings are reported in addition to errors. | ``` The *summarize_hed_validation* is a HED operation and the calling program must provide a HED schema version and usually a JSON sidecar containing the HED annotations. @@ -2622,13 +2626,13 @@ since the names specified in the first parameter are meant to represent the quer The check only takes place if `query_names` exists, since naming is handled automatically otherwise. ```python - @staticmethod - def validate_input_data(parameters): - errors = [] - if parameters.get("query_names", False): - if len(parameters.get("query_names")) != len(parameters.get("queries")): - errors.append("The list in query_names, in the factor_hed_tags operation, should have the same number of items as queries.") - return errors +@staticmethod +def validate_input_data(parameters): + errors = [] + if parameters.get("query_names", False): + if len(parameters.get("query_names")) != len(parameters.get("queries")): + errors.append("The list in query_names, in the factor_hed_tags operation, should have the same number of items as queries.") + return errors ``` diff --git a/docs/source/HedMatlabTools.md b/docs/source/HedMatlabTools.md index 01b637d..53c6b63 100644 --- a/docs/source/HedMatlabTools.md +++ b/docs/source/HedMatlabTools.md @@ -595,10 +595,15 @@ Python may be installed in your user space or in system space for all users. - You may want to add the location of the Python executable to your PATH. (Most installers give you that option as part of the installation.) 
+#### Installing in a virtual environment +See the MathWorks answer [Python virtual environments with Python interface](https://www.mathworks.com/support/search.html/answers/1750425-python-virtual-environments-with-python-interface.html?fq%5B%5D=asset_type_name:answer&page=1) for details. (step-3-connect-python-to-matlab-anchor)= #### Step 3: Connect Python to Matlab + +For example, on Windows a virtual environment can be created with `C:\Users\username\AppData\Local\Programs\Python\python -m venv C:\Users\username\py38`. + Setting the Python version uses the MATLAB `pyenv` function with the `'Version'` argument as illustrated by the following example.