From de7a6508f1a562c4e1b238bd029f7137af6f1468 Mon Sep 17 00:00:00 2001 From: Wu-Jung Lee Date: Thu, 28 Mar 2024 02:30:48 +0800 Subject: [PATCH 1/3] docs: README and docs/index revision (#185) * separate out the goal and specific current dev for Echoview * small wording changes --- README.md | 22 +++++++++++++--------- docs/source/index.md | 20 ++++++++++++-------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 5c95e8f9..6a318031 100644 --- a/README.md +++ b/README.md @@ -5,17 +5,21 @@ ![example workflow](https://github.com/OSOceanAcoustics/echoregions/actions/workflows/pytest.yml/badge.svg) -Echoregions is a tool that interfaces annotations from Echoview and masks for water column sonar data for Machine Learning (ML) developments. Manual annotations from Echoview are widely used in fisheries acoustics community for labeling the presence of different animal species, and the presence of bottoms on echograms. Echoregions is designed to be used as an intermediate software between Echoview annotation data products and conventional Python Machine Learning data products. The end goal for Echoregions is to allow the user to easily go from Echoview -> ML data products, and ML -> Echoview data products. Presently, the Echoview -> ML data products pipeline has been built. +Echoregions is a tool that interfaces annotations of water column sonar data with Machine Learning (ML) models. + +The annotations are typically regions indicating the presence of specific animal species or lines delineating ocean boundaries, such as the seafloor or sea surface, in the "echogram" (sonar images formed by echo returns). 
The interfacing functionalities operate in two directions: +- Annotation to ML: Parsing and organizing manual annotations for preparing training and test datasets for ML developments +- ML to Annotation: Generating annotations from ML predictions that can be used for further downstream processing + +At present, functionalities in the Annotation to ML direction have been built for interfacing manual annotations from the Echoview software, which is widely used in the fisheries acoustics community. We plan to add functionalities in the ML to Annotation direction in the near future. ## Functionalities As of now, Echoregions contains functions to: -- Read, organize, and store Echoview manual annotations +- Read, organize, and store Echoview manual annotations (regions and lines) - Create masks by combining the manual annotations and xarray water column sonar datasets generated by [Echopype](https://github.com/OSOceanAcoustics/echopype) -We plan to add additional functions to build the ML -> Echoview data products pipeline. This will allow the user to create bottom and region annotations from ML predictions and convert to a format that can be easily visualized and manipulated in Echoview. - -The underlying annotation data is stored as a Pandas dataframe, which allows users to leverage the powerful indexing and computational tools provided by Pandas. +Note that in Echoregions, the underlying annotation data is stored as a Pandas dataframe, which allows users to directly leverage the powerful indexing and computing functionalities provided by Pandas. ## Documentation @@ -23,16 +27,16 @@ Learn more about Echoregions functions in the documentation at https://echoregio See the [API documentation](https://echoregions.readthedocs.io/en/latest/api.html) for all of the classes and functions available in echoregions. +## Contributors + +Echoregions development is currently led by Caesar Tuguinay (@ctuguinay), with inputs from Wu-Jung Lee (@leewujung) and Valentina Staneva (@valentina-s).
Kavin Nguyen (@ngkavin) contributed significantly to the initial version. + ## Acknowledgement We thank the NOAA Northwest Fisheries Science Center (NWFSC) Fisheries Engineering and Acoustics Team (FEAT) for supporting this project. NOAA_fisheries_logo -## Contributors - -Echoregions development is currently led by Caesar Tuguinay (@ctuguinay), with inputs from Wu-Jung Lee (@leewujung) and Valentina Staneva (@valentina-s). Kavin Nguyen (@ngkavin) contributed significantly to the initial version. - ## License Echoregions is licensed under the open source [Apache 2.0 license](https://opensource.org/licenses/Apache-2.0). diff --git a/docs/source/index.md b/docs/source/index.md index 74377fba..c29c6513 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -1,16 +1,24 @@ # Echoregions -Echoregions is a tool that interfaces annotations from Echoview and masks for water column sonar data for Machine Learning (ML) developments. Manual annotations from Echoview are widely used in fisheries acoustics community for labeling the presence of different animal species, and the presence of bottoms on echograms. Echoregions is designed to be used as an intermediate software between Echoview annotation data products and conventional Python Machine Learning data products. The end goal for Echoregions is to allow the user to easily go from Echoview -> ML data products, and ML -> Echoview data products. Presently, the Echoview -> ML data products pipeline has been built. +Echoregions is a tool that interfaces annotations of water column sonar data with Machine Learning (ML) models. + +The annotations are typically regions indicating the presence of specific animal species or lines delineating ocean boundaries, such as the seafloor or sea surface, in the "echogram" (sonar images formed by echo returns). 
The interfacing functionalities operate in two directions: +- Annotation to ML: Parsing and organizing manual annotations for preparing training and test datasets for ML developments +- ML to Annotation: Generating annotations from ML predictions that can be used for further downstream processing + +At present, functionalities in the Annotation to ML direction have been built for interfacing manual annotations from the Echoview software, which is widely used in the fisheries acoustics community. We plan to add functionalities in the ML to Annotation direction in the near future. ## Functionalities As of now, Echoregions contains functions to: -- Read, organize, and store Echoview manual annotations +- Read, organize, and store Echoview manual annotations (regions and lines) - Create masks by combining the manual annotations and xarray water column sonar datasets generated by [Echopype](https://github.com/OSOceanAcoustics/echopype) -We plan to add additional functions to build the ML -> Echoview data products pipeline. This will allow the user to create bottom and region annotations from ML predictions and convert to a format that can be easily visualized and manipulated in Echoview. +Note that in Echoregions, the underlying annotation data is stored as a Pandas dataframe, which allows users to directly leverage the powerful indexing and computing functionalities provided by Pandas. -The underlying annotation data is stored as a Pandas dataframe, which allows users to leverage the powerful indexing and computational tools provided by Pandas. +## Contributors + +Echoregions development is currently led by Caesar Tuguinay (@ctuguinay), with inputs from Wu-Jung Lee (@leewujung) and Valentina Staneva (@valentina-s). Kavin Nguyen (@ngkavin) contributed significantly to the initial version. 
## Acknowledgement @@ -22,10 +30,6 @@ We thank the NOAA Northwest Fisheries Science Center (NWFSC) Fisheries Engineeri ``` -## Contributors - -Echoregions development is currently led by Caesar Tuguinay (@ctuguinay), with inputs from Wu-Jung Lee (@leewujung) and Valentina Staneva (@valentina-s). Kavin Nguyen (@ngkavin) contributed significantly to the initial version. - ## License Echoregions is licensed under the open source [Apache 2.0 license](https://opensource.org/licenses/Apache-2.0). From 6df235663c28bf9f5b3a4c7bf0cb3583eed9520e Mon Sep 17 00:00:00 2001 From: Caesar Tuguinay <87830138+ctuguinay@users.noreply.github.com> Date: Wed, 27 Mar 2024 12:03:42 -0700 Subject: [PATCH 2/3] Import Transect Checking from Hake-Labels (#162) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * import transect checking from hake-labels, reduce strictness * add small bbox distance threshold test * simplify logic and fix bt et test * Update echoregions/regions2d/regions2d.py Co-authored-by: Wu-Jung Lee * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add wu jung's review suggestions * incorporate suggestion 5 second part * add wujungs suggestions * Merge main to feature branch (#168) * [pre-commit.ci] pre-commit autoupdate (#161) updates: - [github.com/PyCQA/isort: 5.12.0 → 5.13.1](https://github.com/PyCQA/isort/compare/5.12.0...5.13.1) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * [pre-commit.ci] pre-commit autoupdate (#165) updates: - [github.com/psf/black: 23.11.0 → 23.12.0](https://github.com/psf/black/compare/23.11.0...23.12.0) - [github.com/PyCQA/isort: 5.13.1 → 5.13.2](https://github.com/PyCQA/isort/compare/5.13.1...5.13.2) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Bump actions/download-artifact from 3 to 4 (#164) Bumps 
[actions/download-artifact](https://github.com/actions/download-artifact) from 3 to 4. - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/download-artifact dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump actions/upload-artifact from 3 to 4 (#163) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3 to 4. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * [pre-commit.ci] pre-commit autoupdate (#167) updates: - [github.com/psf/black: 23.12.0 → 23.12.1](https://github.com/psf/black/compare/23.12.0...23.12.1) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Disentangle Nested If Else in Select Region and Add Region Class Selections (#160) * Disentangle nested if-else under Regions2D.select_region * update regions2d functions for region_id docstring and typehints * add region class * Update echoregions/regions2d/regions2d.py Co-authored-by: Wu-Jung Lee * Update echoregions/regions2d/regions2d.py Co-authored-by: Wu-Jung Lee * add wu jung's suggestions * add test for both non NaN region id and region class * small tweak of select_region docstring * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: Wu-Jung Lee Co-authored-by: ctuguinay Co-authored-by: pre-commit-ci[bot] 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> --------- Signed-off-by: dependabot[bot] Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Wu-Jung Lee Co-authored-by: ctuguinay * fix idx min idx man logic * fix comment * Update .pre-commit-config.yaml * attempt to resolve conflict * test small whitespace change * revert change * add period * remove period * add space * revert change * move _check_transect_sequences outside * incorporate wu jungs bbox distance threshold comment --------- Signed-off-by: dependabot[bot] Co-authored-by: ctuguinay Co-authored-by: Wu-Jung Lee Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- echoregions/regions2d/regions2d.py | 336 ++++++++++++----------- echoregions/test_data/transect_BT_ET.evr | 4 +- echoregions/test_data/transect_no_ET.evr | 225 --------------- echoregions/test_data/transect_no_ST.evr | 232 ---------------- echoregions/tests/test_regions2d.py | 150 +++++++--- 5 files changed, 297 insertions(+), 650 deletions(-) delete mode 100644 echoregions/test_data/transect_no_ET.evr delete mode 100644 echoregions/test_data/transect_no_ST.evr diff --git a/echoregions/regions2d/regions2d.py b/echoregions/regions2d/regions2d.py index 0c5f5fd0..bd36cd69 100644 --- a/echoregions/regions2d/regions2d.py +++ b/echoregions/regions2d/regions2d.py @@ -16,6 +16,72 @@ from .regions2d_parser import parse_evr, parse_regions_df +def _check_transect_sequences( + transect_df: pd.DataFrame, + transect_sequence_type_next_allowable_dict: dict, + bbox_distance_threshold: float, + must_pass_check: bool, +) -> None: + """ + Checking of transect sequences in the Regions2d transect dataframe. + + Parameters + ---------- + transect_df : pd.DataFrame + Inner Regions2d transect dataframe. 
+ transect_sequence_type_next_allowable_dict : dict + Dictionary for the allowable transect sequence type value(s) that can follow a + transect sequence type value. + bbox_distance_threshold: float + Maximum allowable value between the left and right bounding box timestamps + for each region that is marked as a transect log. Default is set to 1 minute. + must_pass_check : bool + True: Will check transect strings to enforce sequence rules. If this check + encounters any incorrect transect type sequence orders or wider than bbox distance + threshold regions, it will raise an exception. + False: Will still check transect strings but will instead just print out warnings + for violations of the above mentioned sequence rules. + """ + # Create an empty list to collect error messages. + warning_messages = [] + + # Ensure correct sequence of transect types occur. + # If they do not, append to warning_messages. + for _, row in transect_df.iterrows(): + transect_type = row["transect_type"] + transect_type_next = row["transect_type_next"] + # Check for correct transect_type_next values + if transect_type_next not in transect_sequence_type_next_allowable_dict[transect_type]: + type_next_warning_message = ( + f"Error in region_id {row['region_id']}:" + f"Transect string {transect_type} is followed by " + f"invalid value {transect_type_next}. 
Must be followed by " + f"{transect_sequence_type_next_allowable_dict[transect_type]}" + ) + warning_messages.append(type_next_warning_message) + + # Identify rows wider than bbox distance threshold if they exist + wider_than_bbox_distance_threshold_rows = transect_df[ + (transect_df["region_bbox_right"] - transect_df["region_bbox_left"]).dt.total_seconds() / 60 + > bbox_distance_threshold + ] + wider_than_bbox_distance_threshold_region_ids = wider_than_bbox_distance_threshold_rows[ + "region_id" + ].tolist() + if wider_than_bbox_distance_threshold_region_ids: + warning_messages.append( + f"Problematic region id values with maximum time width wider than bbox " + f"distance threshold: {wider_than_bbox_distance_threshold_region_ids}" + ) + + # Raise an exception if there are any warning messages and must_pass_check is True. + # Else, print warning messages. + if len(warning_messages) > 0 and must_pass_check: + raise Exception("\n".join(warning_messages)) + else: + print("\n".join(warning_messages)) + + class Regions2D: """ Class that contains and performs operations with Regions2D data from Echoview EVR files. @@ -544,13 +610,20 @@ def mask( def transect_mask( self, da_Sv: DataArray, - transect_dict: dict = { + transect_sequence_type_dict: dict = { "start": "ST", "break": "BT", "resume": "RT", "end": "ET", }, + transect_sequence_type_next_allowable_dict: dict = { + "ST": ["BT", "ET"], + "BT": ["RT"], + "RT": ["BT", "ET"], + "ET": ["ET", ""], + }, bbox_distance_threshold: float = 1.0, + must_pass_check: bool = False, ) -> DataArray: """Mask data from Data Array containing Sv data based off of a Regions2D object and its transect_values. @@ -564,30 +637,43 @@ def transect_mask( ---------- da_Sv : Data Array DataArray of shape (ping_time, depth) containing Sv data. - transect_dict : dict - Dictionary for transect values. Values must be unique. + transect_sequence_type_dict : dict + Dictionary for transect sequence type values. 
The values denote where in the context + of the transect each region lays in, i.e. whether we are at the beginning of the + transect, a break in the transect, a resumption of the transect, or at the end + of the transect. + transect_sequence_type_next_allowable_dict : dict + Dictionary for the allowable transect sequence type value(s) that can follow a + transect sequence type value. bbox_distance_threshold: float - The maximum value for how far apart the left and right bounding box for each transect - value region. Default is set to 1 minute. + Maximum allowable value between the left and right bounding box timestamps + for each region that is marked as a transect log. Default is set to 1 minute. + must_pass_check : bool + True: Will check transect strings to enforce sequence rules. If this check + encounters any incorrect transect type sequence orders or wider than bbox distance + threshold regions, it will raise an exception. + False: Will still check transect strings but will instead just print out warnings + for violations of the above mentioned sequence rules. Returns ------- M : Data Array - A DataArray masked by the transect values from the Regions2d.data dataframe - with dimensions (ping_time, depth). + A binary DataArray with dimensions (ping_time, depth) where 1s are within transect + and 0s are outside transect. """ # Get transect strings - start_str = transect_dict["start"] - break_str = transect_dict["break"] - resume_str = transect_dict["resume"] - end_str = transect_dict["end"] + start_str = transect_sequence_type_dict["start"] + break_str = transect_sequence_type_dict["break"] + resume_str = transect_sequence_type_dict["resume"] + end_str = transect_sequence_type_dict["end"] transect_strs = [start_str, break_str, resume_str, end_str] # Check that there are 4 unique transect strings if len(transect_strs) != len(set(transect_strs)): raise ValueError( - "There exist duplicate values in transect_dict. " "All values must be unique." 
+ "There exist duplicate values in transect_sequence_type_dict. " + "All values must be unique." ) for transect_str in transect_strs: if not isinstance(transect_str, str): @@ -605,157 +691,97 @@ def transect_mask( | region_df.loc[:, "region_name"].str.startswith(end_str) ].copy() - # Create a new column which stores the transect_type without the transect number - transect_df.loc[:, "transect_type"] = transect_df.loc[:, "region_name"].str.extract( - rf"({start_str}|{break_str}|{resume_str}|{end_str})" - ) - - # Check if for all transects, there exists 1 start_str transect type. - # If there does not exists a start_str transect, set the first region - # to be the start_str transect. - if not ( - transect_df.groupby("file_name").apply( - lambda x: x[x["transect_type"] == start_str].count() - )["file_name"] - == 1 - ).all(): - warnings.warn( - UserWarning( - f"There exists a transect that does not contain a single {start_str} " - "transect_type." - ) - ) - # Modify first row of original dataframe such that its transect type has value start_str - # and add it into transect df as its first row. - first_row = region_df.loc[region_df.index[0]].copy().to_frame().T - first_row["transect_type"] = start_str - transect_df = pd.concat([first_row, transect_df]).reset_index(drop=True) - - # Check if for all transects, there exists 1 end_str transect type. - if not ( - transect_df.groupby("file_name").apply( - lambda x: x[x["transect_type"] == end_str].count() - )["file_name"] - == 1 - ).all(): - warnings.warn( - UserWarning( - f"There exists a transect that does not contain a single {end_str} " - "transect type." - ) - ) - # Modify last row of original dataframe such that its transect type has value end_str - # and add it into transect df as its last row. - last_row = region_df.tail(1).copy() - last_row["transect_type"] = end_str - transect_df = pd.concat([transect_df, last_row]).reset_index(drop=True) - - # Checking the maximum width of a transect log region bbox. 
- # If over a minute, throw an error. - max_time = (transect_df["region_bbox_right"] - transect_df["region_bbox_left"]).max() - max_time_minutes = max_time.total_seconds() / 60 - if max_time_minutes > bbox_distance_threshold: - Warning( - f"Maximum width in time of transect log region bboxs is " - f"too large i.e. over {bbox_distance_threshold} minute(s). " - f"The maximum width is: {max_time_minutes}.", - UserWarning, + if not transect_df.empty: + # Drop time duplicates, sort the dataframe by datetime, and reset Transect Dataframe Index. + transect_df = ( + transect_df.drop_duplicates(subset=["region_bbox_left"]) + .sort_values(by="region_bbox_left") + .reset_index() ) - # Drop time duplicates - transect_df = transect_df.drop_duplicates(subset=["region_bbox_left"]) - - # Sort the dataframe by datetime - transect_df = transect_df.sort_values(by="region_bbox_left") - # Create new shifted columns with the next transect log type and next region - # bbox left datetime value. - transect_df.loc[:, "transect_type_next"] = transect_df.loc[:, "transect_type"].shift(-1) - transect_df.loc[:, "region_bbox_left_next"] = transect_df.loc[:, "region_bbox_left"].shift( - -1 - ) - - # Check if start_str followed by break_str/end_str. - start_transect_rows = transect_df[transect_df["transect_type"] == start_str] - start_transect_type_next_list = list(start_transect_rows["transect_type_next"].values) - for transect_type_next in start_transect_type_next_list: - if transect_type_next not in [break_str, end_str]: - raise ValueError( - f"Transect start string is followed by invalid value " - f"{transect_type_next}. Must be followed by either " - f"{break_str} or {end_str}" - ) + # Create a new column which stores the transect_type without the transect number + transect_df.loc[:, "transect_type"] = transect_df.loc[:, "region_name"].str.extract( + rf"({start_str}|{break_str}|{resume_str}|{end_str})" + ) - # Check if break_str followed by resume_str. 
- break_transect_rows = transect_df[transect_df["transect_type"] == break_str] - break_transect_type_next_list = list(break_transect_rows["transect_type_next"].values) - for transect_type_next in break_transect_type_next_list: - if transect_type_next != resume_str: - raise ValueError( - f"Transect break string is followed by invalid value " - f"{transect_type_next}. Must be followed by {resume_str}." - ) + # Create new shifted columns with the next transect log type and next region + # bbox left datetime value. + transect_df.loc[:, "transect_type_next"] = transect_df.loc[:, "transect_type"].shift(-1) + transect_df.loc[:, "region_bbox_left_next"] = transect_df.loc[ + :, "region_bbox_left" + ].shift(-1) + + # Set transect_type_next values to be empty strings if they are NAs. + transect_df["transect_type_next"] = transect_df.apply( + lambda x: "" if isna(x["transect_type_next"]) else x["transect_type_next"], + axis=1, + ) - # Check if resume_str followed by break_str/end_str. - resume_transect_rows = transect_df[transect_df["transect_type"] == resume_str] - resume_transect_type_next_list = list(resume_transect_rows["transect_type_next"].values) - for transect_type_next in resume_transect_type_next_list: - if transect_type_next not in [break_str, end_str]: - raise ValueError( - f"Transect resume string is followed by invalid value " - f"{transect_type_next}. Must be followed by either " - f"{break_str} or {end_str}." - ) + # Check transect sequences + _check_transect_sequences( + transect_df, + transect_sequence_type_next_allowable_dict, + bbox_distance_threshold, + must_pass_check, + ) - # Check if end_str followed by start_str or if NA. - end_transect_rows = transect_df[transect_df["transect_type"] == end_str] - end_transect_type_next_list = list(end_transect_rows["transect_type_next"].values) - for transect_type_next in end_transect_type_next_list: - # If this value is not NA, check if it is start_str. 
- if not isna(transect_type_next): - if transect_type_next != start_str: - raise ValueError( - f"Transect end string is followed by invalid value " - f"{transect_type_next}. Must be followed by {start_str}." - ) + # Create binary variable indicating within transect segments. + transect_df["within_transect"] = False + + # Indices where start_str followed by break_str/end_str + st_indices = (transect_df["transect_type"] == start_str) & transect_df[ + "transect_type_next" + ].isin(transect_sequence_type_next_allowable_dict[start_str]) + transect_df.loc[st_indices, "within_transect"] = True + + # Indices where resume_str followed by break_str/end_str + rt_indices = (transect_df["transect_type"] == resume_str) & transect_df[ + "transect_type_next" + ].isin(transect_sequence_type_next_allowable_dict[resume_str]) + transect_df.loc[rt_indices, "within_transect"] = True + + # Extract the min and max timestamps for filtering + min_timestamp = da_Sv.ping_time.min().values + max_timestamp = da_Sv.ping_time.max().values + + # Find the last index right before min_timestamp if it exists. + # Else choose the minimum row. + region_bbox_left_prior = transect_df.loc[ + transect_df["region_bbox_left"] < min_timestamp, "region_bbox_left" + ] + if region_bbox_left_prior.empty: + last_index_before_min = transect_df["region_bbox_left"].idxmin() + else: + last_index_before_min = region_bbox_left_prior.idxmax() + # Find the first index after max_timestamp if it exists. + # Else choose the maximum row. + region_bbox_right_after = transect_df.loc[ + transect_df["region_bbox_right"] > max_timestamp, "region_bbox_right" + ] + if region_bbox_right_after.empty: + first_index_after_max = transect_df["region_bbox_right"].idxmax() + else: + first_index_after_max = region_bbox_right_after.idxmin() - # Create binary variable indicating within transect segments. 
- transect_df["within_transect"] = False - - # Indices where start_str followed by break_str/end_str - st_indices = (transect_df["transect_type"] == start_str) & transect_df[ - "transect_type_next" - ].isin([break_str, end_str]) - transect_df.loc[st_indices, "within_transect"] = True - - # Indices where resume_str followed by break_str/end_str - rt_indices = (transect_df["transect_type"] == resume_str) & transect_df[ - "transect_type_next" - ].isin([break_str, end_str]) - transect_df.loc[rt_indices, "within_transect"] = True - - # Get all unique file_names in transect_df. - transect_querying_list = list(transect_df["file_name"].unique()) - - # Create list of masks for each file name to be queried. - mask_list = [] - for transect_querying_file_name in transect_querying_list: - within_transect_df = transect_df.query( - f'file_name == "{transect_querying_file_name}" and within_transect == True' - ) - T = xr.zeros_like(da_Sv) - for _, row in within_transect_df.iterrows(): - T = T + xr.where( - (T.ping_time > row["region_bbox_left"]) - & (T.ping_time < row["region_bbox_left_next"]), - 1, - 0, - ) - mask_list.append(T) + # Filter transect_df to get the within transect df + within_transect_df = transect_df[ + (transect_df["within_transect"]) + & (transect_df.index >= last_index_before_min) + & (transect_df.index <= first_index_after_max) + ] + else: + # Create empty within transect df + within_transect_df = pd.DataFrame() - # Combine masks. + # Create within transect mask M = xr.zeros_like(da_Sv) - for _, T in enumerate(mask_list): - M = M + T + for _, row in within_transect_df.iterrows(): + M = M + xr.where( + (M.ping_time > row["region_bbox_left"]) + & (M.ping_time < row["region_bbox_left_next"]), + 1, + 0, + ) # If M contains channel dimension, then drop it. 
if "channel" in M.dims: diff --git a/echoregions/test_data/transect_BT_ET.evr b/echoregions/test_data/transect_BT_ET.evr index be16144a..779654a5 100644 --- a/echoregions/test_data/transect_BT_ET.evr +++ b/echoregions/test_data/transect_BT_ET.evr @@ -203,11 +203,11 @@ Region27 13 4 28 0 6 -1 1 20170626 0018312145 -9999.9900000000 20170626 0018340925 9999.9900000000 1 -RT1 +Log 0 Log 20170626 0018312145 -9999.9900000000 20170626 0018312145 9999.9900000000 20170626 0018340925 9999.9900000000 20170626 0018340925 -9999.9900000000 2 -RT1 +Log 13 4 29 0 4 -1 1 20170625 2345389395 9.2447583998 20170626 0018312145 758.9732173069 0 diff --git a/echoregions/test_data/transect_no_ET.evr b/echoregions/test_data/transect_no_ET.evr deleted file mode 100644 index 61ad9c67..00000000 --- a/echoregions/test_data/transect_no_ET.evr +++ /dev/null @@ -1,225 +0,0 @@ -EVRG 7 12.0.341.42620 -29 - -13 4 1 0 6 -1 1 20170625 1612343335 -9999.99 20170625 1612382880 9999.99 -1 -CTD005 at depth -0 -Log -20170625 1612343335 -9999.9900000000 20170625 1612343335 9999.9900000000 20170625 1612382880 9999.9900000000 20170625 1612382880 -9999.9900000000 2 -CTD005 - -13 4 2 0 6 -1 1 20170625 1631363385 -9999.9900000000 20170625 1631402115 9999.9900000000 -1 -VN001 @ PC1500 -0 -Log -20170625 1631363385 -9999.9900000000 20170625 1631363385 9999.9900000000 20170625 1631402115 9999.9900000000 20170625 1631402115 -9999.9900000000 2 -VN001 - -13 4 3 0 6 -1 1 20170625 1658091225 -9999.9900000000 20170625 1658129995 9999.9900000000 -1 -ST1 - Finally!!!! 
-0 -Log -20170625 1658091225 -9999.9900000000 20170625 1658091225 9999.9900000000 20170625 1658129995 9999.9900000000 20170625 1658129995 -9999.9900000000 2 -ST1 - -13 4 4 0 4 -1 1 20170625 1539223320 9.2447583998 20170625 1658091225 758.9732173069 -0 -0 -Side station -20170625 1539223320 9.2447583998 20170625 1539223320 758.9732173069 20170625 1658091225 758.9732173069 20170625 1658091225 9.2447583998 0 -Region4 - -13 4 5 0 4 -1 1 20170625 1504281370 9.2447583998 20170625 1539262050 758.9732173069 -0 -0 -Off-transect -20170625 1504281370 9.2447583998 20170625 1504281370 758.9732173069 20170625 1539262050 758.9732173069 20170625 1539262050 9.2447583998 0 -Region5 - -13 4 6 0 6 -1 1 20170625 1757068065 -9999.9900000000 20170625 1757096875 9999.9900000000 -1 -BT2 for VN2 PC1000 -0 -Log -20170625 1757068065 -9999.9900000000 20170625 1757068065 9999.9900000000 20170625 1757096875 9999.9900000000 20170625 1757096875 -9999.9900000000 2 -BT2 - -13 4 7 0 6 -1 1 20170625 1826313220 -9999.9900000000 20170625 1826342015 9999.9900000000 -1 -CTD006 at depth -0 -Log -20170625 1826313220 -9999.9900000000 20170625 1826313220 9999.9900000000 20170625 1826342015 9999.9900000000 20170625 1826342015 -9999.9900000000 2 -CTD006 - -13 4 8 0 6 -1 1 20170625 1845164705 -9999.9900000000 20170625 1845193445 9999.9900000000 -1 -VN002 @ PC1000 in the water -0 -Log -20170625 1845164705 -9999.9900000000 20170625 1845164705 9999.9900000000 20170625 1845193445 9999.9900000000 20170625 1845193445 -9999.9900000000 2 -VN002 - -13 4 9 0 6 -1 1 20170625 1913155635 -9999.9900000000 20170625 1913185185 9999.9900000000 -1 -RT1 after VN002 -0 -Log -20170625 1913155635 -9999.9900000000 20170625 1913155635 9999.9900000000 20170625 1913185185 9999.9900000000 20170625 1913185185 -9999.9900000000 2 -RT1 - -13 4 10 0 4 -1 1 20170625 1757096875 9.2447583998 20170625 1913126075 758.9732173069 -0 -0 -Side station -20170625 1757096875 9.2447583998 20170625 1757096875 758.9732173069 20170625 1913126075 758.9732173069 
20170625 1913126075 9.2447583998 0 -Region10 - -13 10 11 0 2 -1 1 20170625 2000591807 102.2552007996 20170625 2002085357 127.9476029355 -0 -0 -Unknown -20170625 2001470930 102.2552007996 20170625 2001361638 103.7403107496 20170625 2001378879 109.5322395548 20170625 2001212025 113.5420364200 20170625 2000591807 116.2152343301 20170625 2001014815 125.1258940304 20170625 2001014815 126.0169600004 20170625 2001350142 127.9476029355 20170625 2002085357 120.0765202002 20170625 2002068059 106.7105306497 1 -Chicken nugget - -13 4 12 0 6 -1 1 20170625 2011470885 -9999.9900000000 20170625 2011499610 9999.9900000000 -1 -BT1 for VN3 @ PC500 -0 -Log -20170625 2011470885 -9999.9900000000 20170625 2011470885 9999.9900000000 20170625 2011499610 9999.9900000000 20170625 2011499610 -9999.9900000000 2 -BT1 - -13 4 13 0 6 -1 1 20170625 2043188975 -9999.9900000000 20170625 2043217705 9999.9900000000 -1 -CTD007 at PC500 -0 -Log -20170625 2043188975 -9999.9900000000 20170625 2043188975 9999.9900000000 20170625 2043217705 9999.9900000000 20170625 2043217705 -9999.9900000000 2 -CTD007 - -13 4 14 0 6 -1 1 20170625 2058302350 -9999.9900000000 20170625 2058331085 9999.9900000000 -1 -Vertical net 003 @ PC500 in the water -0 -Log -20170625 2058302350 -9999.9900000000 20170625 2058302350 9999.9900000000 20170625 2058331085 9999.9900000000 20170625 2058331085 -9999.9900000000 2 -VN003 - -13 4 15 0 4 -1 1 20170625 2011488840 9.2447583998 20170625 2123567876 758.9732173069 -0 -0 -Side station -20170625 2011488840 9.2447583998 20170625 2011488840 758.9732173069 20170625 2123567876 758.9732173069 20170625 2123567876 9.2447583998 0 -Region15 - -13 4 16 0 6 -1 1 20170625 2123547350 -9999.9900000000 20170625 2123576085 9999.9900000000 -1 -RT1 -0 -Log -20170625 2123547350 -9999.9900000000 20170625 2123547350 9999.9900000000 20170625 2123576085 9999.9900000000 20170625 2123576085 -9999.9900000000 2 -RT1 - -13 4 17 0 6 -1 1 20170625 2147596090 -9999.9900000000 20170625 2148024870 9999.9900000000 -1 -BT1 
for PC300 + VN004 -0 -Log -20170625 2147596090 -9999.9900000000 20170625 2147596090 9999.9900000000 20170625 2148024870 9999.9900000000 20170625 2148024870 -9999.9900000000 2 -BT1 - -13 4 18 0 6 -1 1 20170625 2215348545 -9999.9900000000 20170625 2215377285 9999.9900000000 -1 -CTD008 at depth -0 -Log -20170625 2215348545 -9999.9900000000 20170625 2215348545 9999.9900000000 20170625 2215377285 9999.9900000000 20170625 2215377285 -9999.9900000000 2 -CTD008 - -13 4 19 0 6 -1 1 20170625 2228249685 -9999.9900000000 20170625 2228278455 9999.9900000000 -1 -Vertical net 004 @ PC300 in the water -0 -Log -20170625 2228249685 -9999.9900000000 20170625 2228249685 9999.9900000000 20170625 2228278455 9999.9900000000 20170625 2228278455 -9999.9900000000 2 -VN004 - -13 4 20 0 4 -1 1 20170625 2148010460 9.2447583998 20170625 2252513223 758.9732173069 -0 -0 -Side station -20170625 2148010460 9.2447583998 20170625 2148010460 758.9732173069 20170625 2252513223 758.9732173069 20170625 2252513223 9.2447583998 0 -Region20 - -13 4 22 0 6 -1 1 20170625 2252496410 -9999.9900000000 20170625 2252525190 9999.9900000000 -1 -Resume transect 1 -0 -Log -20170625 2252496410 -9999.9900000000 20170625 2252496410 9999.9900000000 20170625 2252525190 9999.9900000000 20170625 2252525190 -9999.9900000000 2 -RT1 - -13 4 23 0 4 -1 1 20170625 2252579762 9.2447583998 20170625 2253068170 758.9732173069 -0 -0 -Unclassified regions -20170625 2252579762 9.2447583998 20170625 2252579762 758.9732173069 20170625 2253068170 758.9732173069 20170625 2253068170 9.2447583998 0 -Region23 - -13 4 24 0 6 -1 1 20170625 2302464615 -9999.9900000000 20170625 2302494205 9999.9900000000 -1 -Break transect 1 -0 -Log -20170625 2302464615 -9999.9900000000 20170625 2302464615 9999.9900000000 20170625 2302494205 9999.9900000000 20170625 2302494205 -9999.9900000000 2 -BT1 - -13 4 25 0 6 -1 1 20170625 2330103330 -9999.9900000000 20170625 2330132060 9999.9900000000 -1 -CTD09 at depth at PC150 -0 -Log -20170625 2330103330 -9999.9900000000 
20170625 2330103330 9999.9900000000 20170625 2330132060 9999.9900000000 20170625 2330132060 -9999.9900000000 2 -CTD009 - -13 4 26 0 6 -1 1 20170625 2339327410 -9999.9900000000 20170625 2339356195 9999.9900000000 -1 -Vertical net 005 in the water @ PC150 -0 -Log -20170625 2339327410 -9999.9900000000 20170625 2339327410 9999.9900000000 20170625 2339356195 9999.9900000000 20170625 2339356195 -9999.9900000000 2 -VN005 - -13 4 27 0 4 -1 1 20170625 2302472892 9.2447583998 20170625 2345389395 758.9732173069 -0 -0 -Side station -20170625 2302472892 9.2447583998 20170625 2302472892 758.9732173069 20170625 2345389395 758.9732173069 20170625 2345389395 9.2447583998 0 -Region27 - -13 4 28 0 6 -1 1 20170626 0018312145 -9999.9900000000 20170626 0018340925 9999.9900000000 -1 -RT1 -0 -Log -20170626 0018312145 -9999.9900000000 20170626 0018312145 9999.9900000000 20170626 0018340925 9999.9900000000 20170626 0018340925 -9999.9900000000 2 -RT1 - -13 4 29 0 4 -1 1 20170625 2345389395 9.2447583998 20170626 0018312145 758.9732173069 -0 -0 -Off-transect -20170625 2345389395 9.2447583998 20170625 2345389395 758.9732173069 20170626 0018312145 758.9732173069 20170626 0018312145 9.2447583998 0 -Region29 - -13 4 30 0 6 -1 1 20170626 0031238970 -9999.9900000000 20170626 0031267745 9999.9900000000 -1 -Back on original transect line (went around oil platform) -0 -Log -20170626 0031238970 -9999.9900000000 20170626 0031238970 9999.9900000000 20170626 0031267745 9999.9900000000 20170626 0031267745 -9999.9900000000 2 -COM diff --git a/echoregions/test_data/transect_no_ST.evr b/echoregions/test_data/transect_no_ST.evr deleted file mode 100644 index ade03079..00000000 --- a/echoregions/test_data/transect_no_ST.evr +++ /dev/null @@ -1,232 +0,0 @@ -EVRG 7 12.0.341.42620 -28 - -13 4 1 0 4 -1 1 20170625 1539223320 9.2447583998 20170625 1540223320 758.9732173069 -0 -0 -Side station -20170625 1539223320 9.2447583998 20170625 1539223320 758.9732173069 20170625 1658091225 758.9732173069 20170625 1658091225 
9.2447583998 0 -Region4 - -13 4 2 0 4 -1 1 20170625 1504281370 9.2447583998 20170625 1539262050 758.9732173069 -0 -0 -Off-transect -20170625 1504281370 9.2447583998 20170625 1504281370 758.9732173069 20170625 1539262050 758.9732173069 20170625 1539262050 9.2447583998 0 -Region5 - -13 4 3 0 6 -1 1 20170625 1757068065 -9999.9900000000 20170625 1757096875 9999.9900000000 -1 -BT2 for VN2 PC1000 -0 -Log -20170625 1757068065 -9999.9900000000 20170625 1757068065 9999.9900000000 20170625 1757096875 9999.9900000000 20170625 1757096875 -9999.9900000000 2 -BT2 - -13 4 4 0 6 -1 1 20170625 1826313220 -9999.9900000000 20170625 1826342015 9999.9900000000 -1 -CTD006 at depth -0 -Log -20170625 1826313220 -9999.9900000000 20170625 1826313220 9999.9900000000 20170625 1826342015 9999.9900000000 20170625 1826342015 -9999.9900000000 2 -CTD006 - -13 4 5 0 6 -1 1 20170625 1845164705 -9999.9900000000 20170625 1845193445 9999.9900000000 -1 -VN002 @ PC1000 in the water -0 -Log -20170625 1845164705 -9999.9900000000 20170625 1845164705 9999.9900000000 20170625 1845193445 9999.9900000000 20170625 1845193445 -9999.9900000000 2 -VN002 - -13 4 6 0 6 -1 1 20170625 1913155635 -9999.9900000000 20170625 1913185185 9999.9900000000 -1 -RT1 after VN002 -0 -Log -20170625 1913155635 -9999.9900000000 20170625 1913155635 9999.9900000000 20170625 1913185185 9999.9900000000 20170625 1913185185 -9999.9900000000 2 -RT1 - -13 4 7 0 4 -1 1 20170625 1757096875 9.2447583998 20170625 1913126075 758.9732173069 -0 -0 -Side station -20170625 1757096875 9.2447583998 20170625 1757096875 758.9732173069 20170625 1913126075 758.9732173069 20170625 1913126075 9.2447583998 0 -Region10 - -13 10 8 0 2 -1 1 20170625 2000591807 102.2552007996 20170625 2002085357 127.9476029355 -0 -0 -Unknown -20170625 2001470930 102.2552007996 20170625 2001361638 103.7403107496 20170625 2001378879 109.5322395548 20170625 2001212025 113.5420364200 20170625 2000591807 116.2152343301 20170625 2001014815 125.1258940304 20170625 2001014815 
126.0169600004 20170625 2001350142 127.9476029355 20170625 2002085357 120.0765202002 20170625 2002068059 106.7105306497 1 -Chicken nugget - -13 4 9 0 6 -1 1 20170625 2011470885 -9999.9900000000 20170625 2011499610 9999.9900000000 -1 -BT1 for VN3 @ PC500 -0 -Log -20170625 2011470885 -9999.9900000000 20170625 2011470885 9999.9900000000 20170625 2011499610 9999.9900000000 20170625 2011499610 -9999.9900000000 2 -BT1 - -13 4 10 0 6 -1 1 20170625 2043188975 -9999.9900000000 20170625 2043217705 9999.9900000000 -1 -CTD007 at PC500 -0 -Log -20170625 2043188975 -9999.9900000000 20170625 2043188975 9999.9900000000 20170625 2043217705 9999.9900000000 20170625 2043217705 -9999.9900000000 2 -CTD007 - -13 4 11 0 6 -1 1 20170625 2058302350 -9999.9900000000 20170625 2058331085 9999.9900000000 -1 -Vertical net 003 @ PC500 in the water -0 -Log -20170625 2058302350 -9999.9900000000 20170625 2058302350 9999.9900000000 20170625 2058331085 9999.9900000000 20170625 2058331085 -9999.9900000000 2 -VN003 - -13 4 12 0 4 -1 1 20170625 2011488840 9.2447583998 20170625 2123567876 758.9732173069 -0 -0 -Side station -20170625 2011488840 9.2447583998 20170625 2011488840 758.9732173069 20170625 2123567876 758.9732173069 20170625 2123567876 9.2447583998 0 -Region15 - -13 4 13 0 6 -1 1 20170625 2123547350 -9999.9900000000 20170625 2123576085 9999.9900000000 -1 -RT1 -0 -Log -20170625 2123547350 -9999.9900000000 20170625 2123547350 9999.9900000000 20170625 2123576085 9999.9900000000 20170625 2123576085 -9999.9900000000 2 -RT1 - -13 4 14 0 6 -1 1 20170625 2147596090 -9999.9900000000 20170625 2148024870 9999.9900000000 -1 -BT1 for PC300 + VN004 -0 -Log -20170625 2147596090 -9999.9900000000 20170625 2147596090 9999.9900000000 20170625 2148024870 9999.9900000000 20170625 2148024870 -9999.9900000000 2 -BT1 - -13 4 15 0 6 -1 1 20170625 2215348545 -9999.9900000000 20170625 2215377285 9999.9900000000 -1 -CTD008 at depth -0 -Log -20170625 2215348545 -9999.9900000000 20170625 2215348545 9999.9900000000 20170625 
2215377285 9999.9900000000 20170625 2215377285 -9999.9900000000 2 -CTD008 - -13 4 16 0 6 -1 1 20170625 2228249685 -9999.9900000000 20170625 2228278455 9999.9900000000 -1 -Vertical net 004 @ PC300 in the water -0 -Log -20170625 2228249685 -9999.9900000000 20170625 2228249685 9999.9900000000 20170625 2228278455 9999.9900000000 20170625 2228278455 -9999.9900000000 2 -VN004 - -13 4 17 0 4 -1 1 20170625 2148010460 9.2447583998 20170625 2252513223 758.9732173069 -0 -0 -Side station -20170625 2148010460 9.2447583998 20170625 2148010460 758.9732173069 20170625 2252513223 758.9732173069 20170625 2252513223 9.2447583998 0 -Region20 - -13 4 18 0 6 -1 1 20170625 2252496410 -9999.9900000000 20170625 2252525190 9999.9900000000 -1 -Resume transect 1 -0 -Log -20170625 2252496410 -9999.9900000000 20170625 2252496410 9999.9900000000 20170625 2252525190 9999.9900000000 20170625 2252525190 -9999.9900000000 2 -RT1 - -13 4 19 0 4 -1 1 20170625 2252579762 9.2447583998 20170625 2253068170 758.9732173069 -0 -0 -Unclassified regions -20170625 2252579762 9.2447583998 20170625 2252579762 758.9732173069 20170625 2253068170 758.9732173069 20170625 2253068170 9.2447583998 0 -Region23 - -13 4 20 0 6 -1 1 20170625 2302464615 -9999.9900000000 20170625 2302494205 9999.9900000000 -1 -Break transect 1 -0 -Log -20170625 2302464615 -9999.9900000000 20170625 2302464615 9999.9900000000 20170625 2302494205 9999.9900000000 20170625 2302494205 -9999.9900000000 2 -BT1 - -13 4 21 0 6 -1 1 20170625 2330103330 -9999.9900000000 20170625 2330132060 9999.9900000000 -1 -CTD09 at depth at PC150 -0 -Log -20170625 2330103330 -9999.9900000000 20170625 2330103330 9999.9900000000 20170625 2330132060 9999.9900000000 20170625 2330132060 -9999.9900000000 2 -CTD009 - -13 4 22 0 6 -1 1 20170625 2339327410 -9999.9900000000 20170625 2339356195 9999.9900000000 -1 -Vertical net 005 in the water @ PC150 -0 -Log -20170625 2339327410 -9999.9900000000 20170625 2339327410 9999.9900000000 20170625 2339356195 9999.9900000000 20170625 
2339356195 -9999.9900000000 2 -VN005 - -13 4 23 0 4 -1 1 20170625 2302472892 9.2447583998 20170625 2345389395 758.9732173069 -0 -0 -Side station -20170625 2302472892 9.2447583998 20170625 2302472892 758.9732173069 20170625 2345389395 758.9732173069 20170625 2345389395 9.2447583998 0 -Region27 - -13 4 24 0 6 -1 1 20170626 0018312145 -9999.9900000000 20170626 0018340925 9999.9900000000 -1 -RT1 -0 -Log -20170626 0018312145 -9999.9900000000 20170626 0018312145 9999.9900000000 20170626 0018340925 9999.9900000000 20170626 0018340925 -9999.9900000000 2 -RT1 - -13 4 25 0 4 -1 1 20170625 2345389395 9.2447583998 20170626 0018312145 758.9732173069 -0 -0 -Off-transect -20170625 2345389395 9.2447583998 20170625 2345389395 758.9732173069 20170626 0018312145 758.9732173069 20170626 0018312145 9.2447583998 0 -Region29 - -13 4 26 0 6 -1 1 20170626 0031238970 -9999.9900000000 20170626 0031267745 9999.9900000000 -1 -Back on original transect line (went around oil platform) -0 -Log -20170626 0031238970 -9999.9900000000 20170626 0031238970 9999.9900000000 20170626 0031267745 9999.9900000000 20170626 0031267745 -9999.9900000000 2 -COM - -13 4 27 0 6 -1 1 20170626 0110573765 -9999.9900000000 20170626 0111002540 9999.9900000000 -1 -End transect 1 -0 -Log -20170626 0110573765 -9999.9900000000 20170626 0110573765 9999.9900000000 20170626 0111002540 9999.9900000000 20170626 0111002540 -9999.9900000000 2 -ET1 - -13 4 28 0 6 -1 1 20170626 0134430310 -9999.9900000000 20170626 0134458790 9999.9900000000 -1 -CTD010 at depth -0 -Log -20170626 0134430310 -9999.9900000000 20170626 0134430310 9999.9900000000 20170626 0134458790 9999.9900000000 20170626 0134458790 -9999.9900000000 2 -CTD010 - -13 4 29 0 6 -1 1 20170626 0144554085 -9999.9900000000 20170626 0144582565 9999.9900000000 -1 -Vertical net 006 at PC60 -0 -Log -20170626 0144554085 -9999.9900000000 20170626 0144554085 9999.9900000000 20170626 0144582565 9999.9900000000 20170626 0144582565 -9999.9900000000 2 -VN006 - -13 4 30 0 4 -1 1 20170626 
0110584576 9.2447583998 20170626 0220098722 758.9732173069 -0 -0 -Side station -20170626 0110584576 9.2447583998 20170626 0110584576 758.9732173069 20170626 0220098722 758.9732173069 20170626 0220098722 9.2447583998 0 -Region35 diff --git a/echoregions/tests/test_regions2d.py b/echoregions/tests/test_regions2d.py index 5c88adc3..96fabb09 100644 --- a/echoregions/tests/test_regions2d.py +++ b/echoregions/tests/test_regions2d.py @@ -804,8 +804,45 @@ def test_within_transect(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) """ # Create transect mask with no errors - transect_dict = {"start": "ST", "break": "BT", "resume": "RT", "end": "ET"} - M = regions2d_fixture.transect_mask(da_Sv=da_Sv_fixture, transect_dict=transect_dict).compute() + transect_sequence_type_dict = {"start": "ST", "break": "BT", "resume": "RT", "end": "ET"} + df = regions2d_fixture.data + df.loc[df["region_id"] == 5, "region_name"] = "Log" # Remove early ST + df.loc[df["region_id"] == 13, "region_name"] = "ST22" # Place ST towards middle + df.loc[df["region_id"] == 19, "region_name"] = "Log" # Remove late ET + df.loc[df["region_id"] == 14, "region_name"] = "ET22" # Place ET towards middle + regions2d_fixture.data = df + M = regions2d_fixture.transect_mask( + da_Sv=da_Sv_fixture, transect_sequence_type_dict=transect_sequence_type_dict + ).compute() + + # Check M dimensions + assert M.shape == (3955, 1681) + + # Check values + assert len(list(np.unique(M.data))) == 2 + + # Test number of 1 values + assert np.unique(M.data, return_counts=True)[1][0] == 5687290 + + +@pytest.mark.regions2d +def test_within_transect_all(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None: + """ + Tests functionality for transect_mask with all values in the da_Sv within transect. + + Parameters + ---------- + regions2d_fixture : Regions2D + Object containing data of test EVR file. + da_Sv_fixture : DataArray + DataArray containing Sv data of test zarr file. 
+ """ + + # Create transect mask with no errors + transect_sequence_type_dict = {"start": "ST", "break": "BT", "resume": "RT", "end": "ET"} + M = regions2d_fixture.transect_mask( + da_Sv=da_Sv_fixture, transect_sequence_type_dict=transect_sequence_type_dict + ).compute() # Check M dimensions assert M.shape == (3955, 1681) @@ -821,28 +858,28 @@ def test_within_transect(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) @pytest.mark.regions2d -def test_within_transect_no_ET_ST(da_Sv_fixture: DataArray) -> None: +def test_within_transect_no_regions(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None: """ - Tests functionality for evr file with no ST and for evr file with no ET. - Should raise appropriate UserWarning and should use first row for ST - and last row for ET. - + Tests functionality for transect_mask for empty r2d object. Parameters ---------- + regions2d_fixture : Regions2D + Object containing data of test EVR file. da_Sv_fixture : DataArray DataArray containing Sv data of test zarr file. """ - transect_dict = {"start": "ST", "break": "BT", "resume": "RT", "end": "ET"} - with pytest.warns(UserWarning): - evr_path = DATA_DIR / "transect_no_ST.evr" - r2d = er.read_evr(evr_path) - _ = r2d.transect_mask(da_Sv=da_Sv_fixture, transect_dict=transect_dict) - with pytest.warns(UserWarning): - evr_path = DATA_DIR / "transect_no_ET.evr" - r2d = er.read_evr(evr_path) - _ = r2d.transect_mask(da_Sv=da_Sv_fixture, transect_dict=transect_dict) + # Create transect mask with no errors + r2d_empty = er.read_regions_csv(pd.DataFrame(columns=regions2d_fixture.data.columns)) + M = r2d_empty.transect_mask(da_Sv=da_Sv_fixture).compute() + + # Check M dimensions + assert M.shape == (3955, 1681) + + # This entire output should be empty. 
+ assert len(list(np.unique(M.data))) == 1 + assert list(np.unique(M.data))[0] == 0 @pytest.mark.regions2d @@ -861,19 +898,23 @@ def test_within_transect_bad_dict(da_Sv_fixture: DataArray) -> None: r2d = er.read_evr(evr_path) # Create dictionary with duplicates - transect_dict_duplicate = { + transect_sequence_type_dict_duplicate = { "start": "BT", "break": "BT", "resume": "RT", "end": "ET", } with pytest.raises(ValueError): - _ = r2d.transect_mask(da_Sv=da_Sv_fixture, transect_dict=transect_dict_duplicate) + _ = r2d.transect_mask( + da_Sv=da_Sv_fixture, transect_sequence_type_dict=transect_sequence_type_dict_duplicate + ) # Create dictionary with integers - transect_dict_int = {"start": "ST", "break": "BT", "resume": "RT", "end": 4} + transect_sequence_type_dict_int = {"start": "ST", "break": "BT", "resume": "RT", "end": 4} with pytest.raises(TypeError): - _ = r2d.transect_mask(da_Sv=da_Sv_fixture, transect_dict=transect_dict_int) + _ = r2d.transect_mask( + da_Sv=da_Sv_fixture, transect_sequence_type_dict=transect_sequence_type_dict_int + ) @pytest.mark.regions2d @@ -888,28 +929,65 @@ def test_within_transect_invalid_next(da_Sv_fixture: DataArray) -> None: """ # Initialize proper dictionary - transect_dict = {"start": "ST", "break": "BT", "resume": "RT", "end": "ET"} + transect_sequence_type_dict = {"start": "ST", "break": "BT", "resume": "RT", "end": "ET"} - # Should raise value error as ST is followed by ST - with pytest.raises(ValueError): + # Should raise Exception if ST is followed by ST + with pytest.raises(Exception): evr_path = DATA_DIR / "x1_ST_ST.evr" r2d = er.read_evr(evr_path) - _ = r2d.transect_mask(da_Sv=da_Sv_fixture, transect_dict=transect_dict) + _ = r2d.transect_mask( + da_Sv=da_Sv_fixture, + transect_sequence_type_dict=transect_sequence_type_dict, + must_pass_check=True, + ) - # Should raise value error as RT is followed by RT - with pytest.raises(ValueError): - evr_path = DATA_DIR / "x1_RT_RT.evr" + # Should raise Exception if RT is followed by 
RT + with pytest.raises(Exception): + evr_path = DATA_DIR / "transect_RT_RT.evr" r2d = er.read_evr(evr_path) - _ = r2d.transect_mask(da_Sv=da_Sv_fixture, transect_dict=transect_dict) + _ = r2d.transect_mask( + da_Sv=da_Sv_fixture, + transect_sequence_type_dict=transect_sequence_type_dict, + must_pass_check=True, + ) - # Should raise value error as BT is followed by ET - with pytest.raises(ValueError): - evr_path = DATA_DIR / "x1_BT_ET.evr" + # Should raise value Exception if BT is followed by ET + with pytest.raises(Exception): + evr_path = DATA_DIR / "transect_BT_ET.evr" r2d = er.read_evr(evr_path) - _ = r2d.transect_mask(da_Sv=da_Sv_fixture, transect_dict=transect_dict) + _ = r2d.transect_mask( + da_Sv=da_Sv_fixture, + transect_sequence_type_dict=transect_sequence_type_dict, + must_pass_check=True, + ) - # Should raise value error as ET is followed by RT - with pytest.raises(ValueError): - evr_path = DATA_DIR / "x1_ET_RT.evr" + # Should raises Exception if ET is followed by RT + with pytest.raises(Exception): + evr_path = DATA_DIR / "transect_ET_RT.evr" r2d = er.read_evr(evr_path) - _ = r2d.transect_mask(da_Sv=da_Sv_fixture, transect_dict=transect_dict) + _ = r2d.transect_mask( + da_Sv=da_Sv_fixture, + transect_sequence_type_dict=transect_sequence_type_dict, + must_pass_check=True, + ) + + +@pytest.mark.regions2d +def test_within_transect_small_bbox_distance_threshold(da_Sv_fixture: DataArray) -> None: + """ + Tests functionality for transect_mask with small bbox distance threshold. + + Parameters + ---------- + da_Sv_fixture : DataArray + DataArray containing Sv data of test zarr file. 
+ """ + + # Get Regions2D Object + evr_path = DATA_DIR / "transect.evr" + r2d = er.read_evr(evr_path) + + with pytest.raises(Exception): + _ = r2d.transect_mask( + da_Sv=da_Sv_fixture, bbox_distance_threshold=0.001, must_pass_check=True + ) From 8f58a52241758b7adafb4463173fbcf6e23f5ac1 Mon Sep 17 00:00:00 2001 From: Caesar Tuguinay <87830138+ctuguinay@users.noreply.github.com> Date: Wed, 27 Mar 2024 12:10:43 -0700 Subject: [PATCH 3/3] set inequality in Lines.mask (#187) --- echoregions/lines/lines.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/echoregions/lines/lines.py b/echoregions/lines/lines.py index bde9ae87..6ef28459 100644 --- a/echoregions/lines/lines.py +++ b/echoregions/lines/lines.py @@ -244,7 +244,7 @@ def filter_bottom(bottom, start_date, end_date): # create a mask for the bottom: # bottom: True, otherwise: False - bottom_mask = depth_da > bottom_da + bottom_mask = depth_da >= bottom_da # Reset bottom_contours index so that time index becomes time column bottom_contours = bottom_contours.reset_index()