diff --git a/CHANGES.md b/CHANGES.md index 1fd217d6e..cb8a0ef49 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,10 +7,12 @@ * export v2: Previously, when `strain` was not used as the metadata ID column, node attributes might have gone missing from the final Auspice JSON. This has been fixed. [#1260][], [#1262][] (@victorlin, @joverlee521) * export v1: Added a deprecation warning for this command. [#1265][] (@victorlin) * export v1: The recently introduced flag `--metadata-id-columns` did not work properly due to the same `export v2` bug that was fixed in this release. Instead of fixing it in `export v1`, drop the broken feature since this command is no longer being maintained. [#1265][] (@victorlin) +* filter: Expose internal Pandas errors from `--query` which may be useful to users. [#1267][] (@victorlin) [#1260]: https://github.com/nextstrain/augur/issues/1260 [#1262]: https://github.com/nextstrain/augur/issues/1262 [#1265]: https://github.com/nextstrain/augur/pull/1265 +[#1267]: https://github.com/nextstrain/augur/pull/1267 ## 22.1.0 (10 July 2023) diff --git a/augur/filter/include_exclude_rules.py b/augur/filter/include_exclude_rules.py index e5b9e09be..a7dab492d 100644 --- a/augur/filter/include_exclude_rules.py +++ b/augur/filter/include_exclude_rules.py @@ -718,7 +718,7 @@ def apply_filters(metadata, exclude_by: List[FilterOption], include_by: List[Fil UndefinedVariableError = pd.core.computation.ops.UndefinedVariableError # type: ignore if isinstance(e, UndefinedVariableError): raise AugurError(f"Query contains a column that does not exist in metadata.") from e - raise AugurError(f"Error when applying query. 
Ensure the syntax is valid per <https://pandas.pydata.org/docs/user_guide/indexing.html#indexing-query>.") from e + raise AugurError(f"Internal Pandas error when applying query:\n\t{e}\nEnsure the syntax is valid per <https://pandas.pydata.org/docs/user_guide/indexing.html#indexing-query>.") from e else: raise diff --git a/tests/functional/filter/cram/filter-query-errors.t b/tests/functional/filter/cram/filter-query-errors.t index 265805b7e..b2894abc9 100644 --- a/tests/functional/filter/cram/filter-query-errors.t +++ b/tests/functional/filter/cram/filter-query-errors.t @@ -12,22 +12,35 @@ Using a pandas query with a nonexistent column results in a specific error. [2] -Using pandas queries with bad syntax results in a generic errors. +Using pandas queries with bad syntax results in meaningful errors. -This raises a ValueError internally (https://github.com/nextstrain/augur/issues/940): +Some error messages from Pandas may be useful, so they are exposed: $ ${AUGUR} filter \ > --metadata "$TESTDIR/../data/metadata.tsv" \ - > --query "invalid = 'value'" \ + > --query "region >= 0.50" \ > --output-strains filtered_strains.txt > /dev/null - ERROR: Error when applying query. Ensure the syntax is valid per <https://pandas.pydata.org/docs/user_guide/indexing.html#indexing-query>. + ERROR: Internal Pandas error when applying query: + '>=' not supported between instances of 'str' and 'float' + Ensure the syntax is valid per <https://pandas.pydata.org/docs/user_guide/indexing.html#indexing-query>. [2] -This raises a SyntaxError internally (https://github.com/nextstrain/augur/issues/941): +However, other Pandas errors are not so helpful, so a link is provided for users to learn more about query syntax. + + $ ${AUGUR} filter \ > --metadata "$TESTDIR/../data/metadata.tsv" \ > --query "invalid = 'value'" \ > --output-strains filtered_strains.txt > /dev/null + ERROR: Internal Pandas error when applying query: + cannot assign without a target object + Ensure the syntax is valid per <https://pandas.pydata.org/docs/user_guide/indexing.html#indexing-query>. + [2] $ ${AUGUR} filter \ > --metadata "$TESTDIR/../data/metadata.tsv" \ > --query "some bad syntax" \ > --output-strains filtered_strains.txt > /dev/null - ERROR: Error when applying query. Ensure the syntax is valid per <https://pandas.pydata.org/docs/user_guide/indexing.html#indexing-query>. 
+ ERROR: Internal Pandas error when applying query: + invalid syntax (<unknown>, line 1) + Ensure the syntax is valid per <https://pandas.pydata.org/docs/user_guide/indexing.html#indexing-query>. [2]