diff --git a/RULES.md b/RULES.md
index 7fb6681..5eb6b64 100644
--- a/RULES.md
+++ b/RULES.md
@@ -6,6 +6,7 @@ This file contains a list of all lints checked by yml2block.
 |-------------------------|---------|------|
 | `unique_names` | b001 | Ensure that within a block (e.g. DatasetField) all names are unique. |
 | `block_is_list` | b002 | Ensure that each block content is a valid yaml list. |
+| `unique_titles` | b003 | Ensure that within the DatasetField block all titles are unique. |
 | `keywords_valid` | k001 | Ensure top level keywords (i.e. block names) are present and contain no typos. |
 | `keywords_unique` | k002 | Ensure no top level keyword occurs multiple times. |
 | `keys_valid` | e001 | Ensure that no invalid keys are present, e.g. through typos. |
diff --git a/tests/integration_tests.py b/tests/integration_tests.py
index e9ca449..d0aa4b7 100644
--- a/tests/integration_tests.py
+++ b/tests/integration_tests.py
@@ -35,6 +35,23 @@ def test_duplicate_names_detected():
     assert result.exit_code == 1, result.output
 
 
+def test_duplicate_titles_detected():
+    """This test ensures that duplicate titles are detected."""
+    runner = CliRunner()
+    result = runner.invoke(
+        yml2block.__main__.main,
+        ["check", "tests/invalid/duplicate_datasetfield_title.yml"],
+    )
+    assert result.exit_code == 1, result.output
+
+    runner = CliRunner()
+    result = runner.invoke(
+        yml2block.__main__.main,
+        ["check", "tests/invalid/duplicate_datasetfield_title.tsv"],
+    )
+    assert result.exit_code == 1, result.output
+
+
 def test_duplicate_top_level_key_detected():
     """This test ensures that duplicates in top-level keys are detected."""
     runner = CliRunner()
diff --git a/yml2block/rules.py b/yml2block/rules.py
index cefc2eb..6e48ada 100644
--- a/yml2block/rules.py
+++ b/yml2block/rules.py
@@ -216,6 +216,48 @@ def unique_names(yaml_chunk, tsv_keyword, level=Level.ERROR):
     return errors
 
 
+def unique_titles(yaml_chunk, tsv_keyword, level=Level.ERROR):
+    """Make sure that each title in the block is only used once.
+
+    block content level lint
+
+    Only the ``datasetField`` block is checked; any other block yields
+    no violations. Entries without a ``title`` key are skipped, since
+    an absent title cannot be a duplicate.
+
+    Returns a list holding one LintViolation per duplicated title; the
+    message names the line of every entry that uses that title.
+    """
+    if tsv_keyword not in ["datasetField"]:
+        return []
+
+    # Group the entries by title; dict order keeps first-seen order.
+    occurrences = defaultdict(list)
+    for item in yaml_chunk:
+        try:
+            item_title = item["title"].value
+        except KeyError:
+            # NOTE(review): presumably a missing title is reported by
+            # a per-entry key lint -- confirm, then keep skipping here.
+            continue
+        occurrences[item_title].append(item)
+
+    errors = []
+    for title, items in occurrences.items():
+        if len(items) < 2:
+            continue
+        occs = ", ".join(f"line {item.line}" for item in items)
+        errors.append(
+            LintViolation(
+                level,
+                "unique_titles",
+                f"Title '{title}' occurs {len(items)} times: {occs}. "
+                "Titles should be unique.",
+            )
+        )
+    return errors
+
+
 def block_is_list(yaml_chunk, level=Level.ERROR):
     """Make sure that the yaml chunk is a list.
 
@@ -427,6 +469,8 @@ def no_trailing_spaces(list_item, tsv_keyword, level=Level.ERROR):
     "b001": unique_names,
     "block_is_list": block_is_list,
     "b002": block_is_list,
+    "unique_titles": unique_titles,
+    "b003": unique_titles,
     "keywords_valid": keywords_valid,
     "k001": keywords_valid,
     "keywords_unique": keywords_unique,
diff --git a/yml2block/validation.py b/yml2block/validation.py
index dbf9c76..c06bbca 100644
--- a/yml2block/validation.py
+++ b/yml2block/validation.py
@@ -66,7 +66,7 @@ def validate_entry(yaml_chunk, tsv_keyword, lint_conf, verbose):
 
     longest_row = 0
 
-    for lint in (rules.unique_names,):
+    for lint in (rules.unique_names, rules.unique_titles):
         lint = lint_conf.get(lint)
         violations.extend(lint(yaml_chunk, tsv_keyword))
 