Skip to content

Commit

Permalink
Add lint to spot duplicate titles
Browse files Browse the repository at this point in the history
  • Loading branch information
HenningTimm committed Oct 29, 2024
1 parent 0acbcf7 commit 8f1e150
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 1 deletion.
1 change: 1 addition & 0 deletions RULES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ This file contains a list of all lints checked by yml2block.
|-------------------------|---------|------|
| `unique_names` | b001 | Ensure that within a block (e.g. DatasetField) all names are unique. |
| `block_is_list` | b002 | Ensure that each block content is a valid yaml list. |
| `unique_titles` | b003 | Ensure that within the DatasetField block all titles are unique. |
| `keywords_valid` | k001 | Ensure top level keywords (i.e. block names) are present and contain no typos. |
| `keywords_unique` | k002 | Ensure no top level keyword occurs multiple times. |
| `keys_valid` | e001 | Ensure that no invalid keys are present, e.g. through typos. |
Expand Down
17 changes: 17 additions & 0 deletions tests/integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,23 @@ def test_duplicate_names_detected():
assert result.exit_code == 1, result.output


def test_duplicate_titles_detected():
    """Check that duplicate titles make the `check` command fail.

    The same expectation (exit code 1) is applied to the `.yml` fixture
    first and then to the `.tsv` fixture.
    """
    fixtures = (
        "tests/invalid/duplicate_datasetfield_title.yml",
        "tests/invalid/duplicate_datasetfield_title.tsv",
    )
    for fixture in fixtures:
        # A fresh runner per fixture, so the invocations stay independent.
        cli = CliRunner()
        outcome = cli.invoke(yml2block.__main__.main, ["check", fixture])
        # On failure, show the command output to ease debugging.
        assert outcome.exit_code == 1, outcome.output


def test_duplicate_top_level_key_detected():
"""This test ensures that duplicates in top-level keys are detected."""
runner = CliRunner()
Expand Down
31 changes: 31 additions & 0 deletions yml2block/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,35 @@ def unique_names(yaml_chunk, tsv_keyword, level=Level.ERROR):
return errors


def unique_titles(yaml_chunk, tsv_keyword, level=Level.ERROR):
    """Report every title that is used by more than one entry of the block.

    Block content level lint.

    Only the ``datasetField`` block is inspected; for any other
    ``tsv_keyword`` an empty list is returned without looking at
    ``yaml_chunk``.

    Returns a list containing one ``LintViolation`` (created with ``level``)
    per title that occurs more than once.
    """
    checked_blocks = ("datasetField",)
    if tsv_keyword not in checked_blocks:
        return []

    # Group the entries by their title value; dict insertion order keeps the
    # titles in first-seen order, which fixes the order of the reported
    # violations.
    entries_by_title = defaultdict(list)
    for entry in yaml_chunk:
        entries_by_title[entry["title"].value].append(entry)

    violations = []
    for title, entries in entries_by_title.items():
        count = len(entries)
        if count <= 1:
            continue
        # The line number is read from each grouped entry itself.
        locations = ", ".join([f"line {entry.line}" for entry in entries])
        violations.append(
            LintViolation(
                level,
                "unique_titles",
                f"Title '{title}' occurs {count} times: {locations}. Titles should be unique.",
            )
        )
    return violations


def block_is_list(yaml_chunk, level=Level.ERROR):
"""Make sure that the yaml chunk is a list.
Expand Down Expand Up @@ -427,6 +456,8 @@ def no_trailing_spaces(list_item, tsv_keyword, level=Level.ERROR):
"b001": unique_names,
"block_is_list": block_is_list,
"b002": block_is_list,
"unique_titles": unique_titles,
"b003": unique_titles,
"keywords_valid": keywords_valid,
"k001": keywords_valid,
"keywords_unique": keywords_unique,
Expand Down
2 changes: 1 addition & 1 deletion yml2block/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def validate_entry(yaml_chunk, tsv_keyword, lint_conf, verbose):

longest_row = 0

for lint in (rules.unique_names,):
for lint in (rules.unique_names, rules.unique_titles):
lint = lint_conf.get(lint)
violations.extend(lint(yaml_chunk, tsv_keyword))

Expand Down

0 comments on commit 8f1e150

Please sign in to comment.