From 0dbe8c678afcd62a11716f191895866a873c1e1d Mon Sep 17 00:00:00 2001 From: Henning Timm Date: Thu, 22 Feb 2024 15:29:03 +0100 Subject: [PATCH] Formatted --- yml2block/rules.py | 1 + yml2block/tsv_input.py | 19 +++++++++++-------- yml2block/validation.py | 1 + yml2block/yaml_input.py | 2 ++ 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/yml2block/rules.py b/yml2block/rules.py index 854b996..840a5ef 100644 --- a/yml2block/rules.py +++ b/yml2block/rules.py @@ -4,6 +4,7 @@ LintViolation object containing error severity, rule name, and an error message. """ + import re from enum import IntEnum diff --git a/yml2block/tsv_input.py b/yml2block/tsv_input.py index b32dba9..022682b 100644 --- a/yml2block/tsv_input.py +++ b/yml2block/tsv_input.py @@ -1,6 +1,7 @@ """This module provides a parser to convert a Dataverse TSV file into the same dictionary-based representation used for YAML input. """ + import csv import io import itertools @@ -8,13 +9,14 @@ from yml2block.rules import LintViolation, Level - class MDBlockList(list): __slots__ = ("line", "column") + class MDBlockDict(dict): __slots__ = ("line", "column") + class MDBlockNode: __slots__ = ("line", "column", "value") @@ -25,8 +27,7 @@ def __init__(self, value, line=None, column=None): def __repr__(self): return f"({self.line}, {self.column}) {self.value}" - - + def _identify_break_points(full_file): """Identify where to split the metadata block into its three subsections""" @@ -82,13 +83,16 @@ def _parse(block): # Unpack each tsv-chunk of the metadata block into a list # of dictionaries. - parsed_blocks = [zip(_parse(block), itertools.repeat(offset)) for offset, block in enumerate(split_blocks, 1)] + parsed_blocks = [ + zip(_parse(block), itertools.repeat(offset)) + for offset, block in enumerate(split_blocks, 1) + ] for line_no, (row, offset) in enumerate(itertools.chain(*parsed_blocks), 1): # Each row corresponds to a content line in the TSV file # unpacked into a dictionary with keys depending # on the part of the block identified by the top level keyword - + # Get the toplevel keyword from the first column of the TSV file # e.g. #metadataBlock, #datasetField, #controlledVocabulary toplevel_key_with_prefix = [ @@ -103,8 +107,7 @@ def _parse(block): offset_line_no = line_no + offset row_as_dict.line = offset_line_no row_as_dict.column = None - - + for key, value in row.items(): if key is None: # These entries cannot be associated with a column header @@ -125,7 +128,7 @@ def _parse(block): # Initialize the entry for this toplevel keyword with an empty list if toplevel_key not in data.keys(): - block_list = MDBlockList() + block_list = MDBlockList() block_list.line = line_no block_list.column = None data[toplevel_key] = block_list diff --git a/yml2block/validation.py b/yml2block/validation.py index 578a61e..dbf9c76 100644 --- a/yml2block/validation.py +++ b/yml2block/validation.py @@ -1,4 +1,5 @@ """Dispatch entry to specialized lint rules.""" + from yml2block import rules diff --git a/yml2block/yaml_input.py b/yml2block/yaml_input.py index 9461b77..685f4f0 100644 --- a/yml2block/yaml_input.py +++ b/yml2block/yaml_input.py @@ -1,6 +1,8 @@ """Import module for YAML files.""" + from ruamel.yaml import YAML from ruamel.yaml.constructor import DuplicateKeyError + # from ruamel.yaml.scalarstring import LiteralScalarString, FoldedScalarString, DoubleQuotedScalarString, SingleQuotedScalarString, PlainScalarString # from ruamel.yaml.scalarint import ScalarInt # from ruamel.yaml.scalarbool import ScalarBoolean