Skip to content

Commit

Permalink
Formatted
Browse files Browse the repository at this point in the history
  • Loading branch information
HenningTimm committed Feb 22, 2024
1 parent 0cff720 commit 0dbe8c6
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 8 deletions.
1 change: 1 addition & 0 deletions yml2block/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
LintViolation object containing error severity, rule name, and
an error message.
"""

import re

from enum import IntEnum
Expand Down
19 changes: 11 additions & 8 deletions yml2block/tsv_input.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
"""This module provides a parser to convert a Dataverse TSV file into the same
dictionary-based representation used for YAML input.
"""

import csv
import io
import itertools

from yml2block.rules import LintViolation, Level



class MDBlockList(list):
    """A ``list`` that can additionally carry ``line`` and ``column`` attributes.

    Only the two slot names below are available as instance attributes;
    because ``__slots__`` is declared, instances have no ``__dict__``.
    The TSV parser in this module assigns a line number to ``line`` and
    ``None`` to ``column`` after creating an instance.
    """

    __slots__ = ("line", "column")


class MDBlockDict(dict):
    """A ``dict`` that can additionally carry ``line`` and ``column`` attributes.

    Only the two slot names below are available as instance attributes;
    because ``__slots__`` is declared, instances have no ``__dict__``.
    NOTE(review): presumably these hold the source position of the entry
    in the parsed input file -- confirm against the parser code.
    """

    __slots__ = ("line", "column")


class MDBlockNode:
__slots__ = ("line", "column", "value")

Expand All @@ -25,8 +27,7 @@ def __init__(self, value, line=None, column=None):

def __repr__(self):
return f"({self.line}, {self.column}) {self.value}"




def _identify_break_points(full_file):
"""Identify where to split the metadata block into its three subsections"""
Expand Down Expand Up @@ -82,13 +83,16 @@ def _parse(block):

# Unpack each tsv-chunk of the metadata block into a list
# of dictionaries.
parsed_blocks = [zip(_parse(block), itertools.repeat(offset)) for offset, block in enumerate(split_blocks, 1)]
parsed_blocks = [
zip(_parse(block), itertools.repeat(offset))
for offset, block in enumerate(split_blocks, 1)
]

for line_no, (row, offset) in enumerate(itertools.chain(*parsed_blocks), 1):
# Each row corresponds to a content line in the TSV file
# unpacked into a dictionary with keys depending
# on the part of the block identified by the top level keyword

# Get the toplevel keyword from the first column of the TSV file
# e.g. #metadataBlock, #datasetField, #controlledVocabulary
toplevel_key_with_prefix = [
Expand All @@ -103,8 +107,7 @@ def _parse(block):
offset_line_no = line_no + offset
row_as_dict.line = offset_line_no
row_as_dict.column = None



for key, value in row.items():
if key is None:
# These entries cannot be associated with a column header
Expand All @@ -125,7 +128,7 @@ def _parse(block):

# Initialize the entry for this toplevel keyword with an empty list
if toplevel_key not in data.keys():
block_list = MDBlockList()
block_list = MDBlockList()
block_list.line = line_no
block_list.column = None
data[toplevel_key] = block_list
Expand Down
1 change: 1 addition & 0 deletions yml2block/validation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Dispatch entry to specialized lint rules."""

from yml2block import rules


Expand Down
2 changes: 2 additions & 0 deletions yml2block/yaml_input.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Import module for YAML files."""

from ruamel.yaml import YAML
from ruamel.yaml.constructor import DuplicateKeyError

# from ruamel.yaml.scalarstring import LiteralScalarString, FoldedScalarString, DoubleQuotedScalarString, SingleQuotedScalarString, PlainScalarString
# from ruamel.yaml.scalarint import ScalarInt
# from ruamel.yaml.scalarbool import ScalarBoolean
Expand Down

0 comments on commit 0dbe8c6

Please sign in to comment.