From ee4f16e90fbb8a03ac859f5c1fad2d71159d0bb1 Mon Sep 17 00:00:00 2001 From: reconman Date: Sun, 6 Feb 2022 13:23:41 +0100 Subject: [PATCH] Make file-encoding UTF-8 by default and let users choose --- yamale/command_line.py | 24 ++++++++++++---------- yamale/readers/yaml_reader.py | 4 ++-- yamale/schema/schema.py | 4 ++-- yamale/yamale.py | 38 +++++++++++++++++++++++++++++------ 4 files changed, 49 insertions(+), 21 deletions(-) diff --git a/yamale/command_line.py b/yamale/command_line.py index cd90896..24bd6b7 100644 --- a/yamale/command_line.py +++ b/yamale/command_line.py @@ -20,18 +20,18 @@ schemas = {} -def _validate(schema_path, data_path, parser, strict, _raise_error): +def _validate(schema_path, data_path, parser, strict, _raise_error, encoding): schema = schemas.get(schema_path) try: if not schema: - schema = yamale.make_schema(schema_path, parser) + schema = yamale.make_schema(schema_path, parser, encoding=encoding) schemas[schema_path] = schema except (SyntaxError, ValueError) as e: results = [Result([str(e)])] if not _raise_error: return results raise YamaleError(results) - data = yamale.make_data(data_path, parser) + data = yamale.make_data(data_path, parser, encoding=encoding) return yamale.validate(schema, data, strict, _raise_error) @@ -60,15 +60,15 @@ def _find_schema(data_path, schema_name): return _find_data_path_schema(data_path, schema_name) -def _validate_single(yaml_path, schema_name, parser, strict): +def _validate_single(yaml_path, schema_name, parser, strict, encoding): print('Validating %s...' 
% yaml_path) s = _find_schema(yaml_path, schema_name) if not s: raise ValueError("Invalid schema name for '{}' or schema not found.".format(schema_name)) - _validate(s, yaml_path, parser, strict, True) + _validate(s, yaml_path, parser, strict, True, encoding) -def _validate_dir(root, schema_name, cpus, parser, strict): +def _validate_dir(root, schema_name, cpus, parser, strict, encoding): pool = Pool(processes=cpus) res = [] error_messages = [] @@ -80,7 +80,7 @@ def _validate_dir(root, schema_name, cpus, parser, strict): s = _find_schema(d, schema_name) if s: res.append(pool.apply_async(_validate, - (s, d, parser, strict, False))) + (s, d, parser, strict, False, encoding))) else: print('No schema found for: %s' % d) @@ -97,12 +97,12 @@ def _validate_dir(root, schema_name, cpus, parser, strict): raise ValueError('\n----\n'.join(set(error_messages))) -def _router(root, schema_name, cpus, parser, strict=True): +def _router(root, schema_name, cpus, parser, strict=True, encoding='utf-8'): root = os.path.abspath(root) if os.path.isfile(root): - _validate_single(root, schema_name, parser, strict) + _validate_single(root, schema_name, parser, strict, encoding) else: - _validate_dir(root, schema_name, cpus, parser, strict) + _validate_dir(root, schema_name, cpus, parser, strict, encoding) def main(): @@ -117,9 +117,11 @@ def main(): help='YAML library to load files. Choices are "ruamel" or "pyyaml" (default).') parser.add_argument('--no-strict', action='store_true', help='Disable strict mode, unexpected elements in the data will be accepted.') + parser.add_argument('-e', '--encoding', default='utf-8', + help='Character encoding of the files. Default is utf-8.') args = parser.parse_args() try: - _router(args.path, args.schema, args.cpu_num, args.parser, not args.no_strict) + _router(args.path, args.schema, args.cpu_num, args.parser, not args.no_strict, args.encoding) print('Validation success! 
👍') except (SyntaxError, NameError, TypeError, ValueError) as e: print('Validation failed!\n%s' % str(e)) diff --git a/yamale/readers/yaml_reader.py b/yamale/readers/yaml_reader.py index 59696b3..d71ea4b 100644 --- a/yamale/readers/yaml_reader.py +++ b/yamale/readers/yaml_reader.py @@ -23,7 +23,7 @@ def _ruamel(f): } -def parse_yaml(path=None, parser='pyyaml', content=None): +def parse_yaml(path: str=None, parser: str='pyyaml', content: str=None, encoding: str='utf-8'): try: parse = _parsers[parser.lower()] except KeyError: @@ -31,7 +31,7 @@ def parse_yaml(path=None, parser='pyyaml', content=None): if (path is None and content is None) or (path is not None and content is not None): raise TypeError("Pass either path= or content=, not both") if path is not None: - with open(path) as f: + with open(path, encoding=encoding) as f: return parse(f) else: return parse(StringIO(content)) diff --git a/yamale/schema/schema.py b/yamale/schema/schema.py index 8395a94..5651426 100644 --- a/yamale/schema/schema.py +++ b/yamale/schema/schema.py @@ -50,7 +50,7 @@ def _parse_schema_item(self, path, expression, validators): error = str(e) + ' at node \'%s\'' % str(path) raise SyntaxError(error) - def validate(self, data, data_name, strict): + def validate(self, data: str, data_name: str, strict: bool) -> ValidationResult: path = DataPath() errors = self._validate(self._schema, data, path, strict) return ValidationResult(data_name, self.name, errors) @@ -75,7 +75,7 @@ def _validate_item(self, validator, data, path, strict, key): return self._validate(validator, data_item, path, strict) - def _validate(self, validator, data, path, strict): + def _validate(self, validator, data: str, path: DataPath, strict: bool): """ Validate data with validator. Special handling of non-primitive validators. 
diff --git a/yamale/yamale.py b/yamale/yamale.py index f563585..ad3f5f8 100644 --- a/yamale/yamale.py +++ b/yamale/yamale.py @@ -1,13 +1,24 @@ #!/usr/bin/env python +from typing import Any, List + +from yamale.schema.validationresults import ValidationResult from .schema import Schema from .yamale_error import YamaleError -def make_schema(path=None, parser='PyYAML', validators=None, content=None): +def make_schema(path: str=None, parser: str='PyYAML', validators=None, content: str=None, encoding: str='utf-8') -> Schema: + """ + Reads YAML schemas from files or a string. + :param path: Path to the schema file or directory. + :param parser: Parser to use. Can be 'PyYAML' or 'ruamel'. + :param validators: List of validators to use. + :param content: Content of the YAML schema. If not provided, the schema is read from `path`. + :param encoding: Encoding of the YAML schema files. Only used if `path` is provided. + """ # validators = None means use default. # Import readers here so we can get version information in setup.py. from . import readers - raw_schemas = readers.parse_yaml(path, parser, content=content) + raw_schemas = readers.parse_yaml(path, parser, content=content, encoding=encoding) if not raw_schemas: raise ValueError('{} is an empty file!'.format(path)) # First document is the base schema @@ -24,16 +35,31 @@ def make_schema(path=None, parser='PyYAML', validators=None, content=None): return s -def make_data(path=None, parser='PyYAML', content=None): +def make_data(path: str=None, parser: str='PyYAML', content: str=None, encoding: str='utf-8'): + """ + Reads a YAML file containing the data. + :param path: Path to the YAML file or directory. + :param parser: Parser to use. Can be 'PyYAML' or 'ruamel'. + :param content: Content of the YAML file. If not provided, the data is read from `path`. + :param encoding: Encoding of the YAML files. Only used if `path` is provided. + :return: A list of parsed YAML files. The object types depend on the parser used. 
+ """ from . import readers - raw_data = readers.parse_yaml(path, parser, content=content) + raw_data = readers.parse_yaml(path, parser, content=content, encoding=encoding) if len(raw_data) == 0: return [({}, path)] return [(d, path) for d in raw_data] -def validate(schema, data, strict=True, _raise_error=True): - results = [] +def validate(schema: Schema, data: List[Any], strict: bool=True, _raise_error: bool=True) -> List[ValidationResult]: + """ + Validates the list of YAML files against the schema. + :param schema: Schema to validate against. + :param data: List of YAML files to validate. + :param strict: If True, unexpected elements in the data will cause validation errors. + :param _raise_error: If True, raises an exception if a validation error occurs. + """ + results: List[ValidationResult] = [] is_valid = True for d, path in data: result = schema.validate(d, path, strict)