From 70ece2a2b1048b37ff5ed9d242a4c9ae4325bfcc Mon Sep 17 00:00:00 2001
From: Gunnar Andersson
Date: Thu, 6 Jul 2023 16:31:14 +0200
Subject: [PATCH] Programs to "sort" and compare IFEX content

stable_sort_ifex defines a known order of keys so that IFEX files can be
reliably compared. It is a module but can also be run as a standalone
script.

The diff_ifex.py script will print the diff between two files, after
normalizing the order of the content using the stable_sort_ifex module.
It can also be used with an external diff program.

Signed-off-by: Gunnar Andersson
---
 ifex/model/stable_sort_ifex.py | 121 +++++++++++++++++++++++++++++++++
 ifex/scripts/diff_ifex.py      | 112 ++++++++++++++++++++++++++++++
 2 files changed, 233 insertions(+)
 create mode 100755 ifex/model/stable_sort_ifex.py
 create mode 100755 ifex/scripts/diff_ifex.py

diff --git a/ifex/model/stable_sort_ifex.py b/ifex/model/stable_sort_ifex.py
new file mode 100755
index 0000000..cec6845
--- /dev/null
+++ b/ifex/model/stable_sort_ifex.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2023 Novaspring AB
+# SPDX-License-Identifier: MPL-2.0
+
+# This file is part of the IFEX project
+
+from collections import OrderedDict
+import argparse
+import sys
+import yaml
+
+# This file formats a YAML input in a fixed order ("sorted", basically).
+# This facilitates reliable comparisons / diffing.
+
+# The implementation is separated from diff/comparison-scripts to factor
+# it out of that code. The first implementation is simple but it might be
+# refined over time.
+
+
+def ifex_stable_order(data):
+    """Return data with every mapping rebuilt in a stable key order.
+
+    - dict: becomes an OrderedDict with the key "name" first (only if it is
+      present with a non-None value), followed by all keys in sorted order.
+    - list: element order is kept, each element is processed recursively.
+    - anything else: returned unchanged.
+    """
+    if isinstance(data, dict):
+        ordered_data = OrderedDict()
+        # Reserve the first position for the name, but only if the node has
+        # a name. The loop below assigns to the same key again; that only
+        # replaces the value and does not move the key.
+        name = data.get("name")
+        if name is not None:
+            ordered_data["name"] = name
+
+        try:
+            keys = sorted(data)
+        except TypeError:
+            # YAML permits keys of mixed types (e.g. 1 and "a") which Python
+            # cannot compare with each other. Order those by string form.
+            keys = sorted(data, key=str)
+
+        for key in keys:
+            ordered_data[key] = ifex_stable_order(data[key])
+
+        return ordered_data
+
+    if isinstance(data, list):
+        return [ifex_stable_order(item) for item in data]
+
+    return data
+
+
+# If an ordered dict is printed as a normal dict we get a lot of unrelated
+# metadata output. Therefore, we need to specify how PyYAML shall represent an
+# ordered dict: (PyYAML does not seemingly have OrderedDict support built in...?)
+#
+# Solution from:
+# https://stackoverflow.com/questions/16782112/can-pyyaml-dump-dict-items-in-non-alphabetical-order
+def represent_ordereddict(dumper, data):
+    """PyYAML representer: emit an OrderedDict as a plain YAML mapping,
+    keeping the items in the order in which the OrderedDict holds them."""
+    value = []
+
+    for key, val in data.items():
+        node_key = dumper.represent_data(key)
+        node_val = dumper.represent_data(val)
+        value.append((node_key, node_val))
+
+    return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value)
+
+
+# ---------------------------------------------------------------------
+# MAIN, used if this file is run standalone
+# ---------------------------------------------------------------------
+def usage():
+    """Print a description of the stable order that this script produces."""
+    print(
+        """
+This script reorders IFEX (YAML) input into a stable ("sorted") order and prints the result back out.
+The stable order is basically:
+  0. Comments have no semantic meaning so they will be filtered out completely
+  1. For dicts with key-value mappings, put the item 'name' first (if there is a key for 'name')
+  2. Then, all other keys in alphabetical order
+  3. Lists are not re-arranged (TODO: consider if lists should be sorted "by name" somehow?)
+  4. Anything else remains in the input order.
+"""
+    )
+
+
+def main():
+    """Read YAML from the given file (or STDIN) and print it in stable order."""
+    # Create the parser
+    parser = argparse.ArgumentParser(
+        description='Reorders IFEX (YAML) input into a stable ("sorted") order and prints the result back out.'
+    )
+    # Add the arguments
+    parser.add_argument("file1", help="Input file. (- to use STDIN)", nargs="?")
+
+    # Parse the arguments
+    args = parser.parse_args()
+
+    if args.file1 is None:
+        parser.print_help()
+        usage()
+        sys.exit(1)
+
+    if args.file1 == "-":
+        # Use STDIN if file is '-'
+        data = yaml.safe_load(sys.stdin)
+    else:
+        with open(args.file1, "r") as file:
+            data = yaml.safe_load(file)
+
+    yaml.add_representer(OrderedDict, represent_ordereddict)
+    print(yaml.dump(ifex_stable_order(data), sort_keys=False))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ifex/scripts/diff_ifex.py b/ifex/scripts/diff_ifex.py
new file mode 100755
index 0000000..507669d
--- /dev/null
+++ b/ifex/scripts/diff_ifex.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+
+# SPDX-FileCopyrightText: Copyright (c) 2023 Novaspring AB
+# SPDX-License-Identifier: MPL-2.0
+
+# This file is part of the IFEX project
+
+import argparse
+import subprocess
+import tempfile
+import yaml
+from collections import OrderedDict
+from ifex.model.stable_sort_ifex import ifex_stable_order, represent_ordereddict
+
+# The program compares two IFEX (YAML) files after normalizing ("sorting",
+# basically) the order of elements so that the comparison becomes more relevant.
+
+# The normal unix diff command seems to give the most useful output:
+def diff_files_with_external_program(path1, path2):
+    """Run the standard unix diff program on the given paths and return
+    what it wrote to stdout."""
+    # diff returns an error code if there is a difference => use run with check
+    # False to ignore the error, (instead of check_output())
+    return subprocess.run(
+        ["diff", path1, path2], text=True, check=False, stdout=subprocess.PIPE
+    ).stdout
+
+
+# Alternative, using difflib
+def diff_files(path1, path2):
+    """Use difflib to print the difference between the given files"""
+    # Imported here because only this alternative implementation needs it.
+    import difflib
+
+    with open(path1, "r") as f1:
+        l1 = f1.readlines()
+    with open(path2, "r") as f2:
+        l2 = f2.readlines()
+
+    for line in difflib.context_diff(l1, l2):
+        print(line, end="")
+
+
+def stable_order_file(file1):
+    """Write a new temporary file containing the YAML content of file1 with
+    keys in stable order, and return the name of the new file.
+
+    The temporary file is created with delete=False, so it still exists
+    after this function returns.
+    """
+    # Load and reorder before creating the temporary file, so that input which
+    # cannot be read or parsed does not leave an empty temporary file behind.
+    with open(file1, "r") as f1:
+        ordered = ifex_stable_order(yaml.safe_load(f1))
+
+    yaml.add_representer(OrderedDict, represent_ordereddict)
+    with tempfile.NamedTemporaryFile("w", delete=False) as f2:
+        f2.write(yaml.dump(ordered, sort_keys=False))
+
+    return f2.name
+
+
+def compare_yaml_files(file1, file2):
+    """Order the keys of the given file names, write them to new temporary
+    files, then diff the results and return the diff output"""
+    print("Stable sorting...")
+    f1 = stable_order_file(file1)
+    f2 = stable_order_file(file2)
+
+    print(f"temporary files are {file1} -> {f1}, {file2} -> {f2}")
+    print("Comparing files:")
+    return diff_files_with_external_program(f1, f2)
+
+
+# ---------------------------------------------------------------------
+# MAIN, used if this file is run standalone
+# ---------------------------------------------------------------------
+
+
+def main():
+    """Parse the command line, then print either the diff of the two
+    normalized files or (with -p) only the temporary file paths."""
+    # Create the parser
+    parser = argparse.ArgumentParser(
+        description="Compare IFEX (YAML) file contents, after normalizing order of elements."
+    )
+
+    # Add the arguments
+    parser.add_argument("file1", help="First, original file")
+    parser.add_argument("file2", help="Second, possibly changed file")
+    parser.add_argument(
+        "-p",
+        action="store_true",
+        default=False,
+        help="Only print the created temporary file paths, for use with an external diff program",
+    )
+
+    # Parse the arguments
+    args = parser.parse_args()
+
+    # If print filenames only
+    if args.p:
+        print(stable_order_file(args.file1))
+        print(stable_order_file(args.file2))
+        return
+
+    # Otherwise, output diff as well
+    print(compare_yaml_files(args.file1, args.file2))
+
+
+if __name__ == "__main__":
+    main()