From f67c88b779d5ec7d7f792cf91f218dbdb2b6e1f4 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 4 Jan 2023 15:41:41 +0100 Subject: [PATCH] Fix `to_checksums` with `None` values in dicts and recursion Having a `'src': None` entry in a dict for checksums is as valid as having a `None` entry directly in the list. However the current function didn't handle it and crashed. Fix that as well as a few corner cases especially in the recursive case by introducing a new function for handling checksum entries in the checksum list and limitting the recursiveness. Fixes #4142 --- easybuild/framework/easyconfig/types.py | 53 +++++++++++++------------ test/framework/type_checking.py | 52 ++++++++++++++++++++++-- 2 files changed, 76 insertions(+), 29 deletions(-) diff --git a/easybuild/framework/easyconfig/types.py b/easybuild/framework/easyconfig/types.py index 76a837a7e8..9458bded14 100644 --- a/easybuild/framework/easyconfig/types.py +++ b/easybuild/framework/easyconfig/types.py @@ -467,33 +467,36 @@ def to_dependencies(dep_list): return [to_dependency(dep) for dep in dep_list] +def _to_checksum(checksum, top_level=True, is_dict_value=False): + """Ensure the correct element type for each checksum in the checksum list""" + # each entry can be: + # * None (indicates no checksum) + # * a string (MD5, SHA256, ... checksum) + # * a list or tuple with 2 elements: checksum type + checksum value + # * a list or tuple of checksums (i.e. multiple checksums for a single file) + # * a dict (filename to checksum mapping) + if checksum is None or isinstance(checksum, string_type): + return checksum + elif isinstance(checksum, (list, tuple)): + if len(checksum) == 2 and isinstance(checksum[0], string_type) and isinstance(checksum[1], (string_type, int)): + # 2 elements => a checksum tuple (2nd element string or int) or 2 alternative checksums + return tuple(checksum) + elif top_level or is_dict_value: + # Alternative checksums, recurse if this is a list entry (i.e. top_level) + # or a dict entry (to support alternative checksums for each filename) + return tuple(_to_checksum(x, top_level=False) for x in checksum) + elif top_level and isinstance(checksum, dict): + return {key: _to_checksum(value, top_level=False, is_dict_value=True) for key, value in checksum.items()} + # Not returned -> Wrong type/format + raise ValueError('Unexpected type of "%s": %s' % (type(checksum), str(checksum))) + + def to_checksums(checksums): """Ensure correct element types for list of checksums: convert list elements to tuples.""" - res = [] - for checksum in checksums: - # each list entry can be: - # * None (indicates no checksum) - # * a string (MD5 or SHA256 checksum) - # * a tuple with 2 elements: checksum type + checksum value - # * a list of checksums (i.e. multiple checksums for a single file) - # * a dict (filename to checksum mapping) - if isinstance(checksum, string_type): - res.append(checksum) - elif isinstance(checksum, (list, tuple)): - # 2 elements + only string/int values => a checksum tuple - if len(checksum) == 2 and all(isinstance(x, (string_type, int)) for x in checksum): - res.append(tuple(checksum)) - else: - res.append(to_checksums(checksum)) - elif isinstance(checksum, dict): - validated_dict = {} - for key, value in checksum.items(): - validated_dict[key] = to_checksums(value) - res.append(validated_dict) - else: - res.append(checksum) - - return res + try: + return [_to_checksum(checksum) for checksum in checksums] + except ValueError as e: + raise EasyBuildError('Invalid checksums: %s\n\tError: %s', checksums, e) def ensure_iterable_license_specs(specs): diff --git a/test/framework/type_checking.py b/test/framework/type_checking.py index 42e22adcd8..a12408eab9 100644 --- a/test/framework/type_checking.py +++ b/test/framework/type_checking.py @@ -709,16 +709,60 @@ def test_to_checksums(self): test_inputs = [ ['be662daa971a640e40be5c804d9d7d10'], ['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')], - [['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')]], [('md5', 'be662daa971a640e40be5c804d9d7d10')], - ['be662daa971a640e40be5c804d9d7d10', ('adler32', '0x998410035'), ('crc32', '0x1553842328'), - ('md5', 'be662daa971a640e40be5c804d9d7d10'), ('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'), - ('size', 273)], + [ + 'be662daa971a640e40be5c804d9d7d10', + ('adler32', '0x998410035'), + ('crc32', '0x1553842328'), + ('md5', 'be662daa971a640e40be5c804d9d7d10'), + ('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'), + ('size', 273), + ], # None values should not be filtered out, but left in place [None, 'fa618be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265', None], + # Alternative checksums + [('main_checksum', 'alternative_checksum')], + [('1st_of_3', '2nd_of_3', '3rd_of_3')], + # Alternative checksums with types + [ + (('adler32', '1st_adler'), ('crc32', '1st_crc')), + (('adler32', '2nd_adler'), ('crc32', '2nd_crc'), ('sha1', '2nd_sha')), + ], + # Entries can be dicts even containing `None` + [ + 'be662daa971a640e40be5c804d9d7d10', + { + 'src-arm.tgz': 'be662daa971a640e40be5c804d9d7d10', + 'src-x86.tgz': ('mainchecksum', 'altchecksum'), + 'src-ppc.tgz': ('mainchecksum', ('md5', 'altchecksum')), + 'git-clone.tgz': None, + }, + ], ] for checksums in test_inputs: self.assertEqual(to_checksums(checksums), checksums) + # List converted to tuple + checksums = [['1stchecksum', ('md5', 'md5sum')]] + checksums_expected = [tuple(checksums[0])] + self.assertEqual(to_checksums(checksums), checksums_expected) + # Error detection + wrong_nesting = [('1stchecksum', ('md5', ('md5sum', 'altmd5sum')))] + self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, wrong_nesting) + correct_nesting = [('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))] + self.assertEqual(to_checksums(correct_nesting), correct_nesting) + + unexpected_set = [('1stchecksum', {'md5', 'md5sum'})] + self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, unexpected_set) + unexpected_dict = [('1stchecksum', {'src': 'md5sum'})] + self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, unexpected_dict) + unexpected_dict = [{'src': ('md5sum', {'src': 'shasum'})}] + self.assertErrorRegex(EasyBuildError, 'Unexpected type.*shasum', to_checksums, unexpected_dict) + correct_dict = [{'src': ('md5sum', 'shasum')}] + self.assertEqual(to_checksums(correct_dict), correct_dict) + correct_dict_1 = [{'src': [['md5', 'md5sum'], ['sha', 'shasum']]}] + correct_dict_2 = [{'src': (('md5', 'md5sum'), ('sha', 'shasum'))}] + self.assertEqual(to_checksums(correct_dict_2), correct_dict_2) + self.assertEqual(to_checksums(correct_dict_1), correct_dict_2) # Lists to tuples def test_ensure_iterable_license_specs(self): """Test ensure_iterable_license_specs function."""