From 26c7b35fd193894ccb92ad4191ea60f7226441f9 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Mon, 1 Apr 2024 08:06:31 -0700 Subject: [PATCH 1/8] Reject qcow files with data-file attributes Change-Id: I6326a3e85c1ba4cb1da944a4323769f2399ed2c1 Closes-Bug: #2059809 (cherry picked from commit 2ca29af4433e9fa99a0a48e230d8d25d6eaa4a87) (cherry picked from commit c3586f3a122f6cb0663217b12b52203e74e2e4fa) (cherry picked from commit a92c438fb5ba55440b38cae7c8b4361b58daa9dd) --- glance/async_/flows/base_import.py | 10 ++++++ .../async_/flows/plugins/image_conversion.py | 8 +++++ .../flows/plugins/test_image_conversion.py | 16 +++++++++ glance/tests/unit/async_/flows/test_import.py | 33 +++++++++++++++++++ 4 files changed, 67 insertions(+) diff --git a/glance/async_/flows/base_import.py b/glance/async_/flows/base_import.py index e6bb526b4d..c0e2b72839 100644 --- a/glance/async_/flows/base_import.py +++ b/glance/async_/flows/base_import.py @@ -181,6 +181,16 @@ def execute(self, image_id): 'bfile': backing_file} raise RuntimeError(msg) + try: + data_file = metadata['format-specific']['data']['data-file'] + except KeyError: + data_file = None + if data_file is not None: + msg = _("File %(path)s has invalid data-file " + "%(dfile)s, aborting.") % {"path": path, + "dfile": data_file} + raise RuntimeError(msg) + return path def revert(self, image_id, result, **kwargs): diff --git a/glance/async_/flows/plugins/image_conversion.py b/glance/async_/flows/plugins/image_conversion.py index f817e80985..91e8cf944e 100644 --- a/glance/async_/flows/plugins/image_conversion.py +++ b/glance/async_/flows/plugins/image_conversion.py @@ -122,6 +122,14 @@ def _execute(self, action, file_path, **kwargs): raise RuntimeError( 'QCOW images with backing files are not allowed') + try: + data_file = metadata['format-specific']['data']['data-file'] + except KeyError: + data_file = None + if data_file is not None: + raise RuntimeError( + 'QCOW images with data-file set are not allowed') + if metadata.get('format') == 'vmdk': create_type = metadata.get( 'format-specific', {}).get( diff --git a/glance/tests/unit/async_/flows/plugins/test_image_conversion.py b/glance/tests/unit/async_/flows/plugins/test_image_conversion.py index 2056f5b0ef..44ea5ac28d 100644 --- a/glance/tests/unit/async_/flows/plugins/test_image_conversion.py +++ b/glance/tests/unit/async_/flows/plugins/test_image_conversion.py @@ -185,6 +185,22 @@ def test_image_convert_invalid_qcow(self): self.assertEqual('QCOW images with backing files are not allowed', str(e)) + def test_image_convert_invalid_qcow_data_file(self): + data = {'format': 'qcow2', + 'format-specific': { + 'data': { + 'data-file': '/etc/hosts', + }, + }} + + convert = self._setup_image_convert_info_fail() + with mock.patch.object(processutils, 'execute') as exc_mock: + exc_mock.return_value = json.dumps(data), '' + e = self.assertRaises(RuntimeError, + convert.execute, 'file:///test/path.qcow') + self.assertEqual('QCOW images with data-file set are not allowed', + str(e)) + def _test_image_convert_invalid_vmdk(self): data = {'format': 'vmdk', 'format-specific': { diff --git a/glance/tests/unit/async_/flows/test_import.py b/glance/tests/unit/async_/flows/test_import.py index 79f6b6de57..55d6f09283 100644 --- a/glance/tests/unit/async_/flows/test_import.py +++ b/glance/tests/unit/async_/flows/test_import.py @@ -178,6 +178,39 @@ def create_image(*args, **kwargs): self.assertFalse(os.path.exists(tmp_image_path)) self.assertTrue(os.path.exists(image_path)) + def test_import_flow_invalid_data_file(self): + self.config(engine_mode='serial', + group='taskflow_executor') + + img_factory = mock.MagicMock() + + executor = taskflow_executor.TaskExecutor( + self.context, + self.task_repo, + self.img_repo, + img_factory) + + self.task_repo.get.return_value = self.task + + def create_image(*args, **kwargs): + kwargs['image_id'] = UUID1 + return self.img_factory.new_image(*args, **kwargs) + + self.img_repo.get.return_value = self.image + img_factory.new_image.side_effect = create_image + + with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: + dmock.return_value = io.BytesIO(b"TEST_IMAGE") + + with mock.patch.object(putils, 'trycmd') as tmock: + out = json.dumps({'format-specific': + {'data': {'data-file': 'somefile'}}}) + tmock.return_value = (out, '') + e = self.assertRaises(RuntimeError, + executor.begin_processing, + self.task.task_id) + self.assertIn('somefile', str(e)) + def test_import_flow_revert_import_to_fs(self): self.config(engine_mode='serial', group='taskflow_executor') From c6d5a68297ba11c38b22f74d748bfaee34b8ab8c Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Tue, 16 Apr 2024 10:29:10 -0700 Subject: [PATCH 2/8] Extend format_inspector for QCOW safety This adds two properties to the QcowInspector that makes it able to indicate whether the file specifies a backing_file or data_file in the header. Both conditions are considered unsafe for our usage. To ease checking of this condition, a classmethod is added that takes a local filename and digests just enough of the file to assert that both conditions are false. Change-Id: Iaf86b525397d41bd116999cabe0954a0a7efac65 Related-Bug: #2059809 (cherry picked from commit ae536bb394793c9a7a219cb498e03d5c81dbbbb7) (cherry picked from commit 2eba54e0821106097dfeceb424e53943fd090483) (cherry picked from commit 89dbbc838d606f461087e1494d19ddbcf9db0a38) --- glance/common/format_inspector.py | 132 +++++++++++++++++- .../unit/common/test_format_inspector.py | 87 +++++++++++- 2 files changed, 214 insertions(+), 5 deletions(-) diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py index 351c300ddb..488e797b22 100755 --- a/glance/common/format_inspector.py +++ b/glance/common/format_inspector.py @@ -28,6 +28,14 @@ LOG = logging.getLogger(__name__) +def chunked_reader(fileobj, chunk_size=512): + while True: + chunk = fileobj.read(chunk_size) + if not chunk: + break + yield chunk + + class CaptureRegion(object): """Represents a region of a file we want to capture. @@ -176,10 +184,16 @@ def virtual_size(self): @property def actual_size(self): """Returns the total size of the file, usually smaller than - virtual_size. + virtual_size. NOTE: this will only be accurate if the entire + file is read and processed. """ return self._total_count + @property + def complete(self): + """Returns True if we have all the information needed.""" + return all(r.complete for r in self._capture_regions.values()) + def __str__(self): """The string name of this file format.""" return 'raw' @@ -194,6 +208,35 @@ def context_info(self): return {name: len(region.data) for name, region in self._capture_regions.items()} + @classmethod + def from_file(cls, filename): + """Read as much of a file as necessary to complete inspection. + + NOTE: Because we only read as much of the file as necessary, the + actual_size property will not reflect the size of the file, but the + amount of data we read before we satisfied the inspector. + + Raises ImageFormatError if we cannot parse the file. + """ + inspector = cls() + with open(filename, 'rb') as f: + for chunk in chunked_reader(f): + inspector.eat_chunk(chunk) + if inspector.complete: + # No need to eat any more data + break + if not inspector.complete or not inspector.format_match: + raise ImageFormatError('File is not in requested format') + return inspector + + def safety_check(self): + """Perform some checks to determine if this file is safe. + + Returns True if safe, False otherwise. It may raise ImageFormatError + if safety cannot be guaranteed because of parsing or other errors. + """ + return True + # The qcow2 format consists of a big-endian 72-byte header, of which # only a small portion has information we care about: @@ -202,15 +245,26 @@ def context_info(self): # 0 0x00 Magic 4-bytes 'QFI\xfb' # 4 0x04 Version (uint32_t, should always be 2 for modern files) # . . . +# 8 0x08 Backing file offset (uint64_t) # 24 0x18 Size in bytes (unint64_t) +# . . . +# 72 0x48 Incompatible features bitfield (6 bytes) # -# https://people.gnome.org/~markmc/qcow-image-format.html +# https://gitlab.com/qemu-project/qemu/-/blob/master/docs/interop/qcow2.txt class QcowInspector(FileInspector): """QEMU QCOW2 Format This should only require about 32 bytes of the beginning of the file - to determine the virtual size. + to determine the virtual size, and 104 bytes to perform the safety check. """ + + BF_OFFSET = 0x08 + BF_OFFSET_LEN = 8 + I_FEATURES = 0x48 + I_FEATURES_LEN = 8 + I_FEATURES_DATAFILE_BIT = 3 + I_FEATURES_MAX_BIT = 4 + def __init__(self, *a, **k): super(QcowInspector, self).__init__(*a, **k) self.new_region('header', CaptureRegion(0, 512)) @@ -220,6 +274,10 @@ def _qcow_header_data(self): struct.unpack('>4sIQIIQ', self.region('header').data[:32])) return magic, size + @property + def has_header(self): + return self.region('header').complete + @property def virtual_size(self): if not self.region('header').complete: @@ -236,9 +294,77 @@ def format_match(self): magic, size = self._qcow_header_data() return magic == b'QFI\xFB' + @property + def has_backing_file(self): + if not self.region('header').complete: + return None + if not self.format_match: + return False + bf_offset_bytes = self.region('header').data[ + self.BF_OFFSET:self.BF_OFFSET + self.BF_OFFSET_LEN] + # nonzero means "has a backing file" + bf_offset, = struct.unpack('>Q', bf_offset_bytes) + return bf_offset != 0 + + @property + def has_unknown_features(self): + if not self.region('header').complete: + return None + if not self.format_match: + return False + i_features = self.region('header').data[ + self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN] + + # This is the maximum byte number we should expect any bits to be set + max_byte = self.I_FEATURES_MAX_BIT // 8 + + # The flag bytes are in big-endian ordering, so if we process + # them in index-order, they're reversed + for i, byte_num in enumerate(reversed(range(self.I_FEATURES_LEN))): + if byte_num == max_byte: + # If we're in the max-allowed byte, allow any bits less than + # the maximum-known feature flag bit to be set + allow_mask = ((1 << self.I_FEATURES_MAX_BIT) - 1) + elif byte_num > max_byte: + # If we're above the byte with the maximum known feature flag + # bit, then we expect all zeroes + allow_mask = 0x0 + else: + # Any earlier-than-the-maximum byte can have any of the flag + # bits set + allow_mask = 0xFF + + if i_features[i] & ~allow_mask: + LOG.warning('Found unknown feature bit in byte %i: %s/%s', + byte_num, bin(i_features[byte_num] & ~allow_mask), + bin(allow_mask)) + return True + + return False + + @property + def has_data_file(self): + if not self.region('header').complete: + return None + if not self.format_match: + return False + i_features = self.region('header').data[ + self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN] + + # First byte of bitfield, which is i_features[7] + byte = self.I_FEATURES_LEN - 1 - self.I_FEATURES_DATAFILE_BIT // 8 + # Third bit of bitfield, which is 0x04 + bit = 1 << (self.I_FEATURES_DATAFILE_BIT - 1 % 8) + return bool(i_features[byte] & bit) + def __str__(self): return 'qcow2' + def safety_check(self): + return (not self.has_backing_file and + not self.has_data_file and + not self.has_unknown_features) + # The VHD (or VPC as QEMU calls it) format consists of a big-endian # 512-byte "footer" at the beginning of the file with various diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py index d229d094fb..4a096e94fd 100644 --- a/glance/tests/unit/common/test_format_inspector.py +++ b/glance/tests/unit/common/test_format_inspector.py @@ -50,12 +50,29 @@ def tearDown(self): except Exception: pass - def _create_img(self, fmt, size): + def _create_img(self, fmt, size, subformat=None, options=None, + backing_file=None): if fmt == 'vhd': # QEMU calls the vhd format vpc fmt = 'vpc' - fn = tempfile.mktemp(prefix='glance-unittest-formatinspector-', + if options is None: + options = {} + opt = '' + prefix = 'glance-unittest-formatinspector-' + + if subformat: + options['subformat'] = subformat + prefix += subformat + '-' + + if options: + opt += '-o ' + ','.join('%s=%s' % (k, v) + for k, v in options.items()) + + if backing_file is not None: + opt += ' -b %s -F raw' % backing_file + + fn = tempfile.mktemp(prefix=prefix, suffix='.%s' % fmt) self._created_files.append(fn) subprocess.check_output( @@ -119,6 +136,72 @@ def test_vhdx(self): def test_vmdk(self): self._test_format('vmdk') + def test_from_file_reads_minimum(self): + img = self._create_img('qcow2', 10 * units.Mi) + file_size = os.stat(img).st_size + fmt = format_inspector.QcowInspector.from_file(img) + # We know everything we need from the first 512 bytes of a QCOW image, + # so make sure that we did not read the whole thing when we inspect + # a local file. + self.assertLess(fmt.actual_size, file_size) + + def test_qcow2_safety_checks(self): + # Create backing and data-file names (and initialize the backing file) + backing_fn = tempfile.mktemp(prefix='backing') + self._created_files.append(backing_fn) + with open(backing_fn, 'w') as f: + f.write('foobar') + data_fn = tempfile.mktemp(prefix='data') + self._created_files.append(data_fn) + + # A qcow with no backing or data file is safe + fn = self._create_img('qcow2', 5 * units.Mi, None) + inspector = format_inspector.QcowInspector.from_file(fn) + self.assertTrue(inspector.safety_check()) + + # A backing file makes it unsafe + fn = self._create_img('qcow2', 5 * units.Mi, None, + backing_file=backing_fn) + inspector = format_inspector.QcowInspector.from_file(fn) + self.assertFalse(inspector.safety_check()) + + # A data-file makes it unsafe + fn = self._create_img('qcow2', 5 * units.Mi, + options={'data_file': data_fn, + 'data_file_raw': 'on'}) + inspector = format_inspector.QcowInspector.from_file(fn) + self.assertFalse(inspector.safety_check()) + + # Trying to load a non-QCOW file is an error + self.assertRaises(format_inspector.ImageFormatError, + format_inspector.QcowInspector.from_file, + backing_fn) + + def test_qcow2_feature_flag_checks(self): + data = bytearray(512) + data[0:4] = b'QFI\xFB' + inspector = format_inspector.QcowInspector() + inspector.region('header').data = data + + # All zeros, no feature flags - all good + self.assertFalse(inspector.has_unknown_features) + + # A feature flag set in the first byte (highest-order) is not + # something we know about, so fail. + data[0x48] = 0x01 + self.assertTrue(inspector.has_unknown_features) + + # The first bit in the last byte (lowest-order) is known (the dirty + # bit) so that should pass + data[0x48] = 0x00 + data[0x4F] = 0x01 + self.assertFalse(inspector.has_unknown_features) + + # Currently (as of 2024), the high-order feature flag bit in the low- + # order byte is not assigned, so make sure we reject it. + data[0x4F] = 0x80 + self.assertTrue(inspector.has_unknown_features) + def test_vdi(self): self._test_format('vdi') From cd19a3d6f19417a7e44e07cc7caed199869adddf Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Thu, 18 Apr 2024 14:51:52 -0700 Subject: [PATCH 3/8] Add VMDK safety check This makes us check the extent filenames to make sure they don't have any banned characters in them (i.e. slashes). It also makes us reject VMDK files with a footer. Since we process these files as a stream, we can't honor a footer that directs us to find the descriptor block in a location we've already processed. Thus, if a file indicates it has a footer, consider it a policy exception and unsupported. Change-Id: I4a1c6dff7854c49940a0ac7988722aa6befc04fa (cherry picked from commit c1bf35dffb7f4c3090b1f04cf0e27cb431330c3e) (cherry picked from commit d3f1d6159c0218ac01e8d881e2ec4da71fc952ee) (cherry picked from commit 2dd4d138d4b8e1d9ca69fc0dda3711553a65d912) --- glance/common/format_inspector.py | 71 ++++++++++++++++++- .../unit/common/test_format_inspector.py | 56 +++++++++++++++ 2 files changed, 124 insertions(+), 3 deletions(-) diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py index 488e797b22..890f49dabc 100755 --- a/glance/common/format_inspector.py +++ b/glance/common/format_inspector.py @@ -642,6 +642,13 @@ class VMDKInspector(FileInspector): variable number of 512 byte sectors, but is just text defining the layout of the disk. """ + + # The beginning and max size of the descriptor is also hardcoded in Qemu + # at 0x200 and 1MB - 1 + DESC_OFFSET = 0x200 + DESC_MAX_SIZE = (1 << 20) - 1 + GD_AT_END = 0xffffffffffffffff + def __init__(self, *a, **k): super(VMDKInspector, self).__init__(*a, **k) self.new_region('header', CaptureRegion(0, 512)) @@ -653,8 +660,9 @@ def post_process(self): if not self.region('header').complete: return - sig, ver, _flags, _sectors, _grain, desc_sec, desc_num = struct.unpack( - '<4sIIQQQQ', self.region('header').data[:44]) + (sig, ver, _flags, _sectors, _grain, desc_sec, desc_num, + _numGTEsperGT, _rgdOffset, gdOffset) = struct.unpack( + '<4sIIQQQQIQQ', self.region('header').data[:64]) if sig != b'KDMV': raise ImageFormatError('Signature KDMV not found: %r' % sig) @@ -662,7 +670,11 @@ def post_process(self): if ver not in (1, 2, 3): raise ImageFormatError('Unsupported format version %i' % ver) - return + + if gdOffset == self.GD_AT_END: + # This means we have a footer, which takes precedence over the + # header, which we cannot support since we stream. + raise ImageFormatError('Unsupported VMDK footer') if not self.has_region('descriptor'): self.new_region('descriptor', CaptureRegion( @@ -702,6 +714,59 @@ def virtual_size(self): return sectors * 512 + def safety_check(self): + if (not self.has_region('descriptor') or + not self.region('descriptor').complete): + return False + + try: + # Descriptor is padded to 512 bytes + desc_data = self.region('descriptor').data.rstrip(b'\x00') + # Descriptor is actually case-insensitive ASCII text + desc_text = desc_data.decode('ascii').lower() + except UnicodeDecodeError: + LOG.error('VMDK descriptor failed to decode as ASCII') + raise ImageFormatError('Invalid VMDK descriptor data') + + extent_access = ('rw', 'rdonly', 'noaccess') + header_fields = [] + extents = [] + ddb = [] + + # NOTE(danms): Cautiously parse the VMDK descriptor. Each line must + # be something we understand, otherwise we refuse it. + for line in [x.strip() for x in desc_text.split('\n')]: + if line.startswith('#') or not line: + # Blank or comment lines are ignored + continue + elif line.startswith('ddb'): + # DDB lines are allowed (but not used by us) + ddb.append(line) + elif '=' in line and ' ' not in line.split('=')[0]: + # Header fields are a single word followed by an '=' and some + # value + header_fields.append(line) + elif line.split(' ')[0] in extent_access: + # Extent lines start with one of the three access modes + extents.append(line) + else: + # Anything else results in a rejection + LOG.error('Unsupported line %r in VMDK descriptor', line) + raise ImageFormatError('Invalid VMDK descriptor data') + + # Check all the extent lines for concerning content + for extent_line in extents: + if '/' in extent_line: + LOG.error('Extent line %r contains unsafe characters', + extent_line) + return False + + if not extents: + LOG.error('VMDK file specified no extents') + return False + + return True + def __str__(self): return 'vmdk' diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py index 4a096e94fd..cd200ca197 100644 --- a/glance/tests/unit/common/test_format_inspector.py +++ b/glance/tests/unit/common/test_format_inspector.py @@ -202,6 +202,62 @@ def test_qcow2_feature_flag_checks(self): data[0x4F] = 0x80 self.assertTrue(inspector.has_unknown_features) + def test_vmdk_safety_checks(self): + region = format_inspector.CaptureRegion(0, 0) + inspector = format_inspector.VMDKInspector() + inspector.new_region('descriptor', region) + + # This should be a legit VMDK descriptor which comments, blank lines, + # an extent, some ddb content, and some header values. + legit_desc = ['# This is a comment', + '', + ' ', + 'createType=monolithicSparse', + 'RW 1234 SPARSE "foo.vmdk"', + 'ddb.adapterType = "MFM', + '# EOF'] + region.data = ('\n'.join(legit_desc)).encode('ascii') + region.length = len(region.data) + self.assertTrue(inspector.safety_check()) + + # Any of these lines should trigger an error indicating that there is + # something in the descriptor we don't understand + bad_lines = [ + '#\U0001F4A9', + 'header Name=foo', + 'foo bar', + 'WR 123 SPARSE "foo.vmdk"', + ] + + for bad_line in bad_lines: + # Encode as UTF-8 purely so we can test that anything non-ASCII + # will trigger the decode check + region.data = bad_line.encode('utf-8') + region.length = len(region.data) + self.assertRaisesRegex(format_inspector.ImageFormatError, + 'Invalid VMDK descriptor', + inspector.safety_check) + + # Extents with slashes in the name fail the safety check + region.data = b'RW 123 SPARSE "/etc/shadow"' + region.length = len(region.data) + self.assertFalse(inspector.safety_check()) + + # A descriptor that specifies no extents fails the safety check + region.data = b'# Nothing' + region.length = len(region.data) + self.assertFalse(inspector.safety_check()) + + def test_vmdk_reject_footer(self): + data = struct.pack('<4sIIQQQQIQQ', b'KDMV', 3, 0, 0, 0, 0, 1, 0, 0, + format_inspector.VMDKInspector.GD_AT_END) + inspector = format_inspector.VMDKInspector() + inspector.region('header').data = data + inspector.region('header').length = len(data) + self.assertRaisesRegex(format_inspector.ImageFormatError, + 'footer', + inspector.post_process) + def test_vdi(self): self._test_format('vdi') From 8e00bf86480769fc683a4c82b9d6021ecd8ae27a Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Tue, 16 Apr 2024 11:20:48 -0700 Subject: [PATCH 4/8] Reject unsafe qcow and vmdk files This causes us to use the format inspector to pre-examine qcow and vmdk files for safe configurations before even using qemu-img on them. Change-Id: I0554706368e573e11f649c09569f7c21cbc8634b Closes-Bug: #2059809 (cherry picked from commit a95f335bca1dfdd1c904ae475e9fe8c6806f2c56) (cherry picked from commit 55fc42563818fcf88b474233df242a796c918b3a) (cherry picked from commit f1f53075d69a9a1c006b3e25506e30eb0210de1f) --- .../async_/flows/plugins/image_conversion.py | 44 +++++++++++--- .../flows/plugins/test_image_conversion.py | 57 ++++++++++++++++--- 2 files changed, 87 insertions(+), 14 deletions(-) diff --git a/glance/async_/flows/plugins/image_conversion.py b/glance/async_/flows/plugins/image_conversion.py index 91e8cf944e..90a846ace7 100644 --- a/glance/async_/flows/plugins/image_conversion.py +++ b/glance/async_/flows/plugins/image_conversion.py @@ -26,6 +26,7 @@ from taskflow import task from glance.async_ import utils +from glance.common import format_inspector from glance.i18n import _, _LI LOG = logging.getLogger(__name__) @@ -88,8 +89,40 @@ def _execute(self, action, file_path, **kwargs): 'target': target_format} self.dest_path = dest_path + source_format = action.image_disk_format + inspector_cls = format_inspector.get_inspector(source_format) + if not inspector_cls: + # We cannot convert from disk_format types that qemu-img doesn't + # support (like iso, ploop, etc). The ones it supports overlaps + # with the ones we have inspectors for, so reject conversion for + # any format we don't have an inspector for. + raise RuntimeError( + 'Unable to convert from format %s' % source_format) + + # Use our own cautious inspector module (if we have one for this + # format) to make sure a file is the format the submitter claimed + # it is and that it passes some basic safety checks _before_ we run + # qemu-img on it. + # See https://bugs.launchpad.net/nova/+bug/2059809 for details. + try: + inspector = inspector_cls.from_file(src_path) + if not inspector.safety_check(): + LOG.error('Image failed %s safety check; aborting conversion', + source_format) + raise RuntimeError('Image has disallowed configuration') + except RuntimeError: + raise + except format_inspector.ImageFormatError as e: + LOG.error('Image claimed to be %s format failed format ' + 'inspection: %s', source_format, e) + raise RuntimeError('Image format detection failed') + except Exception as e: + LOG.exception('Unknown error inspecting image format: %s', e) + raise RuntimeError('Unable to inspect image') + try: stdout, stderr = putils.trycmd("qemu-img", "info", + "-f", source_format, "--output=json", src_path, prlimit=utils.QEMU_IMG_PROC_LIMITS, @@ -106,13 +139,10 @@ def _execute(self, action, file_path, **kwargs): raise RuntimeError(stderr) metadata = json.loads(stdout) - try: - source_format = metadata['format'] - except KeyError: - msg = ("Failed to do introspection as part of image " - "conversion for %(iid)s: Source format not reported") - LOG.error(msg, {'iid': self.image_id}) - raise RuntimeError(msg) + if metadata.get('format') != source_format: + LOG.error('Image claiming to be %s reported as %s by qemu-img', + source_format, metadata.get('format', 'unknown')) + raise RuntimeError('Image metadata disagrees about format') virtual_size = metadata.get('virtual-size', 0) action.set_image_attribute(virtual_size=virtual_size) diff --git a/glance/tests/unit/async_/flows/plugins/test_image_conversion.py b/glance/tests/unit/async_/flows/plugins/test_image_conversion.py index 44ea5ac28d..e59749fa09 100644 --- a/glance/tests/unit/async_/flows/plugins/test_image_conversion.py +++ b/glance/tests/unit/async_/flows/plugins/test_image_conversion.py @@ -13,6 +13,7 @@ # License for the specific language governing permissions and limitations # under the License. +import fixtures import json import os import sys @@ -25,6 +26,7 @@ import glance.async_.flows.api_image_import as import_flow import glance.async_.flows.plugins.image_conversion as image_conversion from glance.async_ import utils as async_utils +from glance.common import format_inspector from glance.common import utils from glance import domain from glance import gateway @@ -91,6 +93,11 @@ def setUp(self): self.image_id, self.task.task_id) + self.inspector_mock = mock.MagicMock() + self.useFixture(fixtures.MockPatch('glance.common.format_inspector.' + 'get_inspector', + self.inspector_mock)) + @mock.patch.object(os, 'stat') @mock.patch.object(os, 'remove') def test_image_convert_success(self, mock_os_remove, mock_os_stat): @@ -105,7 +112,7 @@ def test_image_convert_success(self, mock_os_remove, mock_os_stat): image = mock.MagicMock(image_id=self.image_id, virtual_size=None, extra_properties={ 'os_glance_import_task': self.task.task_id}, - disk_format='qcow2') + disk_format='raw') self.img_repo.get.return_value = image with mock.patch.object(processutils, 'execute') as exc_mock: @@ -127,7 +134,7 @@ def test_image_convert_success(self, mock_os_remove, mock_os_stat): self.assertEqual(456, image.virtual_size) self.assertEqual(123, image.size) - def _setup_image_convert_info_fail(self): + def _setup_image_convert_info_fail(self, disk_format='qcow2'): image_convert = image_conversion._ConvertImage(self.context, self.task.task_id, self.task_type, @@ -137,7 +144,7 @@ def _setup_image_convert_info_fail(self): image = mock.MagicMock(image_id=self.image_id, virtual_size=None, extra_properties={ 'os_glance_import_task': self.task.task_id}, - disk_format='qcow2') + disk_format=disk_format) self.img_repo.get.return_value = image return image_convert @@ -149,6 +156,7 @@ def test_image_convert_fails_inspection(self): convert.execute, 'file:///test/path.raw') exc_mock.assert_called_once_with( 'qemu-img', 'info', + '-f', 'qcow2', '--output=json', '/test/path.raw', prlimit=async_utils.QEMU_IMG_PROC_LIMITS, @@ -165,6 +173,7 @@ def test_image_convert_inspection_reports_error(self): convert.execute, 'file:///test/path.raw') exc_mock.assert_called_once_with( 'qemu-img', 'info', + '-f', 'qcow2', '--output=json', '/test/path.raw', prlimit=async_utils.QEMU_IMG_PROC_LIMITS, @@ -201,6 +210,36 @@ def test_image_convert_invalid_qcow_data_file(self): self.assertEqual('QCOW images with data-file set are not allowed', str(e)) + def test_image_convert_no_inspector_match(self): + convert = self._setup_image_convert_info_fail() + self.inspector_mock.return_value = None + self.assertRaisesRegex(RuntimeError, + 'Unable to convert from format', + convert.execute, 'file:///test/path.hpfs') + + def test_image_convert_fails_inspection_safety_check(self): + convert = self._setup_image_convert_info_fail() + inspector = self.inspector_mock.return_value.from_file.return_value + inspector.safety_check.return_value = False + self.assertRaisesRegex(RuntimeError, + 'Image has disallowed configuration', + convert.execute, 'file:///test/path.qcow') + + def test_image_convert_fails_inspection_format_check(self): + convert = self._setup_image_convert_info_fail() + self.inspector_mock.return_value.from_file.side_effect = ( + format_inspector.ImageFormatError()) + self.assertRaisesRegex(RuntimeError, + 'Image format detection failed', + convert.execute, 'file:///test/path.qcow') + + def test_image_convert_fails_inspection_error(self): + convert = self._setup_image_convert_info_fail() + self.inspector_mock.return_value.from_file.side_effect = ValueError + self.assertRaisesRegex(RuntimeError, + 'Unable to inspect image', + convert.execute, 'file:///test/path.qcow') + def _test_image_convert_invalid_vmdk(self): data = {'format': 'vmdk', 'format-specific': { @@ -208,7 +247,7 @@ def _test_image_convert_invalid_vmdk(self): 'create-type': 'monolithicFlat', }}} - convert = self._setup_image_convert_info_fail() + convert = self._setup_image_convert_info_fail(disk_format='vmdk') with mock.patch.object(processutils, 'execute') as exc_mock: exc_mock.return_value = json.dumps(data), '' convert.execute('file:///test/path.vmdk') @@ -237,7 +276,7 @@ def test_image_convert_valid_vmdk(self): self._test_image_convert_invalid_vmdk) def test_image_convert_fails(self): - convert = self._setup_image_convert_info_fail() + convert = self._setup_image_convert_info_fail(disk_format='raw') with mock.patch.object(processutils, 'execute') as exc_mock: exc_mock.side_effect = [('{"format":"raw"}', ''), OSError('convert_fail')] @@ -245,6 +284,7 @@ def test_image_convert_fails(self): convert.execute, 'file:///test/path.raw') exc_mock.assert_has_calls( [mock.call('qemu-img', 'info', + '-f', 'raw', '--output=json', '/test/path.raw', prlimit=async_utils.QEMU_IMG_PROC_LIMITS, @@ -257,7 +297,7 @@ def test_image_convert_fails(self): self.img_repo.save.assert_not_called() def test_image_convert_reports_fail(self): - convert = self._setup_image_convert_info_fail() + convert = self._setup_image_convert_info_fail(disk_format='raw') with mock.patch.object(processutils, 'execute') as exc_mock: exc_mock.side_effect = [('{"format":"raw"}', ''), ('', 'some error')] @@ -265,6 +305,7 @@ def test_image_convert_reports_fail(self): convert.execute, 'file:///test/path.raw') exc_mock.assert_has_calls( [mock.call('qemu-img', 'info', + '-f', 'raw', '--output=json', '/test/path.raw', prlimit=async_utils.QEMU_IMG_PROC_LIMITS, @@ -282,9 +323,10 @@ def test_image_convert_fails_source_format(self): exc_mock.return_value = ('{}', '') exc = self.assertRaises(RuntimeError, convert.execute, 'file:///test/path.raw') - self.assertIn('Source format not reported', str(exc)) + self.assertIn('Image metadata disagrees about format', str(exc)) exc_mock.assert_called_once_with( 'qemu-img', 'info', + '-f', 'qcow2', '--output=json', '/test/path.raw', prlimit=async_utils.QEMU_IMG_PROC_LIMITS, @@ -302,6 +344,7 @@ def test_image_convert_same_format_does_nothing(self): # Make sure we only called qemu-img for inspection, not conversion exc_mock.assert_called_once_with( 'qemu-img', 'info', + '-f', 'qcow2', '--output=json', '/test/path.qcow', prlimit=async_utils.QEMU_IMG_PROC_LIMITS, From cc47b71f1dbae0dfa6bf90644dbfc73d3a2d82f1 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Thu, 27 Jun 2024 09:33:55 -0700 Subject: [PATCH 5/8] Add QED format detection to format_inspector This merely recognizes this format and always marks it as unsafe because no service supports it. This prevents someone from uploading one that we will ask qemu-img to inspect. Change-Id: Ieea7b7eb0f380571bd4937cded920776e05f7ec4 (cherry picked from commit 4096c5aff1d046d5c28d0e4a69b3c9574e9e8cc8) (cherry picked from commit ba98022b98ef5b9c98e1d7d20c88e1ca4b23fa80) (cherry picked from commit a8dadcd7994c88a61f94341286b4bcd693b51b32) --- glance/common/format_inspector.py | 18 ++++++++++++++++++ .../tests/unit/common/test_format_inspector.py | 6 ++++++ 2 files changed, 24 insertions(+) diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py index 890f49dabc..a5734bfcf8 100755 --- a/glance/common/format_inspector.py +++ b/glance/common/format_inspector.py @@ -366,6 +366,23 @@ def safety_check(self): not self.has_unknown_features) +class QEDInspector(FileInspector): + def __init__(self, tracing=False): + super().__init__(tracing) + self.new_region('header', CaptureRegion(0, 512)) + + @property + def format_match(self): + if not self.region('header').complete: + return False + return self.region('header').data.startswith(b'QED\x00') + + def safety_check(self): + # QED format is not supported by anyone, but we want to detect it + # and mark it as just always unsafe. + return False + + # The VHD (or VPC as QEMU calls it) format consists of a big-endian # 512-byte "footer" at the beginning of the file with various # information, most of which does not matter to us: @@ -868,6 +885,7 @@ def get_inspector(format_name): 'vhdx': VHDXInspector, 'vmdk': VMDKInspector, 'vdi': VDIInspector, + 'qed': QEDInspector, } return formats.get(format_name) diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py index cd200ca197..c35a1e5232 100644 --- a/glance/tests/unit/common/test_format_inspector.py +++ b/glance/tests/unit/common/test_format_inspector.py @@ -145,6 +145,12 @@ def test_from_file_reads_minimum(self): # a local file. self.assertLess(fmt.actual_size, file_size) + def test_qed_always_unsafe(self): + img = self._create_img('qed', 10 * units.Mi) + fmt = format_inspector.get_inspector('qed').from_file(img) + self.assertTrue(fmt.format_match) + self.assertFalse(fmt.safety_check()) + def test_qcow2_safety_checks(self): # Create backing and data-file names (and initialize the backing file) backing_fn = tempfile.mktemp(prefix='backing') From 867566ad52cb67c00833d0920bb92c313cd57880 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Thu, 18 Apr 2024 08:25:24 -0700 Subject: [PATCH 6/8] Add file format detection to format_inspector Change-Id: If0a4251465507be035ffaf9d855299611637cfa9 (cherry picked from commit 79271eaa5c742a1741321198c43807857fb6ed94) (cherry picked from commit e1c36248c7660dea1bedfa8f1c0711a4b97d279c) (cherry picked from commit d54121d6a937fd50aae1018aede228a3c0985dce) --- glance/common/format_inspector.py | 54 ++++++++++++++---- .../unit/common/test_format_inspector.py | 56 ------------------- 2 files changed, 43 insertions(+), 67 deletions(-) diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py index a5734bfcf8..7fbf3d6502 100755 --- a/glance/common/format_inspector.py +++ b/glance/common/format_inspector.py @@ -872,20 +872,52 @@ def close(self): self._source.close() +ALL_FORMATS = { + 'raw': FileInspector, + 'qcow2': QcowInspector, + 'vhd': VHDInspector, + 'vhdx': VHDXInspector, + 'vmdk': VMDKInspector, + 'vdi': VDIInspector, + 'qed': QEDInspector, +} + + def get_inspector(format_name): """Returns a FormatInspector class based on the given name. :param format_name: The name of the disk_format (raw, qcow2, etc). :returns: A FormatInspector or None if unsupported. """ - formats = { - 'raw': FileInspector, - 'qcow2': QcowInspector, - 'vhd': VHDInspector, - 'vhdx': VHDXInspector, - 'vmdk': VMDKInspector, - 'vdi': VDIInspector, - 'qed': QEDInspector, - } - - return formats.get(format_name) + + return ALL_FORMATS.get(format_name) + + +def detect_file_format(filename): + """Attempts to detect the format of a file. + + This runs through a file one time, running all the known inspectors in + parallel. It stops reading the file once one of them matches or all of + them are sure they don't match. + + Returns the FileInspector that matched, if any. None if 'raw'. + """ + inspectors = {k: v() for k, v in ALL_FORMATS.items()} + with open(filename, 'rb') as f: + for chunk in chunked_reader(f): + for format, inspector in list(inspectors.items()): + try: + inspector.eat_chunk(chunk) + except ImageFormatError: + # No match, so stop considering this format + inspectors.pop(format) + continue + if (inspector.format_match and inspector.complete and + format != 'raw'): + # First complete match (other than raw) wins + return inspector + if all(i.complete for i in inspectors.values()): + # If all the inspectors are sure they are not a match, avoid + # reading to the end of the file to settle on 'raw'. + break + return inspectors['raw'] diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py index c35a1e5232..491c5636c3 100644 --- a/glance/tests/unit/common/test_format_inspector.py +++ b/glance/tests/unit/common/test_format_inspector.py @@ -208,62 +208,6 @@ def test_qcow2_feature_flag_checks(self): data[0x4F] = 0x80 self.assertTrue(inspector.has_unknown_features) - def test_vmdk_safety_checks(self): - region = format_inspector.CaptureRegion(0, 0) - inspector = format_inspector.VMDKInspector() - inspector.new_region('descriptor', region) - - # This should be a legit VMDK descriptor which comments, blank lines, - # an extent, some ddb content, and some header values. - legit_desc = ['# This is a comment', - '', - ' ', - 'createType=monolithicSparse', - 'RW 1234 SPARSE "foo.vmdk"', - 'ddb.adapterType = "MFM', - '# EOF'] - region.data = ('\n'.join(legit_desc)).encode('ascii') - region.length = len(region.data) - self.assertTrue(inspector.safety_check()) - - # Any of these lines should trigger an error indicating that there is - # something in the descriptor we don't understand - bad_lines = [ - '#\U0001F4A9', - 'header Name=foo', - 'foo bar', - 'WR 123 SPARSE "foo.vmdk"', - ] - - for bad_line in bad_lines: - # Encode as UTF-8 purely so we can test that anything non-ASCII - # will trigger the decode check - region.data = bad_line.encode('utf-8') - region.length = len(region.data) - self.assertRaisesRegex(format_inspector.ImageFormatError, - 'Invalid VMDK descriptor', - inspector.safety_check) - - # Extents with slashes in the name fail the safety check - region.data = b'RW 123 SPARSE "/etc/shadow"' - region.length = len(region.data) - self.assertFalse(inspector.safety_check()) - - # A descriptor that specifies no extents fails the safety check - region.data = b'# Nothing' - region.length = len(region.data) - self.assertFalse(inspector.safety_check()) - - def test_vmdk_reject_footer(self): - data = struct.pack('<4sIIQQQQIQQ', b'KDMV', 3, 0, 0, 0, 0, 1, 0, 0, - format_inspector.VMDKInspector.GD_AT_END) - inspector = format_inspector.VMDKInspector() - inspector.region('header').data = data - inspector.region('header').length = len(data) - self.assertRaisesRegex(format_inspector.ImageFormatError, - 'footer', - inspector.post_process) - def test_vdi(self): self._test_format('vdi') From 11d3b65a482def98ae03e288eac1f982a7fb0f61 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Wed, 26 Jun 2024 08:41:02 -0700 Subject: [PATCH 7/8] Add safety check and detection support to FI tool This adds a safety check and detection mechanism to the tools/test_format_inspector.py utility for verifying those features outside of glance. Change-Id: I447e7e51315472f8fa6013d4c4852f54c1e0c43d (cherry picked from commit b27040b87e43ff4acfb6870ef0d13d54e5ca5caa) (cherry picked from commit b394ef00c3426771092d099cf1d96077bfa4b919) (cherry picked from commit aa12d39b453068d9ee8367591eb60d4a15cddece) --- tools/test_format_inspector.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/test_format_inspector.py b/tools/test_format_inspector.py index aa554386ef..63e23210cc 100755 --- a/tools/test_format_inspector.py +++ b/tools/test_format_inspector.py @@ -102,6 +102,13 @@ def main(): else: print('Confirmed size with qemu-img') + print('Image safety check: %s' % ( + fmt.safety_check() and 'passed' or 'FAILED')) + if args.input: + detected_fmt = format_inspector.detect_file_format(args.input) + print('Detected inspector for image as: %s' % ( + detected_fmt.__class__.__name__)) + if __name__ == '__main__': sys.exit(main()) From 543b1f39066c210cf0e9f6966b691fc79212ea99 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Tue, 2 Jul 2024 11:53:01 +0100 Subject: [PATCH 8/8] Backport fix ups for Yoga --- glance/async_/flows/plugins/image_conversion.py | 5 ++++- glance/tests/unit/common/test_format_inspector.py | 2 +- glance/tests/unit/v2/test_tasks_resource.py | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/glance/async_/flows/plugins/image_conversion.py b/glance/async_/flows/plugins/image_conversion.py index 90a846ace7..bd0b7993d4 100644 --- a/glance/async_/flows/plugins/image_conversion.py +++ b/glance/async_/flows/plugins/image_conversion.py @@ -89,7 +89,10 @@ def _execute(self, action, file_path, **kwargs): 'target': target_format} self.dest_path = dest_path - source_format = action.image_disk_format + # Backport fixup due to lack of + # Ic51c5fd87caf04d38aeaf758ad2d0e2f28098e4d in Yoga: + #source_format = action.image_disk_format + source_format = action._image.disk_format inspector_cls = format_inspector.get_inspector(source_format) if not inspector_cls: # We cannot convert from disk_format types that qemu-img doesn't diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py index 491c5636c3..4c503f3303 100644 --- a/glance/tests/unit/common/test_format_inspector.py +++ b/glance/tests/unit/common/test_format_inspector.py @@ -76,7 +76,7 @@ def _create_img(self, fmt, size, subformat=None, options=None, suffix='.%s' % fmt) self._created_files.append(fn) subprocess.check_output( - 'qemu-img create -f %s %s %i' % (fmt, fn, size), + 'qemu-img create -f %s %s %s %i' % (fmt, opt, fn, size), shell=True) return fn diff --git a/glance/tests/unit/v2/test_tasks_resource.py b/glance/tests/unit/v2/test_tasks_resource.py index 754612b11f..15562724b7 100644 --- a/glance/tests/unit/v2/test_tasks_resource.py +++ b/glance/tests/unit/v2/test_tasks_resource.py @@ -415,8 +415,8 @@ def test_create_with_wrong_import_form(self): "non-local source of image data.") else: supported = ['http', ] - msg = ("The given uri is not valid. Please specify a " - "valid uri from the following list of supported uri " + msg = ("The given URI is not valid. Please specify a " + "valid URI from the following list of supported URI " "%(supported)s") % {'supported': supported} self.assertEqual(msg, final_task.message)