diff --git a/bindep.txt b/bindep.txt index 3a4d7bef806..f774113bc26 100644 --- a/bindep.txt +++ b/bindep.txt @@ -55,3 +55,5 @@ pcre-devel [platform:rpm test] # runtime and in unit tests. Net result is the same that lsscsi will be # installed for any nova installation. lsscsi +# NOTE(priteau): Install qemu-img for format inspector tests +qemu-utils [platform:dpkg test] diff --git a/nova/image/format_inspector.py b/nova/image/format_inspector.py index 8e57d7ed2c4..49cb75930a9 100644 --- a/nova/image/format_inspector.py +++ b/nova/image/format_inspector.py @@ -24,6 +24,7 @@ import struct from oslo_log import log as logging +from oslo_utils import units LOG = logging.getLogger(__name__) @@ -843,6 +844,93 @@ def __str__(self): return 'vdi' +class ISOInspector(FileInspector): + """ISO 9660 and UDF format + + we need to check the first 32KB + descriptor size + to look for the ISO 9660 or UDF signature. + + http://wiki.osdev.org/ISO_9660 + http://wiki.osdev.org/UDF + mkisofs --help | grep udf + + The Universal Disc Format or UDF is the filesystem used on DVDs and + Blu-Ray discs. UDF is an extension of ISO 9660 and shares the same + header structure and initial layout. + + Like the CDFS(ISO 9660) file system, + the UDF file system uses a 2048 byte sector size, + and it designates that the first 16 sectors can be used by the OS + to store proprietary data or boot logic. + + That means we need to check the first 32KB + descriptor size + to look for the ISO 9660 or UDF signature. + both formats have an extent based layout, so we can't determine + ahead of time where the descriptor will be located. + + fortunately, the ISO 9660 and UDF formats have a Primary Volume Descriptor + located at the beginning of the image, which contains the volume size.
+ + """ + + def __init__(self, *a, **k): + super(ISOInspector, self).__init__(*a, **k) + self.new_region('system_area', CaptureRegion(0, 32 * units.Ki)) + self.new_region('header', CaptureRegion(32 * units.Ki, 2 * units.Ki)) + + @property + def format_match(self): + if not self.complete: + return False + signature = self.region('header').data[1:6] + assert len(signature) == 5 + return signature in (b'CD001', b'NSR02', b'NSR03') + + @property + def virtual_size(self): + if not self.complete: + return 0 + if not self.format_match: + return 0 + + # the header size is 2KB or 1 sector + # the first header field is the descriptor type which is 1 byte + # the second field is the standard identifier which is 5 bytes + # the third field is the version which is 1 byte + # the rest of the header is 2041 bytes of type specific data + # see http://wiki.osdev.org/ISO_9660#The_Primary_Volume_Descriptor + + # we need to check that the descriptor type is 1 + # to ensure that this is a primary volume descriptor + descriptor_type = self.region('header').data[0] + if descriptor_type != 1: + return 0 + # The size in bytes of a logical block is stored at offset 128 + # and is 2 bytes long encoded in both little and big endian + # int16_LSB-MSB so the field is 4 bytes long + logical_block_size_data = self.region('header').data[128:132] + assert len(logical_block_size_data) == 4 + # given the encoding we only need to read half the field so we + # can use the first 2 bytes which are the little endian part + # this is normally 2048 or 2KB but we need to check as it can be + # different according to the ISO 9660 standard.
+ logical_block_size, = struct.unpack(' 1: + all_formats = [str(inspector) for inspector in detections] + raise ImageFormatError( + 'Multiple formats detected: %s' % ', '.join(all_formats)) + + return inspectors['raw'] if not detections else detections[0] diff --git a/nova/tests/unit/image/test_format_inspector.py b/nova/tests/unit/image/test_format_inspector.py new file mode 100644 index 00000000000..8406dfca378 --- /dev/null +++ b/nova/tests/unit/image/test_format_inspector.py @@ -0,0 +1,660 @@ +# Copyright 2020 Red Hat, Inc +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import io +import os +import re +import struct +import subprocess +import tempfile +from unittest import mock + +from oslo_utils import units + +from nova.image import format_inspector +from nova import test + + +TEST_IMAGE_PREFIX = 'nova-unittest-formatinspector-' + + +def get_size_from_qemu_img(filename): + output = subprocess.check_output('qemu-img info "%s"' % filename, + shell=True) + for line in output.split(b'\n'): + m = re.search(b'^virtual size: .* .([0-9]+) bytes', line.strip()) + if m: + return int(m.group(1)) + + raise Exception('Could not find virtual size with qemu-img') + + +class TestFormatInspectors(test.NoDBTestCase): + def setUp(self): + super(TestFormatInspectors, self).setUp() + self._created_files = [] + + def tearDown(self): + super(TestFormatInspectors, self).tearDown() + for fn in self._created_files: + try: + os.remove(fn) + except Exception: + pass + + def _create_iso(self, image_size, subformat='9660'): + """Create an ISO file of the given size. + + :param image_size: The size of the image to create in bytes + :param subformat: The subformat to use, if any + """ + + # these tests depend on mkisofs + # being installed and in the path, + # if it is not installed, skip + try: + subprocess.check_output('mkisofs --version', shell=True) + except Exception: + self.skipTest('mkisofs not installed') + + size = image_size // units.Mi + base_cmd = "mkisofs" + if subformat == 'udf': + # depending on the distribution mkisofs may not support udf + # and may be provided by genisoimage instead. As a result we + # need to check if the command supports udf via help + # instead of checking the installed version. + # mkisofs --help outputs to stderr so we need to + # redirect it to stdout to use grep. 
+ try: + subprocess.check_output( + 'mkisofs --help 2>&1 | grep udf', shell=True) + except Exception: + self.skipTest('mkisofs does not support udf format') + base_cmd += " -udf" + prefix = TEST_IMAGE_PREFIX + prefix += '-%s-' % subformat + fn = tempfile.mktemp(prefix=prefix, suffix='.iso') + self._created_files.append(fn) + subprocess.check_output( + 'dd if=/dev/zero of=%s bs=1M count=%i' % (fn, size), + shell=True) + # We need to use different file as input and output as the behavior + # of mkisofs is version dependent if both the input and the output + # are the same and can cause test failures + out_fn = "%s.iso" % fn + subprocess.check_output( + '%s -V "TEST" -o %s %s' % (base_cmd, out_fn, fn), + shell=True) + self._created_files.append(out_fn) + return out_fn + + def _create_img( + self, fmt, size, subformat=None, options=None, + backing_file=None): + """Create an image file of the given format and size. + + :param fmt: The format to create + :param size: The size of the image to create in bytes + :param subformat: The subformat to use, if any + :param options: A dictionary of options to pass to the format + :param backing_file: The backing file to use, if any + """ + + if fmt == 'iso': + return self._create_iso(size, subformat) + + if fmt == 'vhd': + # QEMU calls the vhd format vpc + fmt = 'vpc' + + # these tests depend on qemu-img being installed and in the path, + # if it is not installed, skip. we also need to ensure that the + # format is supported by qemu-img, this can vary depending on the + # distribution so we need to check if the format is supported via + # the help output. 
+ try: + subprocess.check_output( + 'qemu-img --help | grep %s' % fmt, shell=True) + except Exception: + self.skipTest( + 'qemu-img not installed or does not support %s format' % fmt) + + if options is None: + options = {} + opt = '' + prefix = TEST_IMAGE_PREFIX + + if subformat: + options['subformat'] = subformat + prefix += subformat + '-' + + if options: + opt += '-o ' + ','.join('%s=%s' % (k, v) + for k, v in options.items()) + + if backing_file is not None: + opt += ' -b %s -F raw' % backing_file + + fn = tempfile.mktemp(prefix=prefix, + suffix='.%s' % fmt) + self._created_files.append(fn) + subprocess.check_output( + 'qemu-img create -f %s %s %s %i' % (fmt, opt, fn, size), + shell=True) + return fn + + def _create_allocated_vmdk(self, size_mb, subformat=None): + # We need a "big" VMDK file to exercise some parts of the code of the + # format_inspector. A way to create one is to first create an empty + # file, and then to convert it with the -S 0 option. + + if subformat is None: + # Matches qemu-img default, see `qemu-img convert -O vmdk -o help` + subformat = 'monolithicSparse' + + prefix = TEST_IMAGE_PREFIX + prefix += '-%s-' % subformat + fn = tempfile.mktemp(prefix=prefix, suffix='.vmdk') + self._created_files.append(fn) + raw = tempfile.mktemp(prefix=prefix, suffix='.raw') + self._created_files.append(raw) + + # Create a file with pseudo-random data, otherwise it will get + # compressed in the streamOptimized format + subprocess.check_output( + 'dd if=/dev/urandom of=%s bs=1M count=%i' % (raw, size_mb), + shell=True) + + # Convert it to VMDK + subprocess.check_output( + 'qemu-img convert -f raw -O vmdk -o subformat=%s -S 0 %s %s' % ( + subformat, raw, fn), + shell=True) + return fn + + def _test_format_at_block_size(self, format_name, img, block_size): + fmt = format_inspector.get_inspector(format_name)() + self.assertIsNotNone(fmt, + 'Did not get format inspector for %s' % ( + format_name)) + wrapper = format_inspector.InfoWrapper(open(img, 'rb'), fmt) 
+ + while True: + chunk = wrapper.read(block_size) + if not chunk: + break + + wrapper.close() + return fmt + + def _test_format_at_image_size(self, format_name, image_size, + subformat=None): + """Test the format inspector for the given format at the + given image size. + + :param format_name: The format to test + :param image_size: The size of the image to create in bytes + :param subformat: The subformat to use, if any + """ + img = self._create_img(format_name, image_size, subformat=subformat) + + # Some formats have internal alignment restrictions making this not + # always exactly like image_size, so get the real value for comparison + virtual_size = get_size_from_qemu_img(img) + + # Read the format in various sizes, some of which will read whole + # sections in a single read, others will be completely unaligned, etc. + block_sizes = [64 * units.Ki, 1 * units.Mi] + # ISO images have a 32KB system area at the beginning of the image + # as a result reading that in 17 or 512 byte blocks takes too long, + # causing the test to fail. The 64KiB block size is enough to read + # the system area and header in a single read. the 1MiB block size + # adds very little time to the test so we include it. 
+ if format_name != 'iso': + block_sizes.extend([17, 512]) + for block_size in block_sizes: + fmt = self._test_format_at_block_size(format_name, img, block_size) + self.assertTrue(fmt.format_match, + 'Failed to match %s at size %i block %i' % ( + format_name, image_size, block_size)) + self.assertEqual(virtual_size, fmt.virtual_size, + ('Failed to calculate size for %s at size %i ' + 'block %i') % (format_name, image_size, + block_size)) + memory = sum(fmt.context_info.values()) + self.assertLess(memory, 512 * units.Ki, + 'Format used more than 512KiB of memory: %s' % ( + fmt.context_info)) + + def _test_format(self, format_name, subformat=None): + # Try a few different image sizes, including some odd and very small + # sizes + for image_size in (512, 513, 2057, 7): + self._test_format_at_image_size(format_name, image_size * units.Mi, + subformat=subformat) + + def test_qcow2(self): + self._test_format('qcow2') + + def test_iso_9660(self): + self._test_format('iso', subformat='9660') + + def test_iso_udf(self): + self._test_format('iso', subformat='udf') + + def _generate_bad_iso(self): + # we want to emulate a malicious user who uploads an + # ISO file that has a qcow2 header in the system area + # of the ISO file + # we will create a qcow2 image and an ISO file + # and then copy the qcow2 header to the ISO file + # e.g.
+ # mkisofs -o orig.iso /etc/resolv.conf + # qemu-img create orig.qcow2 -f qcow2 64M + # dd if=orig.qcow2 of=outcome bs=32K count=1 + # dd if=orig.iso of=outcome bs=32K skip=1 seek=1 + + qcow = self._create_img('qcow2', 10 * units.Mi) + iso = self._create_iso(64 * units.Mi, subformat='9660') + # first ensure the files are valid + iso_fmt = self._test_format_at_block_size('iso', iso, 4 * units.Ki) + self.assertTrue(iso_fmt.format_match) + qcow_fmt = self._test_format_at_block_size('qcow2', qcow, 4 * units.Ki) + self.assertTrue(qcow_fmt.format_match) + # now copy the qcow2 header to an ISO file + prefix = TEST_IMAGE_PREFIX + prefix += '-bad-' + fn = tempfile.mktemp(prefix=prefix, suffix='.iso') + self._created_files.append(fn) + subprocess.check_output( + 'dd if=%s of=%s bs=32K count=1' % (qcow, fn), + shell=True) + subprocess.check_output( + 'dd if=%s of=%s bs=32K skip=1 seek=1' % (iso, fn), + shell=True) + return qcow, iso, fn + + def test_bad_iso_qcow2(self): + + _, _, fn = self._generate_bad_iso() + + iso_check = self._test_format_at_block_size('iso', fn, 4 * units.Ki) + qcow_check = self._test_format_at_block_size('qcow2', fn, 4 * units.Ki) + # the system area of the ISO file is not considered part of the format + # the qcow2 header is in the system area of the ISO file + # so the ISO file is still valid + self.assertTrue(iso_check.format_match) + # the qcow2 header is in the system area of the ISO file + # but that will be parsed by the qcow2 format inspector + # and it will match + self.assertTrue(qcow_check.format_match) + # if we call format_inspector.detect_file_format it should detect + # and raise an exception because both match internally.
+ e = self.assertRaises( + format_inspector.ImageFormatError, + format_inspector.detect_file_format, fn) + self.assertIn('Multiple formats detected', str(e)) + + def test_vhd(self): + self._test_format('vhd') + + def test_vhdx(self): + self._test_format('vhdx') + + def test_vmdk(self): + self._test_format('vmdk') + + def test_vmdk_stream_optimized(self): + self._test_format('vmdk', 'streamOptimized') + + def test_from_file_reads_minimum(self): + img = self._create_img('qcow2', 10 * units.Mi) + file_size = os.stat(img).st_size + fmt = format_inspector.QcowInspector.from_file(img) + # We know everything we need from the first 512 bytes of a QCOW image, + # so make sure that we did not read the whole thing when we inspect + # a local file. + self.assertLess(fmt.actual_size, file_size) + + def test_qed_always_unsafe(self): + img = self._create_img('qed', 10 * units.Mi) + fmt = format_inspector.get_inspector('qed').from_file(img) + self.assertTrue(fmt.format_match) + self.assertFalse(fmt.safety_check()) + + def _test_vmdk_bad_descriptor_offset(self, subformat=None): + format_name = 'vmdk' + image_size = 10 * units.Mi + descriptorOffsetAddr = 0x1c + BAD_ADDRESS = 0x400 + img = self._create_img(format_name, image_size, subformat=subformat) + + # Corrupt the header + fd = open(img, 'r+b') + fd.seek(descriptorOffsetAddr) + fd.write(struct.pack('