diff --git a/CHANGELOG.md b/CHANGELOG.md index 132b7c68..0c627a3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ # `indexed_gzip` changelog +## 1.9.3 (November 27th 2024) + + +* Expanded exception preservation to more scenarios (#161, #162). + + ## 1.9.2 (November 18th 2024) diff --git a/indexed_gzip/__init__.py b/indexed_gzip/__init__.py index c79fb998..ceba6741 100644 --- a/indexed_gzip/__init__.py +++ b/indexed_gzip/__init__.py @@ -19,4 +19,4 @@ """ -__version__ = '1.9.2' +__version__ = '1.9.3' diff --git a/indexed_gzip/indexed_gzip.pyx b/indexed_gzip/indexed_gzip.pyx index 1b0e9e06..30b76c7f 100644 --- a/indexed_gzip/indexed_gzip.pyx +++ b/indexed_gzip/indexed_gzip.pyx @@ -469,8 +469,9 @@ cdef class _IndexedGzipFile: window_size=window_size, readbuf_size=readbuf_size, flags=flags): + exc = get_python_exception() raise ZranError('zran_init returned error (file: ' - '{})'.format(self.errname)) + '{})'.format(self.errname)) from exc log.debug('%s.__init__(%s, %s, %s, %s, %s, %s, %s)', type(self).__name__, @@ -486,6 +487,7 @@ cdef class _IndexedGzipFile: self.import_index(index_file) + @contextlib.contextmanager def __file_handle(self): """This method is used as a context manager whenever access to the underlying file stream is required. It makes sure that ``index.fd`` @@ -493,38 +495,28 @@ cdef class _IndexedGzipFile: necessary (depending on the value of :attr:`drop_handles`). """ - # Errors occur with Python 2.7 and - # Cython < 0.26 when decorating - # cdef-class methods. This workaround - # can be removed when you are happy - # dropping support for cython < 0.26. - @contextlib.contextmanager - def proxy(): - - # If a file handle already exists, - # return it. This clause makes this - # context manager reentrant. - if self.index.fd is not NULL: - yield + # If a file handle already exists, + # return it. This clause makes this + # context manager reentrant. + if self.index.fd is not NULL: + yield - # If a file-like object exists (without an associated - # file descriptor, since self.index.fd is NULL), - # also return it. - elif self.pyfid is not None: - yield + # If a file-like object exists (without an associated + # file descriptor, since self.index.fd is NULL), + # also return it. + elif self.pyfid is not None: + yield - # otherwise we open a new - # file handle on each access - else: - try: - self.index.fd = fopen(self.filename.encode(), 'rb') - yield - - finally: - fclose(self.index.fd) - self.index.fd = NULL + # otherwise we open a new + # file handle on each access + else: + try: + self.index.fd = fopen(self.filename.encode(), 'rb') + yield - return proxy() + finally: + fclose(self.index.fd) + self.index.fd = NULL def seek_points(self): @@ -667,8 +659,10 @@ cdef class _IndexedGzipFile: ret = zran.zran_build_index(&self.index, 0, 0) if ret != zran.ZRAN_BUILD_INDEX_OK: + exc = get_python_exception() raise ZranError('zran_build_index returned error: {} (file: {})' - .format(ZRAN_ERRORS.ZRAN_BUILD[ret], self.errname)) + .format(ZRAN_ERRORS.ZRAN_BUILD[ret], + self.errname)) from exc log.debug('%s.build_full_index()', type(self).__name__) @@ -716,8 +710,10 @@ cdef class _IndexedGzipFile: '{})'.format(self.errname)) elif ret not in (zran.ZRAN_SEEK_OK, zran.ZRAN_SEEK_EOF): + exc = get_python_exception() raise ZranError('zran_seek returned error: {} (file: {})' - .format(ZRAN_ERRORS.ZRAN_SEEK[ret], self.errname)) + .format(ZRAN_ERRORS.ZRAN_SEEK[ret], + self.errname)) from exc offset = self.tell() @@ -826,6 +822,9 @@ cdef class _IndexedGzipFile: cdef void *vbuf cdef int64_t ret + # reference to any exceptions that are raised + exc = None + # Create a Py_Buffer which allows # us to access the memory managed # by the provided buf @@ -838,6 +837,9 @@ cdef class _IndexedGzipFile: with self.__file_handle(): with nogil: ret = zran.zran_read(index, vbuf, bufsz) + # Something in __file_handle seems to clear + # the exception state, so we look now in + # case an exception has occurred exc = get_python_exception() # release the py_buffer @@ -847,7 +849,8 @@ cdef class _IndexedGzipFile: # see how the read went if ret == zran.ZRAN_READ_FAIL: raise ZranError('zran_read returned error: {} (file: {})' - .format(ZRAN_ERRORS.ZRAN_READ[ret], self.errname)) from exc + .format(ZRAN_ERRORS.ZRAN_READ[ret], + self.errname)) from exc # This will happen if the current # seek point is not covered by the @@ -1020,9 +1023,10 @@ cdef class _IndexedGzipFile: fd = NULL ret = zran.zran_export_index(&self.index, fd, fileobj) if ret != zran.ZRAN_EXPORT_OK: + exc = get_python_exception() raise ZranError('export_index returned error: {} (file: ' '{})'.format(ZRAN_ERRORS.ZRAN_EXPORT[ret], - self.errname)) + self.errname)) from exc finally: if close_file: @@ -1070,9 +1074,10 @@ cdef class _IndexedGzipFile: fd = NULL ret = zran.zran_import_index(&self.index, fd, fileobj) if ret != zran.ZRAN_IMPORT_OK: + exc = get_python_exception() raise ZranError('import_index returned error: {} (file: ' '{})'.format(ZRAN_ERRORS.ZRAN_IMPORT[ret], - self.errname)) + self.errname)) from exc self.skip_crc_check = True diff --git a/indexed_gzip/tests/__init__.py b/indexed_gzip/tests/__init__.py index 2ccb0acb..623d303a 100644 --- a/indexed_gzip/tests/__init__.py +++ b/indexed_gzip/tests/__init__.py @@ -114,8 +114,11 @@ def compress_with_gzip_command(): chunk = inf.read(buflen) # Use python gzip module on windows, - # can't assume gzip exists - if sys.platform.startswith("win"): + # as we can't assume the gzip command + # exists. + onwin = sys.platform.startswith("win") + + if onwin: target = compress_with_gzip_module # If not windows, assume that gzip command @@ -124,9 +127,16 @@ def compress_with_gzip_command(): else: target = compress_with_gzip_command - cmpThread = threading.Thread(target=target) - cmpThread.start() - poll(lambda : not cmpThread.is_alive()) + # Some kind of corruption also seems to + # occur on windows+free-threaded builds, + # so don't thread on windows. Threading + # here is just for progress reporting. + if onwin: + target() + else: + cmpThread = threading.Thread(target=target) + cmpThread.start() + poll(lambda : not cmpThread.is_alive()) def compress_inmem(data, concat): diff --git a/indexed_gzip/tests/test_indexed_gzip.py b/indexed_gzip/tests/test_indexed_gzip.py index 7d1c2928..2468af70 100644 --- a/indexed_gzip/tests/test_indexed_gzip.py +++ b/indexed_gzip/tests/test_indexed_gzip.py @@ -6,6 +6,7 @@ from __future__ import print_function +import gc import os import os.path as op import itertools as it @@ -509,8 +510,7 @@ def test_read_beyond_end(concat, drop): os.remove(testfile) -@pytest.mark.parametrize('use_readinto', [False, True]) -def test_read_exception(testfile, nelems, use_readinto): +def test_exception_preserved(testfile, nelems): """When wrapping a python file object, if it raises an exception it should be preserved. """ @@ -519,47 +519,119 @@ def test_read_exception(testfile, nelems, use_readinto): # because it's read-only, so skip it. return - f = None - gzf = None - - MY_ERROR = "This error should be preserved" - - # We'll use a weakref to check that we are handling reference counting - # correctly, and you cannot weakref an Exception, so we need a subclass. + # We'll use a weakref to check that we are handling + # reference counting correctly, and you cannot + # weakref an Exception, so we need a subclass. class MyException(Exception): pass - my_err_weak = [None] - def my_error_fn(*args, **kwargs): - err = MyException(MY_ERROR) - my_err_weak[0] = weakref.ref(err) + + # Function which raises an exception. This is + # is patched into the python file-like to test + # that the igzip object preserves the exception. + # We store a weakref to each exception to check + # that no references to them are kept. + all_errors = [] + error_msg = "This error should be preserved" + + def error_fn(name, *args, **kwargs): + err = MyException(error_msg + ": " + name) + all_errors.append(weakref.ref(err)) raise err - try: - with open(testfile, 'rb') as f2: - f = BytesIO(f2.read()) - gzf = igzip._IndexedGzipFile(fileobj=f) - f.read = my_error_fn + # Below we check that each of these scenarios + # raises an appropriate exception + def test_create(pyf): + pyf.seek = ft.partial(error_fn, "test_create") + gzf = igzip.IndexedGzipFile(fileobj=pyf) + return gzf + + def test_build_full_index(pyf): + pyf.read = ft.partial(error_fn, "test_build_full_index") + pyf.seek = ft.partial(error_fn, "test_build_full_index") + gzf = igzip.IndexedGzipFile(fileobj=pyf) + gzf.build_full_index() + return gzf + + def test_seek(pyf): + pyf.seek = ft.partial(error_fn, "test_seek") + gzf = igzip.IndexedGzipFile(fileobj=pyf) + gzf.seek(5) + return gzf + + def test_read(pyf): + # Test _IndexedGzipFile.read, as the buffered + # IndexedGzipFile.read method uses readinto + pyf.read = ft.partial(error_fn, "test_read") + gzf = igzip._IndexedGzipFile(fileobj=pyf) + gzf.read(1) + return gzf + + def test_readinto(pyf): + pyf.read = ft.partial(error_fn, "test_readinto") + gzf = igzip.IndexedGzipFile(fileobj=pyf) + ba = bytearray(1) + gzf.readinto(ba) + return gzf + + def test_export_index(pyf): + gzf = igzip.IndexedGzipFile(fileobj=pyf) + gzf.build_full_index() + idxf = BytesIO() + idxf.write = ft.partial(error_fn, "test_export_index") + gzf.export_index(fileobj=idxf) + return gzf + + def test_import_index(pyf): + gzf = igzip.IndexedGzipFile(fileobj=pyf) + idxf = BytesIO() + gzf.build_full_index() + gzf.export_index(fileobj=idxf) + gzf.close() + del gzf + gzf = igzip.IndexedGzipFile(fileobj=pyf) + idxf.read = ft.partial(error_fn, "test_import_index") + gzf.import_index(fileobj=idxf) + return gzf + + tests = [test_create, test_build_full_index, test_seek, + test_read, test_readinto, test_export_index, + test_import_index] + + # Call the given function, making sure that + # the exception originating from the python + # file-like is preserved + def check_error_preserved(func): + pyf = None + gzf = None + with open(testfile, 'rb') as f: + pyf = BytesIO(f.read()) try: - if use_readinto: - ba = bytearray(1) - gzf.readinto(ba) - else: - gzf.read(1) + gzf = func(pyf) except Exception as e: - assert MY_ERROR in str(e) or MY_ERROR in str(e.__cause__), "Exception was not preserved; got {}".format(e) + fname = func.__name__ + estr = str(e) + " " + str(e.__cause__) + assert error_msg in estr, \ + "Exception was not preserved; got {}".format(estr) + assert fname in estr, \ + "Exception didn't come from {}; got {}".format(fname, estr) del e else: assert False, "Expected an exception to be raised" - - finally: if gzf is not None: gzf.close() - if f is not None: f .close() - del f + if pyf is not None: pyf.close() del gzf - assert (my_err_weak[0] is None or my_err_weak[0]() is None, - "Exception was not garbage collected") + del pyf + + # Run the tests + for test in tests: + check_error_preserved(test) + # Make sure there are no dangling + # references to any exception objects + gc.collect() + for e in all_errors: + assert (e() is None), "Exception was not garbage collected" def test_seek(concat): with tempdir() as tdir: diff --git a/indexed_gzip/zran_file_util.c b/indexed_gzip/zran_file_util.c index cb01dde1..9a6dde6e 100644 --- a/indexed_gzip/zran_file_util.c +++ b/indexed_gzip/zran_file_util.c @@ -194,7 +194,8 @@ size_t _fwrite_python(const void *ptr, goto fail; if ((data = PyObject_CallMethod(f, "write", "(O)", input)) == NULL) goto fail; - + if (PyErr_Occurred()) + goto fail; #if PY_MAJOR_VERSION >= 3 if ((len = PyLong_AsLong(data)) == -1 && PyErr_Occurred()) goto fail;