Skip to content

Commit

Permalink
Merge pull request #162 from pauldmccarthy/mnt/propagate-exceptions-e…
Browse files Browse the repository at this point in the history
…verywhere

MNT: Expand exception preservation to more scenarios
  • Loading branch information
pauldmccarthy authored Nov 26, 2024
2 parents adef89d + 7dc92f7 commit 71a3a79
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 72 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# `indexed_gzip` changelog


## 1.9.3 (November 27th 2024)


* Expanded exception preservation to more scenarios (#161, #162).


## 1.9.2 (November 18th 2024)


Expand Down
2 changes: 1 addition & 1 deletion indexed_gzip/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@
"""


__version__ = '1.9.2'
__version__ = '1.9.3'
75 changes: 40 additions & 35 deletions indexed_gzip/indexed_gzip.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -469,8 +469,9 @@ cdef class _IndexedGzipFile:
window_size=window_size,
readbuf_size=readbuf_size,
flags=flags):
exc = get_python_exception()
raise ZranError('zran_init returned error (file: '
'{})'.format(self.errname))
'{})'.format(self.errname)) from exc

log.debug('%s.__init__(%s, %s, %s, %s, %s, %s, %s)',
type(self).__name__,
Expand All @@ -486,45 +487,36 @@ cdef class _IndexedGzipFile:
self.import_index(index_file)


@contextlib.contextmanager
def __file_handle(self):
"""This method is used as a context manager whenever access to the
underlying file stream is required. It makes sure that ``index.fd``
field is set appropriately, opening/closing the file handle as
necessary (depending on the value of :attr:`drop_handles`).
"""

# Errors occur with Python 2.7 and
# Cython < 0.26 when decorating
# cdef-class methods. This workaround
# can be removed when you are happy
# dropping support for cython < 0.26.
@contextlib.contextmanager
def proxy():

# If a file handle already exists,
# return it. This clause makes this
# context manager reentrant.
if self.index.fd is not NULL:
yield
# If a file handle already exists,
# return it. This clause makes this
# context manager reentrant.
if self.index.fd is not NULL:
yield

# If a file-like object exists (without an associated
# file descriptor, since self.index.fd is NULL),
# also return it.
elif self.pyfid is not None:
yield
# If a file-like object exists (without an associated
# file descriptor, since self.index.fd is NULL),
# also return it.
elif self.pyfid is not None:
yield

# otherwise we open a new
# file handle on each access
else:
try:
self.index.fd = fopen(self.filename.encode(), 'rb')
yield

finally:
fclose(self.index.fd)
self.index.fd = NULL
# otherwise we open a new
# file handle on each access
else:
try:
self.index.fd = fopen(self.filename.encode(), 'rb')
yield

return proxy()
finally:
fclose(self.index.fd)
self.index.fd = NULL


def seek_points(self):
Expand Down Expand Up @@ -667,8 +659,10 @@ cdef class _IndexedGzipFile:
ret = zran.zran_build_index(&self.index, 0, 0)

if ret != zran.ZRAN_BUILD_INDEX_OK:
exc = get_python_exception()
raise ZranError('zran_build_index returned error: {} (file: {})'
.format(ZRAN_ERRORS.ZRAN_BUILD[ret], self.errname))
.format(ZRAN_ERRORS.ZRAN_BUILD[ret],
self.errname)) from exc

log.debug('%s.build_full_index()', type(self).__name__)

Expand Down Expand Up @@ -716,8 +710,10 @@ cdef class _IndexedGzipFile:
'{})'.format(self.errname))

elif ret not in (zran.ZRAN_SEEK_OK, zran.ZRAN_SEEK_EOF):
exc = get_python_exception()
raise ZranError('zran_seek returned error: {} (file: {})'
.format(ZRAN_ERRORS.ZRAN_SEEK[ret], self.errname))
.format(ZRAN_ERRORS.ZRAN_SEEK[ret],
self.errname)) from exc

offset = self.tell()

Expand Down Expand Up @@ -826,6 +822,9 @@ cdef class _IndexedGzipFile:
cdef void *vbuf
cdef int64_t ret

# reference to any exceptions that are raised
exc = None

# Create a Py_Buffer which allows
# us to access the memory managed
# by the provided buf
Expand All @@ -838,6 +837,9 @@ cdef class _IndexedGzipFile:
with self.__file_handle():
with nogil:
ret = zran.zran_read(index, vbuf, bufsz)
# Something in __file_handle seems to clear
# the exception state, so we look now in
# case an exception has occurred
exc = get_python_exception()

# release the py_buffer
Expand All @@ -847,7 +849,8 @@ cdef class _IndexedGzipFile:
# see how the read went
if ret == zran.ZRAN_READ_FAIL:
raise ZranError('zran_read returned error: {} (file: {})'
.format(ZRAN_ERRORS.ZRAN_READ[ret], self.errname)) from exc
.format(ZRAN_ERRORS.ZRAN_READ[ret],
self.errname)) from exc

# This will happen if the current
# seek point is not covered by the
Expand Down Expand Up @@ -1020,9 +1023,10 @@ cdef class _IndexedGzipFile:
fd = NULL
ret = zran.zran_export_index(&self.index, fd, <PyObject*>fileobj)
if ret != zran.ZRAN_EXPORT_OK:
exc = get_python_exception()
raise ZranError('export_index returned error: {} (file: '
'{})'.format(ZRAN_ERRORS.ZRAN_EXPORT[ret],
self.errname))
self.errname)) from exc

finally:
if close_file:
Expand Down Expand Up @@ -1070,9 +1074,10 @@ cdef class _IndexedGzipFile:
fd = NULL
ret = zran.zran_import_index(&self.index, fd, <PyObject*>fileobj)
if ret != zran.ZRAN_IMPORT_OK:
exc = get_python_exception()
raise ZranError('import_index returned error: {} (file: '
'{})'.format(ZRAN_ERRORS.ZRAN_IMPORT[ret],
self.errname))
self.errname)) from exc

self.skip_crc_check = True

Expand Down
20 changes: 15 additions & 5 deletions indexed_gzip/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,11 @@ def compress_with_gzip_command():
chunk = inf.read(buflen)

# Use python gzip module on windows,
# can't assume gzip exists
if sys.platform.startswith("win"):
# as we can't assume the gzip command
# exists.
onwin = sys.platform.startswith("win")

if onwin:
target = compress_with_gzip_module

# If not windows, assume that gzip command
Expand All @@ -124,9 +127,16 @@ def compress_with_gzip_command():
else:
target = compress_with_gzip_command

cmpThread = threading.Thread(target=target)
cmpThread.start()
poll(lambda : not cmpThread.is_alive())
# Some kind of corruption also seems to
# occur on windows+free-threaded builds,
# so don't thread on windows. Threading
# here is just for progress reporting.
if onwin:
target()
else:
cmpThread = threading.Thread(target=target)
cmpThread.start()
poll(lambda : not cmpThread.is_alive())


def compress_inmem(data, concat):
Expand Down
132 changes: 102 additions & 30 deletions indexed_gzip/tests/test_indexed_gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from __future__ import print_function

import gc
import os
import os.path as op
import itertools as it
Expand Down Expand Up @@ -509,8 +510,7 @@ def test_read_beyond_end(concat, drop):
os.remove(testfile)


@pytest.mark.parametrize('use_readinto', [False, True])
def test_read_exception(testfile, nelems, use_readinto):
def test_exception_preserved(testfile, nelems):
"""When wrapping a python file object, if it raises an exception
it should be preserved.
"""
Expand All @@ -519,47 +519,119 @@ def test_read_exception(testfile, nelems, use_readinto):
# because it's read-only, so skip it.
return

f = None
gzf = None

MY_ERROR = "This error should be preserved"

# We'll use a weakref to check that we are handling reference counting
# correctly, and you cannot weakref an Exception, so we need a subclass.
# We'll use a weakref to check that we are handling
# reference counting correctly, and you cannot
# weakref an Exception, so we need a subclass.
class MyException(Exception):
pass
my_err_weak = [None]
def my_error_fn(*args, **kwargs):
err = MyException(MY_ERROR)
my_err_weak[0] = weakref.ref(err)

# Function which raises an exception. This is
# is patched into the python file-like to test
# that the igzip object preserves the exception.
# We store a weakref to each exception to check
# that no references to them are kept.
all_errors = []
error_msg = "This error should be preserved"

def error_fn(name, *args, **kwargs):
err = MyException(error_msg + ": " + name)
all_errors.append(weakref.ref(err))
raise err

try:
with open(testfile, 'rb') as f2:
f = BytesIO(f2.read())
gzf = igzip._IndexedGzipFile(fileobj=f)
f.read = my_error_fn
# Below we check that each of these scenarios
# raises an appropriate exception
def test_create(pyf):
pyf.seek = ft.partial(error_fn, "test_create")
gzf = igzip.IndexedGzipFile(fileobj=pyf)
return gzf

def test_build_full_index(pyf):
pyf.read = ft.partial(error_fn, "test_build_full_index")
pyf.seek = ft.partial(error_fn, "test_build_full_index")
gzf = igzip.IndexedGzipFile(fileobj=pyf)
gzf.build_full_index()
return gzf

def test_seek(pyf):
pyf.seek = ft.partial(error_fn, "test_seek")
gzf = igzip.IndexedGzipFile(fileobj=pyf)
gzf.seek(5)
return gzf

def test_read(pyf):
# Test _IndexedGzipFile.read, as the buffered
# IndexedGzipFile.read method uses readinto
pyf.read = ft.partial(error_fn, "test_read")
gzf = igzip._IndexedGzipFile(fileobj=pyf)
gzf.read(1)
return gzf

def test_readinto(pyf):
pyf.read = ft.partial(error_fn, "test_readinto")
gzf = igzip.IndexedGzipFile(fileobj=pyf)
ba = bytearray(1)
gzf.readinto(ba)
return gzf

def test_export_index(pyf):
gzf = igzip.IndexedGzipFile(fileobj=pyf)
gzf.build_full_index()
idxf = BytesIO()
idxf.write = ft.partial(error_fn, "test_export_index")
gzf.export_index(fileobj=idxf)
return gzf

def test_import_index(pyf):
gzf = igzip.IndexedGzipFile(fileobj=pyf)
idxf = BytesIO()
gzf.build_full_index()
gzf.export_index(fileobj=idxf)
gzf.close()
del gzf
gzf = igzip.IndexedGzipFile(fileobj=pyf)
idxf.read = ft.partial(error_fn, "test_import_index")
gzf.import_index(fileobj=idxf)
return gzf

tests = [test_create, test_build_full_index, test_seek,
test_read, test_readinto, test_export_index,
test_import_index]

# Call the given function, making sure that
# the exception originating from the python
# file-like is preserved
def check_error_preserved(func):
pyf = None
gzf = None
with open(testfile, 'rb') as f:
pyf = BytesIO(f.read())
try:
if use_readinto:
ba = bytearray(1)
gzf.readinto(ba)
else:
gzf.read(1)
gzf = func(pyf)
except Exception as e:
assert MY_ERROR in str(e) or MY_ERROR in str(e.__cause__), "Exception was not preserved; got {}".format(e)
fname = func.__name__
estr = str(e) + " " + str(e.__cause__)
assert error_msg in estr, \
"Exception was not preserved; got {}".format(estr)
assert fname in estr, \
"Exception didn't come from {}; got {}".format(fname, estr)
del e
else:
assert False, "Expected an exception to be raised"


finally:
if gzf is not None: gzf.close()
if f is not None: f .close()
del f
if pyf is not None: pyf.close()
del gzf
assert (my_err_weak[0] is None or my_err_weak[0]() is None,
"Exception was not garbage collected")
del pyf

# Run the tests
for test in tests:
check_error_preserved(test)

# Make sure there are no dangling
# references to any exception objects
gc.collect()
for e in all_errors:
assert (e() is None), "Exception was not garbage collected"

def test_seek(concat):
with tempdir() as tdir:
Expand Down
3 changes: 2 additions & 1 deletion indexed_gzip/zran_file_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,8 @@ size_t _fwrite_python(const void *ptr,
goto fail;
if ((data = PyObject_CallMethod(f, "write", "(O)", input)) == NULL)
goto fail;

if (PyErr_Occurred())
goto fail;
#if PY_MAJOR_VERSION >= 3
if ((len = PyLong_AsLong(data)) == -1 && PyErr_Occurred())
goto fail;
Expand Down

0 comments on commit 71a3a79

Please sign in to comment.