Skip to content

Commit

Permalink
Merge pull request #87 from pauldmccarthy/bf/crcerror
Browse files Browse the repository at this point in the history
Bf/crcerror
  • Loading branch information
pauldmccarthy authored Oct 18, 2021
2 parents 9538b8d + 5c973cd commit deee4a4
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 1 deletion.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
# `indexed_gzip` changelog


## 1.6.4 (October 18th 2021)


* Fixed a bug related to buffering input data, which was causing a spurious
`CrcError` (#80, #87).



## 1.6.3 (September 14th 2021)


Expand Down
2 changes: 1 addition & 1 deletion indexed_gzip/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@
"""


__version__ = '1.6.3'
__version__ = '1.6.4'
49 changes: 49 additions & 0 deletions indexed_gzip/tests/ctest_zran.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1445,3 +1445,52 @@ def test_inflateInit_leak_on_error():
# it should remain stable
mem = mem[5:]
assert np.all(mem == mem[0])


# pauldmccarthy/indexed_gzip#80
def test_read_eof_memmove_rotate_bug(seed):
    """Regression test for pauldmccarthy/indexed_gzip#80.

    The bug lived in the read buffer rotation performed by
    zran.c::_zran_read_data_from_file. When the file is at EOF and
    stream->next_in is ahead of index->readbuf by fewer than
    stream->avail_in bytes, the source and destination regions handed
    to memmove overlap, so the bytes that next_in points at get
    modified. At EOF, next_in was not being reset to the start of
    readbuf, so the later read of the gzip footer in
    _zran_validate_stream looked at the wrong location.

    The situation is triggered by a file whose compressed size is
    (X * readbuf_size) + Y, for any integer X and for 9 <= Y < 16.
    Here the read buffer is made 10 bytes smaller than the compressed
    file, i.e. X = 1 and Y = 10.

    :arg seed: Not referenced in the body; presumably a pytest fixture
               that seeds the numpy random number generator - confirm
               against the test configuration.
    """

    cdef zran.zran_index_t zindex
    cdef FILE *cstream

    with tempdir():

        # The element count is drawn before the payload, so the
        # sequence of random draws is fixed for a given RNG state.
        count = np.random.randint(524288, 525000, 1)[0]
        payload = np.random.random(count)

        with gzip.open('test.gz', 'wb') as gzfile:
            gzfile.write(payload.tobytes())

        # A read buffer ten bytes short of the compressed file size
        # leaves a ten byte remainder after the first full read.
        readbuf_size = os.stat('test.gz').st_size - 10

        with open('test.gz', 'rb') as pyfid:
            cstream = fdopen(pyfid.fileno(), 'rb')

            # zran_init is expected to return 0. The two integer
            # literals are presumably the index point spacing and the
            # window size - confirm against the zran_init signature.
            assert not zran.zran_init(&zindex,
                                      cstream,
                                      NULL,
                                      4194304,
                                      32768,
                                      readbuf_size,
                                      zran.ZRAN_AUTO_BUILD)

            # Offset of the final uncompressed byte (the test counts
            # 8 bytes per element). Seeking there forces the whole
            # stream to be inflated up to EOF.
            last_byte = count * 8 - 1
            status = zran.zran_seek(&zindex, last_byte, SEEK_SET, NULL)

            assert status == zran.ZRAN_SEEK_OK, status
            assert zran.zran_tell(&zindex) == last_byte
3 changes: 3 additions & 0 deletions indexed_gzip/tests/test_zran.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,6 @@ def test_standard_usage_with_null_padding(concat):

def test_inflateInit_leak_on_error():
    """Run the test of the same name implemented in the ``ctest_zran`` module."""
    ctest_zran.test_inflateInit_leak_on_error()

def test_read_eof_memmove_rotate_bug(seed):
    """Run the test of the same name implemented in the ``ctest_zran`` module.

    :arg seed: Passed through unchanged to the ``ctest_zran`` test.
    """
    ctest_zran.test_read_eof_memmove_rotate_bug(seed)
8 changes: 8 additions & 0 deletions indexed_gzip/zran.c
Original file line number Diff line number Diff line change
Expand Up @@ -1394,6 +1394,14 @@ static int _zran_read_data_from_file(zran_index_t *index,

zran_log("End of file, stopping inflation\n");

/*
* Reset next_in pointer to beginning of
* read buffer, as we rotated it above,
* and the area that next_in was pointing
* to may have been overwritten by memmove.
*/
stream->next_in = index->readbuf;

/*
* we have uncompressed everything,
* so we now know its size.
Expand Down

0 comments on commit deee4a4

Please sign in to comment.