Skip to content

Commit

Permalink
Merge pull request #19 from pauldmccarthy/bf/export
Browse files Browse the repository at this point in the history
Bf/export
  • Loading branch information
pauldmccarthy authored May 14, 2019
2 parents 16554e6 + d9e5b5e commit aba0358
Show file tree
Hide file tree
Showing 10 changed files with 154 additions and 23 deletions.
2 changes: 1 addition & 1 deletion .ci/run_32bit_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
PYV=$PYTHON_VERSION

apt-get update
apt-get install -y libssl-dev openssl wget build-essential
apt-get install -y libssl-dev openssl wget build-essential libffi-dev
cd /
wget https://www.python.org/ftp/python/$PYV/Python-$PYV.tar.xz
tar xf Python-$PYV.tar.xz
Expand Down
8 changes: 5 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ branches:
except:
- /^v[0-9]\.[0-9]\.[0-9]/


dist: xenial

sudo: required

language: python
Expand Down Expand Up @@ -80,15 +83,14 @@ matrix:
env: TEST_SUITE=zran_test EXTRA_ARGS="--use_mmap" NITERS=25 NELEMS=805306368 TEST_PATTERN="test_seek_then_read_block"
- python: 3.6
env: TEST_SUITE=zran_test EXTRA_ARGS="--use_mmap --concat" NITERS=25 NELEMS=805306368 TEST_PATTERN="test_seek_then_read_block"
- python: 3.6
- env: TEST_SUITE=32bittest


# Tests covering the indexed_gzip module
python:
- 2.7
- 3.4
- 3.5
- 3.6
- 3.7

env:
- TEST_SUITE=indexed_gzip_test NITERS=5000 NELEMS=50000
Expand Down
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
# `indexed_gzip` changelog


## 0.8.9 (May 14th 2019)


* The `IndexedGzipFile.import_index` method and `zran_import_index` function
can handle index files which do not contain any index points (#18).


## 0.8.8 (November 22nd 2018)

* Fixed bug affecting files which were an exact multiple of the read buffer
size.
size (#15).


## 0.8.7 (August 3rd 2018)
Expand Down
22 changes: 12 additions & 10 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,6 @@ environment:
MINICONDA: "C:\\Miniconda-x64"
PYTHON_ARCH: "64"

- PYTHON_VERSION: "3.4"
PYTHON: "C:\\Miniconda34"
MINICONDA: "C:\\Miniconda34"
PYTHON_ARCH: "32"

- PYTHON_VERSION: "3.4"
PYTHON: "C:\\Miniconda34-x64"
MINICONDA: "C:\\Miniconda34-x64"
PYTHON_ARCH: "64"

- PYTHON_VERSION: "3.5"
PYTHON: "C:\\Miniconda35"
MINICONDA: "C:\\Miniconda35"
Expand All @@ -43,6 +33,18 @@ environment:
MINICONDA: "C:\\Miniconda36-x64"
PYTHON_ARCH: "64"

- PYTHON_VERSION: "3.7"
PYTHON: "C:\\Miniconda37"
MINICONDA: "C:\\Miniconda37"
PYTHON_ARCH: "32"

- PYTHON_VERSION: "3.7"
PYTHON: "C:\\Miniconda37-x64"
MINICONDA: "C:\\Miniconda37-x64"
PYTHON_ARCH: "64"




install:
- "set PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%"
Expand Down
2 changes: 1 addition & 1 deletion indexed_gzip/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@
ZranError)


__version__ = '0.8.8'
__version__ = '0.8.9'
5 changes: 4 additions & 1 deletion indexed_gzip/indexed_gzip.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ cdef class _IndexedGzipFile:
with an open file handle (``fileobj``), or with a ``filename``. If the
former, the file must have been opened in ``'rb'`` mode.
.. note:: The ``auto_build`` behaviour only takes place on calls to
:meth:`seek`.
:arg filename: File name.
:arg mode: Opening mode. Must be either ``'r'`` or ``'rb'``.
Expand All @@ -145,7 +148,7 @@ cdef class _IndexedGzipFile:
:arg fid: Deprecated, use ``fileobj`` instead.
:arg auto_build: If ``True`` (the default), the index is
automatically built on seeks/reads.
automatically built on calls to :meth:`seek`.
:arg spacing: Number of bytes between index seek points.
Expand Down
70 changes: 70 additions & 0 deletions indexed_gzip/tests/ctest_zran.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@ import itertools as it
import subprocess as sp
import sys
import time
import gzip
import shutil
import random
import hashlib
import tempfile
import threading
import contextlib

import numpy as np

Expand Down Expand Up @@ -56,6 +59,21 @@ cimport indexed_gzip.zran as zran
np.import_array()


@contextlib.contextmanager
def tempdir():
    """Context manager which creates a temporary directory, makes it the
    current working directory, and yields its path.

    On exit the previous working directory is restored, and the temporary
    directory (including anything created inside it) is deleted.
    """
    scratch = tempfile.mkdtemp()
    original_cwd = os.getcwd()
    try:
        os.chdir(scratch)
        yield scratch
    finally:
        # Step back out of the directory before removing it.
        os.chdir(original_cwd)
        shutil.rmtree(scratch)


cdef read_element(zran.zran_index_t *index, element, nelems, seek=True):

cdef void *buffer
Expand Down Expand Up @@ -867,3 +885,55 @@ def test_export_then_import(testfile):

zran.zran_free(&index1)
zran.zran_free(&index2)


def test_export_import_no_points():
    """Export and re-import an index which contains no index points.

    A 100 byte array is gzipped, read back through a fresh zran index, and
    that index is exported to ``data.gz.index``. A second index is then
    initialised on the same file, the exported index is imported into it,
    and the test checks that the imported index has no points and can still
    be used to read the original data back.
    """

    cdef zran.zran_index_t index
    cdef void             *buffer

    data   = np.random.randint(1, 255, 100, dtype=np.uint8)
    buf    = ReadBuffer(100)
    buffer = buf.buffer

    # All files are created with relative paths, so
    # they live (and die) inside the temp directory.
    with tempdir():

        # ndarray.tobytes is the supported spelling of the
        # deprecated (and, in NumPy 2.0, removed) tostring.
        with gzip.open('data.gz', 'wb') as f:
            f.write(data.tobytes())

        # Pass 1: read the data via a new
        # index, then export that index.
        with open('data.gz', 'rb') as pyfid:
            cfid = fdopen(pyfid.fileno(), 'rb')
            assert zran.zran_init(&index,
                                  cfid,
                                  1048576,
                                  32768,
                                  131072,
                                  0) == 0
            assert zran.zran_read(&index, buffer, 100) == 100

            pybuf = <bytes>(<char *>buffer)[:100]
            assert np.all(np.frombuffer(pybuf, dtype=np.uint8) == data)

            with open('data.gz.index', 'wb') as pyidxfid:
                cidxfid = fdopen(pyidxfid.fileno(), 'wb')
                assert zran.zran_export_index(&index, cidxfid) == 0
            zran.zran_free(&index)

        # Pass 2: import the exported index into a fresh
        # index (same zran_init arguments as pass 1).
        with open('data.gz', 'rb') as pyfid:
            cfid = fdopen(pyfid.fileno(), 'rb')
            assert zran.zran_init(&index,
                                  cfid,
                                  1048576,
                                  32768,
                                  131072,
                                  0) == 0

            with open('data.gz.index', 'rb') as pyidxfid:
                cidxfid = fdopen(pyidxfid.fileno(), 'rb')
                assert zran.zran_import_index(&index, cidxfid) == 0
                assert index.npoints == 0

            # The imported, point-less index must still be usable.
            assert zran.zran_read(&index, buffer, 100) == 100
            pybuf = <bytes>(<char *>buffer)[:100]
            assert np.all(np.frombuffer(pybuf, dtype=np.uint8) == data)
            zran.zran_free(&index)
1 change: 1 addition & 0 deletions indexed_gzip/tests/test_zran.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,4 @@ def test_read_all_sequential( testfile, nelems): ctest
def test_build_then_read( testfile, nelems, seed, use_mmap): ctest_zran.test_build_then_read( testfile, nelems, seed, use_mmap)
def test_readbuf_spacing_sizes( testfile, nelems, niters, seed): ctest_zran.test_readbuf_spacing_sizes( testfile, nelems, niters, seed)
def test_export_then_import( testfile): ctest_zran.test_export_then_import( testfile)
def test_export_import_no_points(): ctest_zran.test_export_import_no_points()
23 changes: 19 additions & 4 deletions indexed_gzip/zran.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
*
* Author: Paul McCarthy <[email protected]>
*/

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
Expand Down Expand Up @@ -36,6 +37,13 @@ static int is_readonly(FILE *fd)
{
return (fcntl(fileno(fd), F_GETFL) & O_ACCMODE) == O_RDONLY;
}


/*
 * Returns the larger of the two given unsigned 32 bit values.
 */
static uint32_t max(uint32_t a, uint32_t b) {
    return (a > b) ? a : b;
}
#endif

#include "zran.h"
Expand Down Expand Up @@ -416,7 +424,7 @@ uint32_t ZRAN_INFLATE_STOP_AT_BLOCK = 64;
* parameters are respectively updated to contain the total number of
* compressed bytes that were read from the file, and total number of
* decompressed bytes that were copied to the data buffer.
*
* - ZRAN_INFLATE_OK: Inflation was successful and the requested
* number of bytes were copied to the provided
* data buffer.
Expand Down Expand Up @@ -2698,8 +2706,11 @@ int zran_import_index(zran_index_t *index,
* At this step, the number of points is known. Allocate space for new list
* of points. This pointer should be cleaned up before exit in case of
* failure.
*
* The index file is allowed to contain 0 points, in which case we
* allocate space for 8 points (same as in zran_init).
*/
new_list = calloc(1, sizeof(zran_point_t) * npoints);
new_list = calloc(1, sizeof(zran_point_t) * max(npoints, 8));

if (new_list == NULL) goto memory_error;

Expand Down Expand Up @@ -2836,8 +2847,12 @@ int zran_import_index(zran_index_t *index,
index->list = new_list;
index->npoints = npoints;

/* Let's not forget to update the size as well. */
index->size = npoints;
/*
* Let's not forget to update the size as well.
* If npoints is 0, the list will have been
* initialised to allow space for 8 points.
*/
index->size = max(npoints, 8);

zran_log("zran_import_index: done\n");

Expand Down
35 changes: 33 additions & 2 deletions indexed_gzip/zran.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,9 @@ struct _zran_index {
size_t uncompressed_size;

/*
* Spacing size in bytes, relative to the compressed
* data stream, between adjacent index points
* Spacing size in bytes, relative to the
* uncompressed data stream, between adjacent
* index points.
*/
uint32_t spacing;

Expand Down Expand Up @@ -319,6 +320,36 @@ enum {
*
* See zran_import_index for importing.
*
* A zran index file is a binary file which has the following header
* structure. All fields are assumed to be stored with little-endian
* ordering:
*
* | Offset | Length | Description |
* | 0 | 7 | File header (GZIDX\00\00) |
* | 7 | 8 | Compressed file size (uint64) |
* | 15 | 8 | Uncompressed file size (uint64) |
* | 23 | 4 | Index point spacing (uint32) |
* | 27 | 4 | Index window size W (uint32) |
* | 31 | 4 | Number of index points (uint32) |
*
* The header is followed by the offsets for each index point:
*
* | Offset | Length | Description |
* | 0 | 8 | Compressed offset for point 0 (uint64) |
* | 8 | 8 | Uncompressed offset for point 0 (uint64) |
* | 16 | 1 | Bit offset for point 0 (uint8) |
* | ... | ... | ... |
* | N*17 | 8 | Compressed offset for point N (uint64) |
* | ... | ... | ... |
*
* Finally the window data for every index point is concatenated
* (W represents the index window size):
*
* | Offset | Length | Description |
* | 0 | W | Window data for index point 0 |
* | ... | ... | ... |
* | N*W | W | Window data for index point N |
*
* Returns:
* - ZRAN_EXPORT_OK for success.
*
Expand Down

0 comments on commit aba0358

Please sign in to comment.