Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for extra-chroms (string chromosomes) #30

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,33 @@ html/
*.egg-info
__pycache__
dist

# editor settings
.vscode/
.spyproject/

# cmake output files from CMake.gitignore
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake
_deps

# Additional CMake files
CPackConfig.cmake
CPackSourceConfig.cmake

# file objects
*.a

# compiled binaries
tests/bed_test
tests/bim_test
tests/fam_test
tests/plinkio_test
46 changes: 32 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,43 @@ Project rationales:

Installing this library is easy, just **configure** and **make**. This will also install Python bindings for the active interpreter.

*NEWS* The python extension can now be installed by
*NEWS* The python extension can now be installed
[from pypi](https://pypi.org/project/plinkio/) by:

pip install plinkio

To compile and install from source code:

python setup.py build
python setup.py install

You require [tox](https://pypi.org/project/tox/) to test plinkio properly.
By calling:

tox

in your project directory, you will download dependencies, compile the library
and run tests. If you get stuck with compiled libraries, adding the `-r` option
will reset the test environment (this means wiping out the testing environment
and re-initializing it from scratch by downloading dependencies and compiling again).

### Installing to a standard location

mkdir build
cd build
../configure
make && make check && sudo make install
[CMake](https://cmake.org/) is required in order to compile the C libraries (mind
the final `.` after the `cmake` command, which refers to the local directory of
your current *project*):

You can also pass the --disable-tests flag to **configure** to avoid building the unit tests and the dependency on libcmockery. Note, however, that in this case **make check** will not do anything.
cmake .
make
make test
make install

### Installing to a custom location

mkdir build
cd build
../configure --prefix=/path/to/plinkio
make && make check && make install
cmake -DCMAKE_INSTALL_PREFIX:PATH=/path/to/plinkio .
make
make test
make install

### Linking to your program

Expand All @@ -56,7 +74,7 @@ The genotypes are coded 0, 1, 2, and 3. The numbers 0-2 represent the number of

## Using in C

For specific information look at http://mfranberg.github.com/libplinkio/index.html
For specific information look at https://mfranberg.github.io/libplinkio/index.html

The following C program prints the genotypes of all individuals. Note, that it is not recommended to run this program on a big plink file since it will fill your screen with data.

Expand Down Expand Up @@ -175,9 +193,9 @@ struct pio_locus_t
size_t pio_id;

/**
* Chromosome number starting from 1.
* Chromosome as strings.
*/
unsigned char chromosome;
char *chromosome;

/**
* Name of the SNP.
Expand Down Expand Up @@ -302,7 +320,7 @@ class Sample:
class Locus:
def __init__(self, chromosome, name, position, bp_position, allele1, allele2):
##
# Chromosome number starting from 1
# Chromosome string
#
self.chromosome = chromosome

Expand Down
13 changes: 7 additions & 6 deletions py-plinkio/cplinkio.c
Original file line number Diff line number Diff line change
Expand Up @@ -393,11 +393,11 @@ int parse_locus(PyObject *py_locus, struct pio_locus_t *locus)
PyObject *allele1_object;
PyObject *allele2_object;

PyObject *chromosome_string;
PyObject *name_string;
PyObject *allele1_string;
PyObject *allele2_string;

int chromosome;
float position;
int bp_position;

Expand All @@ -410,16 +410,16 @@ int parse_locus(PyObject *py_locus, struct pio_locus_t *locus)
allele1_object = PyObject_GetAttrString( py_locus, "allele1" );
allele2_object = PyObject_GetAttrString( py_locus, "allele2" );

chromosome = PyInt_AsLong( chromosome_object );
chromosome_string = PyObject_Str( chromosome_object );
name_string = PyObject_Str( name_object );
position = PyFloat_AsDouble( position_object );
bp_position = PyInt_AsLong( bp_position_object );
allele1_string = PyObject_Str( allele1_object );
allele2_string = PyObject_Str( allele2_object );

if( chromosome == -1 && PyErr_Occurred( ) )
if( chromosome_string == NULL )
{
PyErr_SetString( PyExc_TypeError, "Error chromosome field must be an integer." );
PyErr_SetString( PyExc_TypeError, "Error chromosome field must be a string" );
ret = 0;
}
else if( name_string == NULL )
Expand Down Expand Up @@ -449,14 +449,15 @@ int parse_locus(PyObject *py_locus, struct pio_locus_t *locus)
}

/* The strings wont get freed by plinkio so remove const qualifier */
locus->chromosome = PyInt_AsLong( chromosome_object );
locus->chromosome = (char *) PyString_AsString( chromosome_string );
locus->name = (char *) PyString_AsString( name_string );
locus->position = PyFloat_AsDouble( position_object );
locus->bp_position = PyInt_AsLong( bp_position_object );
locus->allele1 = (char *) PyString_AsString( allele1_string );
locus->allele2 = (char *) PyString_AsString( allele2_string );

locus_error:
Py_DECREF( chromosome_string );
Py_DECREF( name_string );
Py_DECREF( allele1_string );
Py_DECREF( allele2_string );
Expand Down Expand Up @@ -639,7 +640,7 @@ plinkio_get_loci(PyObject *self, PyObject *args)
{
struct pio_locus_t *locus = pio_get_locus( &c_plink_file->file, i );

PyObject *args = Py_BuildValue( "BsfLss",
PyObject *args = Py_BuildValue( "ssfLss",
locus->chromosome,
locus->name,
locus->position,
Expand Down
2 changes: 1 addition & 1 deletion py-plinkio/plinkio/plinkfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ class Locus:
def __init__(self, chromosome, name, position, bp_position, allele1, allele2):
# pylint: disable = too-many-arguments
##
# Chromosome number starting from 1
# Chromosome string
#
self.chromosome = chromosome

Expand Down
16 changes: 13 additions & 3 deletions py-plinkio/tests/write_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,19 @@ def test_read_write():
with tempfile.TemporaryDirectory() as temp_dir:
plink_prefix = os.path.join(temp_dir, "test")

samples = [Sample("fid1", "iid1", "0", "0", 0, 0), Sample("fid2", "iid2", "0", "0", 0, 1)]
loci = [Locus(1, "chr1:1", 1.0, 1, "A", "C"), Locus(2, "chr1:2", 2.0, 2, "G", "T")]
rows = [[0, 1], [1, 2]]
samples = [
Sample("fid1", "iid1", "0", "0", 0, 0),
Sample("fid2", "iid2", "0", "0", 0, 1),
]

loci = [
Locus("1", "chr1:1", 1.0, 1, "A", "C"),
Locus("2", "chr1:2", 2.0, 2, "G", "T"),
Locus("X", "chrX:3", 1.0, 3, "A", "G"),
Locus("Contig123456", "Contig123456:4", 1.0, 4, "T", "C"),
]

rows = [[0, 1], [1, 2], [1, 1], [0, 0]]

writer = plinkfile.create(plink_prefix, samples)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
# Versions should comply with PEP440. For a discussion on single-sourcing
# the version across setup.py and the project code, see
# https://packaging.python.org/en/latest/single_source_version.html
version="0.9.8",
version="0.9.9.dev0",
description="A library for parsing plink genotype files",
long_description=long_description,
# The project's main homepage.
Expand Down
6 changes: 5 additions & 1 deletion src/bim.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ utarray_locus_dtor(void *element)
{
struct pio_locus_t *locus = (struct pio_locus_t *) element;

if( locus->chromosome != NULL )
{
free( locus->chromosome );
}
if( locus->name != NULL )
{
free( locus->name );
Expand Down Expand Up @@ -103,7 +107,7 @@ bim_write(struct pio_bim_file_t *bim_file, struct pio_locus_t *locus)
if( write_locus( bim_file->fp, locus ) == PIO_OK )
{
locus_copy.pio_id = bim_num_loci( bim_file );
locus_copy.chromosome = locus->chromosome;
locus_copy.chromosome = strdup( locus->chromosome );
locus_copy.name = strdup( locus->name );
locus_copy.position = locus->position;
locus_copy.bp_position = locus->bp_position;
Expand Down
29 changes: 2 additions & 27 deletions src/bim_parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,31 +99,6 @@ parse_str(const char *field, size_t length, pio_status_t *status)
}
}

/**
* Parses a chromosome number and returns it.
*
* @param field Csv field.
* @param length Length of the field.
* @param status Status of the conversion.
*
* @return The parsed csv field, or 0 if it could
* not be parsed.
*/
static unsigned char
parse_chr(const char *field, size_t length, pio_status_t *status)
{
char *endptr;
unsigned char chr = (unsigned char) strtol( field, &endptr, 10 );
if( length > 0 && ( endptr == NULL || *endptr == '\0' ) )
{
*status = PIO_OK;
return chr;
}

*status = PIO_ERROR;
return 0;
}

/**
* Parses a genetic distance (float).
*
Expand Down Expand Up @@ -204,7 +179,7 @@ new_field(void *field, size_t field_length, void *data)
switch( state->field )
{
case 0:
state->cur_locus.chromosome = parse_chr( buffer, field_length, &status );
state->cur_locus.chromosome = parse_str( buffer, field_length, &status );
break;
case 1:
state->cur_locus.name = parse_str( buffer, field_length, &status );
Expand Down Expand Up @@ -286,7 +261,7 @@ pio_status_t
write_locus(FILE *bim_fp, struct pio_locus_t *locus)
{
int bytes_written = fprintf( bim_fp,
"%d\t%s\t%f\t%lld\t%s\t%s\n",
"%s\t%s\t%f\t%lld\t%s\t%s\n",
locus->chromosome,
locus->name,
locus->position,
Expand Down
4 changes: 2 additions & 2 deletions src/plinkio/bim.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ struct pio_locus_t
size_t pio_id;

/**
* Chromosome number starting from 1.
* Chromosome as strings.
*/
unsigned char chromosome;
char *chromosome;

/**
* Name of the SNP.
Expand Down
27 changes: 23 additions & 4 deletions tests/bim_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,30 @@ test_parse_position(void **state)
void
test_parse_chr(void **state)
{
const char *TEST_STRING = "16";
const char *TEST_STRING1 = "16";
pio_status_t status;

assert_int_equal( parse_chr( TEST_STRING, strlen( TEST_STRING ), &status ), 16 );
char *chrom1 = parse_str( TEST_STRING1, strlen( TEST_STRING1 ), &status );

assert_string_equal( chrom1, TEST_STRING1 );
assert_int_equal( status, PIO_OK );
free(chrom1);

const char *TEST_STRING2 = "X";

char *chrom2 = parse_str( TEST_STRING2, strlen( TEST_STRING2 ), &status );

assert_string_equal( chrom2, TEST_STRING2 );
assert_int_equal( status, PIO_OK );
free(chrom2);

const char *TEST_STRING3 = "Contig123456";

char *chrom3 = parse_str( TEST_STRING3, strlen( TEST_STRING3 ), &status );

assert_string_equal( chrom3, "Contig123456" );
assert_int_equal( status, PIO_OK );
free(chrom3);
}

/**
Expand All @@ -58,15 +77,15 @@ test_parse_multiple_loci(void **state)
assert_int_equal( bim_num_loci( &bim_file ), 2 );

locus = *bim_get_locus( &bim_file, 0 );
assert_int_equal( locus.chromosome, 1 );
assert_string_equal( locus.chromosome, "1" );
assert_string_equal( locus.name, "rs1" );
assert_true( fabs( locus.position - 0.0 ) <= 1e-6 );
assert_int_equal( locus.bp_position, 1234567 );
assert_string_equal( locus.allele1, "A" );
assert_string_equal( locus.allele2, "C" );

locus = *bim_get_locus( &bim_file, 1 );
assert_int_equal( locus.chromosome, 1 );
assert_string_equal( locus.chromosome, "1" );
assert_string_equal( locus.name, "rs2" );
assert_true( fabs( locus.position - 0.23 ) <= 1e-6 );
assert_int_equal( locus.bp_position, 7654321 );
Expand Down