Skip to content

Commit

Permalink
Merge pull request #5 from rbutleriii/pandas_update
Browse files Browse the repository at this point in the history
bugfixes for dependencies py3.7
  • Loading branch information
Robert Butler authored May 9, 2019
2 parents a3ad494 + 6e1828e commit 3ce3ee1
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 10 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@ The warnings, as well as some additional information can be stored in the log fi
### Dependencies
The following from my pipenv:
The following dependencies:
```
biopython==1.70
biopython==1.73
- numpy [required: Any, installed: 1.14.0]
pandas==0.22.0
- numpy [required: >=1.9.0, installed: 1.14.0]
Expand All @@ -99,7 +99,7 @@ pandas==0.22.0
- pytz [required: >=2011k, installed: 2018.3]
```
Numpy *should* work >= 1.9.0 and pandas >= 0.20.0, but install more recent versions if possible. For python 3.4 versions, a pip installation of pandas may give error [#20723](https://github.com/pandas-dev/pandas/issues/20723). If so, specify 'pandas<0.21'.
Numpy *should* work >= 1.9.0 and pandas >= 0.20.0, but install more recent versions if possible. For python 3.4 versions, a pip installation of pandas may give error [#20723](https://github.com/pandas-dev/pandas/issues/20723). If so, specify 'pandas<0.21'. As of December 2018, the batch query limits appear to have tightened; upgrade to biopython 1.73 (see issue [#1867](https://github.com/biopython/biopython/issues/1867)).
### Memory/System requirements
Expand Down
2 changes: 1 addition & 1 deletion clinotator/global_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
See main, eventually tests will be added for this module
'''

__version__ = "1.2.2"
__version__ = "1.2.3"


### getncbi.py global variables
Expand Down
10 changes: 5 additions & 5 deletions clinotator/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ def parse_header(file_object, outprefix):
with open('{}.anno.vcf'.format(outprefix), 'w') as outfile:
header = []
info_list = []
for index, line in enumerate(
next(file_object) for x in range(g.max_vcf_header_size)):
for index, line in zip(range(g.max_vcf_header_size), file_object):
m = re.match('##([\w\-\.]+)=', line)

if m and m.group(1) == 'INFO':
Expand Down Expand Up @@ -63,7 +62,8 @@ def parse_header(file_object, outprefix):
# process input vcf file, return rsids for query and vcf_tbl for output
def vcf_prep(file_object, outprefix):
header_count = parse_header(file_object, outprefix)
vcf_tbl = pd.read_table(file_object, skiprows=header_count, dtype=str)
vcf_tbl = pd.read_csv(file_object, sep='\t', skiprows=header_count,
dtype=str)
logging.debug('vcf_tbl shape -> {}'.format(vcf_tbl.shape))
vcf_list = vcf_tbl.ID.values[vcf_tbl.ID.values != '.'].tolist()
return vcf_list, vcf_tbl
Expand All @@ -84,7 +84,7 @@ def cat_info_column(info, rsid, alt, out_tbl):
'CVDS': {',': '%2C', ';': '%3B'}},
regex=True, inplace=True)
new_info = ['{}={}'.format(x, info_tbl[x]
.to_csv(header=None, index=False, na_rep='.')
.to_csv(header=False, index=False, na_rep='.')
.strip('\n')) for x in info_columns]
new_info = [string.replace('\n', ',') for string in new_info]
logging.debug('{} had a match: {}'.format(rsid, new_info))
Expand All @@ -102,7 +102,7 @@ def cat_info_column(info, rsid, alt, out_tbl):
with open('../test/test.vcf', 'r') as file_object:
vcf_list, vcf_tbl = vcf_prep(file_object, 'test_header')
logging.debug('vcf_list -> {}'.format(vcf_list))
sample_tbl = pd.read_table('../test/test.tbl', dtype=str)
sample_tbl = pd.read_csv('../test/test.tbl', sep='\t', dtype=str)
info_list = cat_info_column('NS=3;DP=11;AF=0.017', 'rs34376836', 'A',
sample_tbl)
info_list = cat_info_column('NS=3;DP=11;AF=0.017', '.', 'A',
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
pandas
biopython
biopython>=1.73
8 changes: 8 additions & 0 deletions test/test.tbl
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
VID CVVT rsID CVAL vcfmatch CVCS CVSZ CVNA CVDS CVLE CTRS CTAA CTPS CTRR
7 Haplotype 200401432 A ['200401432|A', '118161496|C'] Pathogenic 1 1 Mitochondrial complex I deficiency(P) 2017-09-01 . 1.0 . .
7 Haplotype 118161496 C ['200401432|A', '118161496|C'] Pathogenic 1 1 Mitochondrial complex I deficiency(P) 2017-09-01 . 1.0 . .
13979 Simple 180177040 C . Pathogenic 3 7 Cardio-facio-cutaneous syndrome(P);Cardio-facio-cutaneous syndrome(P);Inborn genetic diseases(P);Cardiofaciocutaneous syndrome 1(P);not provided(P);Cardio-facio-cutaneous syndrome(P);Rasopathy(P) 2017-04-03 37.74 2.0 Pathogenic 0
50317 Simple 118161496 C . Conflicting interpretations of pathogenicity 1 5 not provided(P);Inborn genetic diseases(P);not provided(US);not provided(US);Mitochondrial complex I deficiency(LP) 2017-08-14 12.36 2.2 Pathogenic/Likely pathogenic 3
55794 Simple 180177040 G . Likely pathogenic 0 0 . 2016-05-31 . . . .
211895 Simple 766877230 T . Uncertain significance 2 2 not specified(US);not specified(US) 2016-11-02 -0.48 3.0 Uncertain significance 0
262232 Simple 34376836 A . Benign 2 2 Brown-Vialetto-Van Laere syndrome 1(B);not specified(B) 2017-11-08 -12.0 1.0 Benign/Likely benign 1
31 changes: 31 additions & 0 deletions test/test_header.anno.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
##fileformat=VCFv4.3
##fileDate=20090805
##source=myImputationProgramV3.1
##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>
##phasing=partial
##annotation=CLINOTATORv1.2.3_run_2019-05-08
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
##INFO=<ID=VID,Number=1,Type=Integer,Description="ClinVar variation ID">
##INFO=<ID=CVVT,Number=A,Type=String,Description="ClinVar variant type">
##INFO=<ID=CVAL,Number=A,Type=String,Description="ClinVar alternate allele">
##INFO=<ID=CVCS,Number=A,Type=String,Description="ClinVar clinical significance">
##INFO=<ID=CVSZ,Number=A,Type=Integer,Description="ClinVar stars">
##INFO=<ID=CVNA,Number=A,Type=Integer,Description="ClinVar number of clinical assertions">
##INFO=<ID=CVDS,Number=A,Type=String,Description="ClinVar conditions">
##INFO=<ID=CVLE,Number=A,Type=String,Description="ClinVar last evaluated">
##INFO=<ID=CTRS,Number=A,Type=Float,Description="Clinotator raw score">
##INFO=<ID=CTAA,Number=A,Type=Float,Description="Clinotator average clinical assertion age">
##INFO=<ID=CTPS,Number=A,Type=String,Description="Clinotator predicted significance">
##INFO=<ID=CTRR,Number=A,Type=String,Description="Clinotator reclassification recommendation">
##FILTER=<ID=q10,Description="Quality below 10">
##FILTER=<ID=s50,Description="Less than 50% of samples have data">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">

0 comments on commit 3ce3ee1

Please sign in to comment.