Skip to content

Commit

Permalink
cpoly cpulldown cascertain memory error fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
mengyao committed Aug 10, 2015
1 parent e42174e commit ccaecd1
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 106 deletions.
33 changes: 15 additions & 18 deletions src/cascertain.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* cascertain.c: Pull down the SNPs that match the ascertain criterion.
* Author: Nick Patterson
* Revised by: Mengyao Zhao
* Last revise date: 2015-05-07
* Last revise date: 2015-08-10
* Contact: [email protected]
*/

Expand Down Expand Up @@ -30,13 +30,10 @@ typedef struct {
char *table_path = NULL;
char *regname = NULL ;
char *snpname = NULL ;
//char *iubfile = NULL;
//char *iubmaskfile = NULL;
char *iubfile = "/home/mz128/cteam/dblist/hetfa_postmigration.dblist" ;
char *iubmaskfile = "/home/mz128/cteam/dblist/mask_postmigration.dblist" ;

char *parname = NULL ;
//int pagesize = 20*1000*1000 ; // page size for getiub
int pagesize = 1000*1000 ; // page size for getiub
int minfilterval = 1 ;

Expand Down Expand Up @@ -480,15 +477,17 @@ int loadfa(char **poplist, int npops, FATYPE ***pfainfo, char *reg, int lopos, i
sprintf(region, "%s:%d-%d", reg, lo, hipos);


if (db == 0) refname = strcat(table_path, "Href.fa");
else getdbname(iubfile, "Href", &refname);
if (db == 0) {
strcpy(refname, table_path);
strcat(refname, "Href.fa");
} else getdbname(iubfile, "Href", &refname);

if (ncall==1) {
ZALLOC(falist, npops, char *) ;
ZALLOC(famasklist, npops, char *) ;
if (db == 0) {
numfalist = setfalist(poplist, npops, ".fa", falist) ;
t = setfalist(poplist, npops, ".filter.fa", famasklist) ;
numfalist = setfalist(poplist, npops, ".ccomp.fa.rz", falist) ;
t = setfalist(poplist, npops, ".ccompmask.fa.rz", famasklist) ;
} else {
numfalist = getfalist(poplist, npops, iubfile, falist) ; // set falist with the absolute path of hetfa files in .dblist file; falist contains the iubfile names
t = getfalist(poplist, npops, iubmaskfile, famasklist) ;
Expand All @@ -508,6 +507,7 @@ int loadfa(char **poplist, int npops, FATYPE ***pfainfo, char *reg, int lopos, i
hasmask[k] = NO ;
continue ;
}
// fprintf(stderr, "famasklist[%d]: %s\n", k, famasklist[k]);
t = strcmp(famasklist[k], "NULL") ;
if (t==0) {
hasmask[k] = NO ;
Expand All @@ -534,7 +534,6 @@ int loadfa(char **poplist, int npops, FATYPE ***pfainfo, char *reg, int lopos, i
printf("loading: %s\n", fapt -> faname) ;
printnl() ;
}
// fprintf(stderr, "faname1: %s\n", fapt->faname);

fapt -> fai = fai_load(fapt -> faname) ;
fapt -> popnum = k ;
Expand Down Expand Up @@ -631,7 +630,6 @@ int getiub(char *cc, char *ccmask, FATYPE **fainfo, char *reg, int pos)
char regbuff[128] ;
static long ncnt = 0 ;
static long ncall = 0 ;
//fprintf(stderr, "!in getiub!\n");
++ncall ;
fapt = fainfo[0] ;
lastreg = fapt -> regname ;
Expand Down Expand Up @@ -661,7 +659,6 @@ int getiub(char *cc, char *ccmask, FATYPE **fainfo, char *reg, int pos)
if (pos < lastlo) newpage = YES ;
if (pos > lasthi) newpage = YES ;
if (ncall == 1) newreg = YES ;
//fprintf(stderr, "half getiub\n");
if (newreg == YES) {
fflush(stdout) ;
freestring(&regname) ;
Expand All @@ -685,7 +682,6 @@ int getiub(char *cc, char *ccmask, FATYPE **fainfo, char *reg, int pos)
if (pos < fapt -> lopos) return -2 ;
if (pos > fapt -> hipos) return -2 ;
cc[k] = getfacc(fapt, pos, 1) ; // genotype at pos; cc[k] is an iub code
//fprintf(stderr, "pos: %d\tcc[%d]: %c\n", pos, k, cc[k]);
if (hasmask[k]) ccmask[k] = getfacc(fapt, pos, 2) ; // mask at pos
else ccmask[k] = '9' ;
}
Expand All @@ -697,7 +693,6 @@ int getiub(char *cc, char *ccmask, FATYPE **fainfo, char *reg, int pos)
}

++ncnt ;
// fprintf(stderr, "ncnt: %d\n", ncnt);
if (ncnt == 1) {
printf("zz pos: %s %s\n", cc, ccmask) ;
for (k=0; k<npops; ++k) {
Expand Down Expand Up @@ -800,13 +795,15 @@ int setfalist(char **poplist, int npops, char *dbfile, char **iublist) {
int t;
for (t = 0; t < npops; ++t) {
iublist[t] = strdup(table_path);
iublist[t] = (char*) realloc(iublist[t], 64);
iublist[t] = (char*) realloc(iublist[t], strlen(iublist[t]) + strlen(poplist[t]) + strlen(dbfile) + 1);
iublist[t] = strcat(iublist[t], poplist[t]);
if ((!strcmp (poplist[t], "Chimp") || !strcmp (poplist[t], "Href")) && strcmp (dbfile, ".fa")) {
free (iublist[t]);
// fprintf(stderr, "poplist[%d]: %s\ndbfile: %s\n", t, poplist[t], dbfile);

if ((!strcmp (poplist[t], "Chimp") || !strcmp (poplist[t], "Href")) && !strcmp (dbfile, ".ccomp.fa.rz"))
iublist[t] = strcat(iublist[t], ".fa");
else if ((!strcmp (poplist[t], "Chimp") || !strcmp (poplist[t], "Href")) && !strcmp (dbfile, ".ccompmask.fa.rz"))
iublist[t] = "NULL";
} else
iublist[t] = strcat(iublist[t], dbfile);
else iublist[t] = strcat(iublist[t], dbfile);
}
return npops;
}
Expand Down
34 changes: 20 additions & 14 deletions src/cmakefilter.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Revised by Mengyao Zhao on 2015-03-12
// Revised by Mengyao Zhao on 2015-08-06

#include <libgen.h>
#include <nicksam.h>
Expand Down Expand Up @@ -699,6 +699,7 @@ int readfa(char **falist, char **fasta, int *flen, int n)
flen[k] = len ;
}
}

void *setvcff(char *vcffile, char **vcfn)
// vcffile should not exist
{
Expand All @@ -711,23 +712,27 @@ void *setvcff(char *vcffile, char **vcfn)
sprintf(iname, "%s.gz", vcffile) ;
if (ftest(iname)) {
printf("got %s\n", iname) ;
bname = basename(vcffile) ;
sprintf(outname, "%s/%s:%s.vcf", wkdir,sampname,regname ) ;
sprintf(ss, "cp %s %s.gz", iname, outname) ;
printf("zzcopy: %s\n",ss) ;
system(ss) ;
sprintf(ss, "gunzip %s.gz", outname) ;
system(ss) ;
sprintf(ss, "chmod 664 %s", outname) ;
system(ss) ;
if (ftest(outname) == NO) fatalx("unzip fails\n") ;
}
else {
fatalx("gzfetch fails\n") ;
else {
sprintf(outname, "%s.bgz", vcffile) ;
// sprintf(iname, "%s.bgz", vcffile) ;
// if (ftest(iname)) printf("got %s\n", iname) ;
// else fatalx("gzfetch and bgzfetch fail\n") ;
// else {
// fatalx("gzfetch fails\n") ;
}
bname = basename(vcffile) ;
sprintf(outname, "%s/%s:%s.vcf", wkdir,sampname,regname ) ;
sprintf(ss, "cp %s %s.gz", iname, outname) ;
printf("zzcopy: %s\n",ss) ;
system(ss) ;
sprintf(ss, "gunzip %s.gz", outname) ;
system(ss) ;
sprintf(ss, "chmod 664 %s", outname) ;
system(ss) ;
if (ftest(outname) == NO) fatalx("unzip fails\n") ;

*vcfn = strdup(outname) ;

}

int readvcf(char *vcffile, int **fasta, int reflen, int *flen)
Expand All @@ -748,6 +753,7 @@ int readvcf(char *vcffile, int **fasta, int reflen, int *flen)

if (ftest(vcffile) == NO) {
setvcff(vcffile, &vcftmp) ;
fprintf(stderr, "vcf: %s\n", vcftmp);
openit(vcftmp, &vcffp, "r") ; // aborts on error
}
else {
Expand Down
69 changes: 21 additions & 48 deletions src/cpoly.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* cpoly.c: This program is used to extract heterozygote SNPs from multiple samples
* Author: Nick Patterson
* Revised by: Mengyao Zhao
* Last revise date: 2015-05-07
* Last revise date: 2015-08-10
* Contact: [email protected]
*/

Expand All @@ -26,13 +26,7 @@ typedef struct {
} ASC ;

char *table_path = NULL;
//char *iubfile = NULL ;
//char *iubmaskfile = NULL ;

char *regname = NULL ;
//char *parflist = "/home/np29/biology/neander/nickdir/xwdir/may12src/parfxlm" ;
//char *iubfile = "/home/np29/cteam/release/hetfaplus.dblist" ; // default database
//char *iubmaskfile = "/home/np29/cteam/release/maskplus.dblist" ;
char *iubfile = "/home/mz128/cteam/dblist/hetfa_postmigration.dblist" ;
char *iubmaskfile = "/home/mz128/cteam/dblist/mask_postmigration.dblist" ;
char *parname = NULL ;
Expand Down Expand Up @@ -166,7 +160,7 @@ int main(int argc, char **argv)
else xchrom = atoi(regname) ;
}

fprintf(stderr, "call numlines in [main]\n");
// fprintf(stderr, "call numlines in [main]\n");
npops = numlines(indivname) ;
ZALLOC(poplist, npops, char *) ;
npops = getss(poplist, indivname) ;
Expand Down Expand Up @@ -209,7 +203,6 @@ int main(int argc, char **argv)
lo = lopos ;
hi = MIN(hipos, lo+pagesize) ;
loadfa(poplist, npops, &fainfo, reg, lo, hi) ;
//fprintf(stderr, "main: fapt->fai: %p\n", fainfo[0]->fai);
printf("npops: %d\n", npops) ;
for (k=0; k< npops; ++k) {
fapt = fainfo[k] ;
Expand All @@ -234,7 +227,6 @@ int main(int argc, char **argv)
reg = regname ;

for (pos = lopos ; pos <= hipos; ++pos) {
//t = getiub(cc, ccmask, fainfo, reg, pos) ;
t = getiub(cc, ccmask, fainfo, ss, pos) ;
if (t==-5) break ;
if (t<0) continue ;
Expand Down Expand Up @@ -307,9 +299,7 @@ void prints(FILE *fff, int pos, char c1, char c2)
fprintf(fff, "%15s ", sss) ;
fprintf(fff, "%3s 0 %12d ", regname, pos) ;
fprintf(fff, "%c %c\n", c1, c2) ;

return ;

}

int checktriallelic(char *pc1, char *pc2, char x1, char x2)
Expand Down Expand Up @@ -391,7 +381,7 @@ int getdbname(char *dbase, char *name, char **pfqname)
char ***names ;
int n, k, t, i ;

fprintf(stderr, "call numlines in [getdbname]\n");
// fprintf(stderr, "call numlines in [getdbname]\n");
n = numlines(dbase) ;

ZALLOC(names, 3, char **) ;
Expand Down Expand Up @@ -435,15 +425,19 @@ int loadfa(char **poplist, int npops, FATYPE ***pfainfo, char *reg, int lopos, i
region = (char*)malloc((23+strlen(reg))*sizeof(char));
sprintf(region, "%s:%d-%d", reg, lo, hipos);

if (db == 0) refname = strcat(table_path, "Href.fa");
else getdbname(iubfile, "Href", &refname);
if (db == 0) {
strcpy(refname, table_path);
strcat(refname, "Href.fa");
} else getdbname(iubfile, "Href", &refname);

if (ncall==1) {
ZALLOC(falist, npops, char *) ;
ZALLOC(famasklist, npops, char *) ;
if (db == 0) {
numfalist = setfalist(poplist, npops, ".fa", falist) ;
t = setfalist(poplist, npops, ".filter.fa", famasklist) ;
numfalist = setfalist(poplist, npops, ".ccomp.fa.rz", falist) ;
t = setfalist(poplist, npops, ".ccompmask.fa.rz", famasklist) ;
//numfalist = setfalist(poplist, npops, ".fa", falist) ;
//t = setfalist(poplist, npops, ".filter.fa", famasklist) ;
} else {
numfalist = getfalist(poplist, npops, iubfile, falist) ; // set falist with the absolute path of hetfa files in .dblist file
t = getfalist(poplist, npops, iubmaskfile, famasklist) ;
Expand Down Expand Up @@ -524,7 +518,6 @@ int loadfa(char **poplist, int npops, FATYPE ***pfainfo, char *reg, int lopos, i
if (len_s==0) fatalx("bad fetch %s %s\n", fapt->faname, region); // fetch fai

len = len_r < len_s ? len_r : len_s;
// len = len_s;
if (rz == 1) // razipped
for (i = 0; i < len; ++i)
if (fapt->rstring[i] == 'Q') fapt->rstring[i] = ref[i];
Expand Down Expand Up @@ -620,7 +613,7 @@ int getiub(char *cc, char *ccmask, FATYPE **fainfo, char *reg, int pos)

if (newreg == YES) {

printf("zznewrrr %s :: %s %d %d %d\n",lastreg, regbuff, pos, lastlo, lasthi) ; fflush(stdout) ;
// printf("zznewrrr %s :: %s %d %d %d\n",lastreg, regbuff, pos, lastlo, lasthi) ; fflush(stdout) ;
fflush(stdout) ;
freestring(&regname) ;

Expand All @@ -632,18 +625,17 @@ int getiub(char *cc, char *ccmask, FATYPE **fainfo, char *reg, int pos)
}

if (newpage == YES) {
fprintf(stderr, "pos-getiub: %d\n", pos);
//fprintf(stderr, "pos-getiub: %d\n", pos);
fflush(stdout) ;
lastlo = pos ;
lasthi = pos + pagesize ;
lastlo = MAX(lastlo, lopos) ;
lasthi = MAX(lasthi, hipos) ;
lasthi = MIN(lasthi, lastlo+pagesize) ;
printf("calling loodfa %s %d %d \n", regname, lastlo, lasthi) ;
// printf("calling loodfa %s %d %d \n", regname, lastlo, lasthi) ;
fflush(stdout) ;
//fprintf(stderr, "getiub: fapt->fai: %p\n", fainfo[0]->fai);
loadfa(poplist, npops, &fainfo, regname, lastlo, lasthi) ;
printf("newpage: %d %p %d %d\n", pos, topheap(), lastlo, lasthi) ;
// printf("newpage: %d %p %d %d\n", pos, topheap(), lastlo, lasthi) ;

fflush(stdout) ;
}
Expand Down Expand Up @@ -769,13 +761,14 @@ int setfalist(char **poplist, int npops, char *dbfile, char **iublist) {
int t;
for (t = 0; t < npops; ++t) {
iublist[t] = strdup(table_path);
iublist[t] = (char*) realloc(iublist[t], 64);
iublist[t] = (char*) realloc(iublist[t], strlen(iublist[t]) + strlen(poplist[t]) + strlen(dbfile) + 1);
iublist[t] = strcat(iublist[t], poplist[t]);
if ((!strcmp (poplist[t], "Chimp") || !strcmp (poplist[t], "Href")) && strcmp (dbfile, ".fa")) {
free (iublist[t]);

if ((!strcmp (poplist[t], "Chimp") || !strcmp (poplist[t], "Href")) && !strcmp (dbfile, ".ccomp.fa.rz"))
iublist[t] = strcat(iublist[t], ".fa");
else if ((!strcmp (poplist[t], "Chimp") || !strcmp (poplist[t], "Href")) && !strcmp (dbfile, ".ccompmask.fa.rz"))
iublist[t] = "NULL";
} else
iublist[t] = strcat(iublist[t], dbfile);
else iublist[t] = strcat(iublist[t], dbfile);
}
return npops;
}
Expand Down Expand Up @@ -817,25 +810,8 @@ int getfalist(char **poplist, int npops, char *dbfile, char **iublist)

fclose(fff) ;
return nx ;

}
/*
char *myfai_fetch(faidx_t *fai, char *reg, int *plen)
{
char *treg, *s ;
treg = strdup(reg) ;

if (fai==NULL) fatalx("(my_fai_fetch): fai NULL\n") ;
s = fai_fetch(fai, treg, plen) ;
if (*plen > 0) {
free(treg) ;
return s ;
}
free(treg) ;
return NULL ;
}
*/
void clearfainfo(FATYPE *fapt, int mode)
{

Expand Down Expand Up @@ -924,7 +900,6 @@ int abx(int a, int b)
}

int abxok(int abx, int abxmode) {

int t ;

if (abxmode >= 10) {
Expand All @@ -933,8 +908,6 @@ int abxok(int abx, int abxmode) {
return NO ;
}



switch (abxmode) {
case 0:
return YES ;
Expand Down
Loading

0 comments on commit ccaecd1

Please sign in to comment.