Skip to content

Commit

Permalink
Correct pattern match error for genotypes
Browse files Browse the repository at this point in the history
  • Loading branch information
speleonut committed Dec 6, 2023
1 parent ff0137b commit 07f0f11
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 10 deletions.
4 changes: 2 additions & 2 deletions familyKeyMatchingAfterANNOVAR_hg38.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def usage():
dfCore=coreTable
for s in samples: # Maybe this loop could be an apply function?
currentSampleList=ANNOVARtable[[s]]
homList=currentSampleList[currentSampleList[s].str.match(pat = '(1/1)|(1|1)')]
homList=currentSampleList[currentSampleList[s].str.match(pat = '(1/1)|(1\|1)')]
dfCore = pd.concat([dfCore,homList], axis=1, join='inner') # Add , sort='False' once Ubuntu is upgraded

dfCore.to_csv("ibdAndXl."+inputFile, sep='\t')
Expand All @@ -100,7 +100,7 @@ def usage():
dfCore=coreTable
for s in samples:
currentSampleList=ANNOVARtable[[s]]
homList=currentSampleList[currentSampleList[s].str.match(pat = '(0/1)|(0|1)')]
homList=currentSampleList[currentSampleList[s].str.match(pat = '(0/1)|(0\|1)|(1\|0)')]
dfCore = pd.concat([dfCore,homList], axis=1, join='inner') # Add , sort='False' once Ubuntu is upgraded

dfCore.to_csv("het."+inputFile, sep='\t')
Expand Down
6 changes: 3 additions & 3 deletions preConceptionTesting.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,17 +78,17 @@ def bestGeneCandidatesFilter(df):
ANNOVARtable=pd.read_csv(inputFile, sep='\t', index_col = num_cols)
samples = [mumID, dadID]

hetList=ANNOVARtable[ANNOVARtable[samples[0]].str.match(pat = '(0/1)|(0|1)') & ANNOVARtable[samples[1]].str.match(pat = '(0/1)|(0|1)')]
hetList=ANNOVARtable[ANNOVARtable[samples[0]].str.match(pat = '(0/1)|(0\|1)|(1\|0)') & ANNOVARtable[samples[1]].str.match(pat = '(0/1)|(0\|1)|(1\|0)')]
hetList.to_csv("allSharedHetCalls."+inputFile, sep='\t')

# Generic filters for the most likely pathogenic candidates
hetList=bestGeneCandidatesFilter(df=hetList)
hetList.to_csv("allSharedHetCalls.BestGeneCandidates."+inputFile, sep='\t')

# Compound het calls
mNotfHets=ANNOVARtable[ANNOVARtable[samples[0]].str.match(pat = '(0/1)|(0|1)') & ANNOVARtable[samples[1]].str.contains('|'.join(nullAlelles))]
mNotfHets=ANNOVARtable[ANNOVARtable[samples[0]].str.match(pat = '(0/1)|(0\|1)|(1\|0)') & ANNOVARtable[samples[1]].str.contains('|'.join(nullAlelles))]
mGenes=pd.unique(mNotfHets['Gene.refGene'])
fNotmHets=ANNOVARtable[ANNOVARtable[samples[0]].str.contains('|'.join(nullAlelles)) & ANNOVARtable[samples[1]].str.match(pat = '(0/1)|(0|1)')]
fNotmHets=ANNOVARtable[ANNOVARtable[samples[0]].str.contains('|'.join(nullAlelles)) & ANNOVARtable[samples[1]].str.match(pat = '(0/1)|(0\|1)|(1\|0)')]
fGenes=pd.unique(fNotmHets['Gene.refGene'])
seriesCHgenes=pd.Series(mGenes.tolist() + fGenes.tolist())
chGenes=seriesCHgenes[seriesCHgenes.duplicated()]
Expand Down
10 changes: 5 additions & 5 deletions trioKeyMatchingAfterANNOVAR_hg38.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,25 +84,25 @@ def bestGeneCandidatesFilter(df):
samples = [mumID, dadID, childID]

# de novo
dnList=ANNOVARtable[ANNOVARtable[samples[0]].str.contains('|'.join(nullAlelles)) & ANNOVARtable[samples[1]].str.contains('|'.join(nullAlelles)) & ANNOVARtable[samples[2]].str.match(pat = '(0/1)|(0|1)')]
dnList=ANNOVARtable[ANNOVARtable[samples[0]].str.contains('|'.join(nullAlelles)) & ANNOVARtable[samples[1]].str.contains('|'.join(nullAlelles)) & ANNOVARtable[samples[2]].str.match(pat = '(0/1)|(0\|1)|(1\|0)')]
dnList.to_csv(childID+".dn."+inputFile, sep='\t')
spliceCandidates=dnList[dnList['Func.refGene'].isin(ncSpliceTerms)]
spliceCandidates.to_csv(childID+".dn.SpliceCandidates."+inputFile, sep='\t')
dnList=bestGeneCandidatesFilter(df=dnList)
dnList.to_csv(childID+".dn.BestGeneCandidates."+inputFile, sep='\t')

# AR (autosomal recessive), identical by descent and X-linked: child is homozygous, neither parent is
homList=ANNOVARtable[~ANNOVARtable[samples[0]].str.match(pat = '(1/1)|(1|1)') & ~ANNOVARtable[samples[1]].str.match(pat = '(1/1)|(1|1)') & ANNOVARtable[samples[2]].str.match(pat = '(1/1)|(1|1)')]
homList=ANNOVARtable[~ANNOVARtable[samples[0]].str.match(pat = '(1/1)|(1\|1)') & ~ANNOVARtable[samples[1]].str.match(pat = '(1/1)|(1\|1)') & ANNOVARtable[samples[2]].str.match(pat = '(1/1)|(1\|1)')]
homList.to_csv(childID+".ibdAndXl."+inputFile, sep='\t')
spliceCandidates=homList[homList['Func.refGene'].isin(ncSpliceTerms)]
spliceCandidates.to_csv(childID+".ibdAndXl.SpliceCandidates."+inputFile, sep='\t')
homList=bestGeneCandidatesFilter(df=homList)
homList.to_csv(childID+".ibdAndXl.BestGeneCandidates."+inputFile, sep='\t')

# Compound het calls
mNotfHets=ANNOVARtable[ANNOVARtable[samples[0]].str.match(pat = '(0/1)|(0|1)') & ANNOVARtable[samples[1]].str.contains('|'.join(nullAlelles)) & ANNOVARtable[samples[2]].str.match(pat = '(0/1)|(0|1)')]
mNotfHets=ANNOVARtable[ANNOVARtable[samples[0]].str.match(pat = '(0/1)|(0\|1)|(1\|0)') & ANNOVARtable[samples[1]].str.contains('|'.join(nullAlelles)) & ANNOVARtable[samples[2]].str.match(pat = '(0/1)|(0\|1)|(1\|0)')]
mGenes=pd.unique(mNotfHets['Gene.refGene'])
fNotmHets=ANNOVARtable[ANNOVARtable[samples[0]].str.contains('|'.join(nullAlelles)) & ANNOVARtable[samples[1]].str.match(pat = '(0/1)|(0|1)') & ANNOVARtable[samples[2]].str.match(pat = '(0/1)|(0|1)')]
fNotmHets=ANNOVARtable[ANNOVARtable[samples[0]].str.contains('|'.join(nullAlelles)) & ANNOVARtable[samples[1]].str.match(pat = '(0/1)|(0\|1)|(1\|0)') & ANNOVARtable[samples[2]].str.match(pat = '(0/1)|(0\|1)|(1\|0)')]
fGenes=pd.unique(fNotmHets['Gene.refGene'])
seriesCHgenes=pd.Series(mGenes.tolist() + fGenes.tolist())
chGenes=seriesCHgenes[seriesCHgenes.duplicated()]
Expand All @@ -124,7 +124,7 @@ def bestGeneCandidatesFilter(df):
compHets.to_csv(childID+".ch.BestGeneCandidates."+inputFile, sep='\t')

# AD (autosomal dominant): all heterozygous calls in the child
hetList=ANNOVARtable[ANNOVARtable[samples[2]].str.match(pat = '(0/1)|(0|1)')]
hetList=ANNOVARtable[ANNOVARtable[samples[2]].str.match(pat = '(0/1)|(0\|1)|(1\|0)')]
#hetList.to_csv("allHets."+inputFile, sep='\t') #Not likely to be worth writing out
spliceCandidates=hetList[hetList['Func.refGene'].isin(ncSpliceTerms)]
spliceCandidates.to_csv(childID+".allHets.SpliceCandidates."+inputFile, sep='\t')
Expand Down

0 comments on commit 07f0f11

Please sign in to comment.