-
Notifications
You must be signed in to change notification settings - Fork 2
/
getCommonSNPs.py
35 lines (29 loc) · 1.06 KB
/
getCommonSNPs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# - read dbSNP rsXXX IDs from a file (one per line) and store them in a set
# - read each VCF line and extract its rs ID
# - if that rs ID is in the set, write out the line
import click
# Command-line entry point: load the wanted rsIDs from --rsids, then echo the
# header and the matching records of --vcf to standard output.
@click.command(context_settings={'help_option_names': ['-h', '--help']})
@click.option('--rsids', '-r', type=str, required=True,
              help='List of rsXXX IDs from dbSNP, one a line ')
@click.option('--vcf', '-v', type=str, required=True,
              help='VCF to extract from')
def selectSNPs(rsids, vcf):
    # Deliberately documented with comments rather than a docstring: click
    # would surface a docstring as the command's --help description.
    wantedIDs = getIDs(rsids)
    printIDsOnly(vcf, wantedIDs)
def getIDs(rsids):
    """Load the IDs listed in a text file into a set.

    Args:
        rsids: Path to a text file holding one ID per line.

    Returns:
        A set of the ID strings with surrounding whitespace removed.
        Blank (or whitespace-only) lines are skipped, and duplicate IDs
        collapse into a single entry.
    """
    with open(rsids, 'r') as rsidsFile:
        # Strip both ends: an ID kept with leading whitespace could never
        # equal a whitespace-split VCF column.
        stripped = (line.strip() for line in rsidsFile)
        return {rsID for rsID in stripped if rsID}
def printIDsOnly(vcf, rsIDs):
    """Print the VCF header and every record whose ID is in *rsIDs*.

    Lines starting with "#" (meta-information and column header) are always
    written to standard output. A data line is written when its third
    whitespace-separated column (the VCF ID field), or any of the
    ";"-separated identifiers inside that column, is contained in *rsIDs*.
    Blank lines and lines with fewer than three columns are skipped.

    Args:
        vcf: Path to the VCF file to filter.
        rsIDs: Container of ID strings supporting ``in`` membership tests.
    """
    with open(vcf, 'r') as vcfFile:
        for line in vcfFile:
            line = line.rstrip()
            if line.startswith("#"):
                # print(...) with one argument behaves the same on
                # Python 2 and Python 3.
                print(line)
                continue
            # Only the ID column is needed, so stop splitting right after it.
            cols = line.split(None, 3)
            if len(cols) < 3:
                # Blank or truncated line: there is no ID column to test.
                continue
            idField = cols[2]
            # Whole-field match first (original behaviour), then each part of
            # a multi-ID field such as "rs1;rs2".
            if idField in rsIDs or (
                    ";" in idField
                    and any(part in rsIDs for part in idField.split(";"))):
                print(line)
# Invoke the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    selectSNPs()