-
Notifications
You must be signed in to change notification settings - Fork 2
/
plinkfreq2treemix_old.py
44 lines (37 loc) · 1.34 KB
/
plinkfreq2treemix_old.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 4 09:32:17 2019
@author: YudongCai
@Email: [email protected]
"""
import click
import numpy as np
import pandas as pd
@click.command()
@click.option('--infile', required=True,
              help='plink --freq output file (.frq.strat(.gz))')
@click.option('--outfile', required=True, help='treemix.frq.gz')
def main(infile, outfile):
    """
    convert plink freq file to treemix input file

    plink --bfile chrAuto --chr-set 29 --chr 1-29 --freq --missing --double-id --out popfreq --within Pop.cluster
    """
    # Parameters:
    #   infile  -- path to the whitespace-delimited plink table; only the
    #              SNP, CLST, MAC and NCHROBS columns are read.
    #   outfile -- path of the gzip-compressed, space-separated table to write
    #              (one column per CLST value, one row per SNP, each cell
    #              "MAC,NCHROBS-MAC").
    #
    # Both options are required so that a missing argument is reported by
    # click as a usage error instead of surfacing as a pandas failure.

    # Raw string: '\s' inside a normal literal is an invalid escape sequence.
    # int32 instead of int16 so that counts above 32767 are not a problem.
    df = pd.read_csv(
        infile,
        sep=r'\s+',
        usecols=['SNP', 'CLST', 'MAC', 'NCHROBS'],
        dtype={
            'SNP': str,
            'CLST': str,
            'MAC': np.int32,
            'NCHROBS': np.int32,
        },
    )
    print('plink file loaded')
    # DataFrame.info() prints its report itself and returns None, so it is
    # called directly rather than wrapped in print().
    df.info()

    # Count of the other allele = observed chromosomes - minor allele count.
    df['count2'] = df['NCHROBS'].values - df['MAC'].values
    print('allele2 counted')
    df.info()
    # Columns are dropped as soon as they are no longer needed to keep the
    # memory footprint of the frame down.
    del df['NCHROBS']

    # Build the "count1,count2" cell text for every (SNP, CLST) row.
    df['treemix'] = df['MAC'].astype(str) + ',' + df['count2'].astype(str)
    del df['MAC']
    del df['count2']
    print('treemix counted')
    df.info()

    # Reshape long -> wide. 'first' is a proper reducing aggregator; each
    # (SNP, CLST) pair is expected to occur once, so it simply passes the
    # single string through (an identity lambda is not a valid aggregation).
    # NOTE(review): pivot_table sorts rows by SNP id, so the output order is
    # not the input order -- confirm this is acceptable downstream.
    wide = df.pivot_table(index='SNP', columns='CLST', values='treemix',
                          aggfunc='first')
    # index=False: the SNP ids are not written, only the per-cluster columns.
    wide.to_csv(outfile, index=False, sep=' ', compression='gzip')
# Script entry point: hand control to the click command only when this file
# is executed directly, not when it is imported.
if "__main__" == __name__:
    main()