-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathQKcartographer_segregation.py
141 lines (98 loc) · 4.23 KB
/
QKcartographer_segregation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#! /usr/bin/python
"""
%prog prefix population_type genotypes
Assesses segregation distortion within a population
prefix denotes the population identifier
population_type must adher to QTL Cartographer experimental design codes
Author: Matthew Moscou <[email protected]>
This performs the following:
1. Imports genotypic data
2. Generates R plot of segregation distortion
Improvements from existing script set (13 June 2017):
Future improvements to include:
"""
# modules
import commands
import math
from math import *
import optparse
from optparse import OptionParser
import os
import string
# global variables
paletteAHB = ["#E69F00", "#939393", "#56B4E9", "#D55E00"]
paletteAH = ["#E69F00", "#939393", "#D55E00"]
paletteBH = ["#E69F00", "#939393", "#D55E00"]
paletteAB = ["#E69F00", "#56B4E9", "#D55E00"]
color_name = ['Orange', 'Grey', 'Sky blue', 'Vermillion']
allele_name = {'A':'A', 'H':'H', 'B':'B', '-':'Missing'}
# import arguments and options
usage = "usage: %prog prefix population_type genotypes"
parser = OptionParser(usage=usage)
(options, args) = parser.parse_args()
# import population experimental design code
population_type = args[1]
# import genetic map
genetic_map_file = open(args[2], 'r')
mapped_markers = []
genetic_map = []
marker_distance = {}
chromosome_markers = {}
chromosomes = []
truth = False
for line in genetic_map_file.readlines():
line = string.replace(line, '\n', '')
sline = string.split(line, '\t')
if truth:
mapped_markers.append(sline[0])
marker_distance[sline[0]] = float(sline[2])
if sline[1] not in chromosome_markers.keys():
chromosome_markers[sline[1]] = []
chromosomes.append(sline[1])
chromosome_markers[sline[1]].append(sline[0])
genetic_map.append([sline[0], sline[3:]])
if population_size != len(sline[3:]):
print 'Unequal progeny in population at marker ' + sline[0]
population_size = len(sline[3:])
else:
population_size = len(sline[3:])
truth = True
genetic_map_file.close()
# set population type for analysis
if args[1] in ['RI0', 'RI1']:
alleles = ['A', 'B', '-']
palette = paletteAB
elif args[1] in ['B1']:
alleles = ['A', 'H', '-']
palette = paletteAH
elif args[1] in ['B2']:
alleles = ['B', 'H', '-']
palette = paletteBH
elif args[1][0] in ['S']:
alleles = ['A', 'H', 'B', '-']
palette = paletteAHB
# generate R plot
R_input_file = open(args[0] + '_segregation_distortion.txt', 'w')
R_input_file.write('Chromosome' + '\t' + 'cM' + '\t' + 'Allele' + '\t' + 'Frequency' + '\n')
for chromosome in chromosomes:
for marker in chromosome_markers[chromosome]:
for allele in alleles:
R_input_file.write(chromosome + '\t' + str(marker_distance[marker]) + '\t' + allele_name[allele] + '\t' + str((genetic_map[mapped_markers.index(marker)][1].count(allele) * 1.0) / population_size) + '\n')
R_input_file.close()
named_alleles = []
for allele in alleles:
named_alleles.append(allele_name[allele])
dir_path = os.path.dirname(os.path.realpath(__file__))
R_analysis_file = open(args[0] + '_segregation_distortion.R', 'w')
R_analysis_file.write('library(ggplot2)' + '\n')
R_analysis_file.write('setwd("' + dir_path + '")\n')
R_analysis_file.write('data = read.table(file="' + args[0] + '_segregation_distortion.txt", header=T)' + '\n')
R_analysis_file.write('data = data.frame(data)' + '\n')
R_analysis_file.write('data$Allele <- factor(data$Allele, levels=c("' + '","'.join(named_alleles) + '"))' + '\n')
#R_analysis_file.write('postscript(file="' args[0] + '_segregation_distortion.ps", width=9, height=3)' + '\n')
R_analysis_file.write('png(file="' + args[0] + '_segregation_distortion.png", width=1500, height=500)' + '\n')
R_analysis_file.write('ggplot(data, aes(cM, group=Chromosome)) + geom_line(aes(x=cM, y=Frequency, group=Allele, color=Allele)) + geom_point(aes(x=cM, y=Frequency, color=Allele)) + facet_grid (.~ Chromosome, scales = "free_x", space="free_x") + scale_colour_manual(values = c("' + '","'.join(palette) + '"))' + '\n')
R_analysis_file.write('dev.off()' + '\n')
R_analysis_file.close()
commands.getstatusoutput('R --vanilla < ' + args[0] + '_segregation_distortion.R > temp.txt')
commands.getstatusoutput('rm temp.txt')