-
Notifications
You must be signed in to change notification settings - Fork 0
/
mutationloadfwdsingles.py
261 lines (200 loc) · 12.1 KB
/
mutationloadfwdsingles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
"""
Wright-Fisher forward-time population genetic simulation
Uses realistic U and N for humans (~2 and 20,000, respectively)
Efficient computation accomplished by representing genomes as linkage blocks
"""
import os
import fwdpy11
import numpy as np
import sys
import concurrent.futures
import FixedCrossoverInterval
import msprime
import tskit.trees
#This function runs a simulation with the given parameters.
#Current constants: relative fitness, diploidy, multiplicative effects, single value of sd
#It either returns the slope of average population fitness after a burn-in phase,
#or just runs a single simulation.
def runsim(generations, popsize, seed, numberofchromosomes, chromosomesize, Ub, Sb, Ud, Sd, Ubname, Sbname, popsizename, generationsname, chromsizename, Udname, Sdname, seedname):
rawdatafilename = "rawdataforUb" + Ubname + "Sb" + Sbname + "popsize" + popsizename + "generations" + generationsname + "chromosomesize" + chromsizename + "Ud" + Udname + "Sd" + Sdname + "seed" + seedname + ".txt"
rawdatafile = open(rawdatafilename, "w")
summarydatafilename = "summarydataforUb" + Ubname + "Sb" + Sbname + ".txt"
summarydatafile = open(summarydatafilename, "w")
rng = fwdpy11.GSLrng(seed)
fitnesstype = fwdpy11.Multiplicative(2.)
genomesize = numberofchromosomes * chromosomesize
#This should produce a list of recombination intervals to put into recregions later.
chromosomelist = []
for i in range(0, numberofchromosomes-1):
chromosomelist.append(FixedCrossoverInterval.FixedCrossoverInterval(i*chromosomesize, i*chromosomesize + chromosomesize, 2))
class Recorder(object):
def __init__(self):
self.wbar = []
#fwdpy requires that the call function of recorder objects include a sampler variable that would allow for preservation of samples from the tree sequence,
#even though I don't actually need that functionality here.
def __call__(self, popsize, sampler):
if pop.generation >= pop.N:
metadata = np.array(pop.diploid_metadata, copy=False)
self.wbar.append((pop.generation, metadata["w"].mean()))
#The rates goes neutral mutation rate, selected mutation rate, recombination rate in that order.
#Note that the selected mutation rate is the mutation rate per haploid genome, not diploid genome.
#The constant value of Sd needs to be given as a negative number, otherwise it will be beneficial.
pdict = {'gvalue': fitnesstype,
'rates': (0., Ud/2, None),
'nregions': [],
'sregions': [fwdpy11.ConstantS(0, genomesize, 1, Sd)],
'recregions': chromosomelist,
'demography': np.array([popsize]*generations, dtype=np.uint32)
}
params = fwdpy11.ModelParams(**pdict)
pop = fwdpy11.DiploidPopulation(popsize, genomesize)
recorder = Recorder()
fwdpy11.evolvets(rng, pop, params, 100, recorder, suppress_table_indexing=True)
treesequence = pop.dump_tables_to_tskit()
#Print out raw data of interest.
print("Generations,Mean_fitness", file = rawdatafile)
for i in range(popsize, generations):
print("{},{}".format(i, recorder.wbar[i-popsize][1]), file = rawdatafile)
ts = msprime.mutate(treesequence, rate=0.0001, keep=False)
meandiversity = ts.diversity(sample_sets=None, windows=None, mode='site', span_normalise=True)
print("Mean pairwise genetic diversity at end of simulation: {:.10f}".format(meandiversity), file = summarydatafile)
#This function takes in two Sb values and modifies them
#until the first produces a slope of fitness smaller than some root value
#and the second produces a slope larger than the same root value
#It returns an array with the two Sb values (smaller value first)
def BracketRootforSb(generations, popsize, seed, numberofchromosomes, chromosomesize, desiredslope, Ub, Sb1, Sb2, Ud, Sd, Ubname, Sb1name, Sb2name, popsizename, generationsname, chromsizename, Udname, Sdname, seedname, miscfile):
numberoftries = 10
factor = 0.01
currentSb1 = Sb1
currentSb2 = Sb2
currentSb1name = Sb1name
currentSb2name = Sb2name
resultingslope1 = runsim(generations, popsize, seed, numberofchromosomes, chromosomesize, Ub, currentSb1, Ud, Sd, Ubname, currentSb1name, popsizename, generationsname, chromsizename, Udname, Sdname, seedname)
resultingslope2 = runsim(generations, popsize, seed, numberofchromosomes, chromosomesize, Ub, currentSb2, Ud, Sd, Ubname, currentSb2name, popsizename, generationsname, chromsizename, Udname, Sdname, seedname)
if resultingslope1 == resultingslope2:
print("Slopes after first try are the same, equaling {} and {}\n".format(resultingslope1, resultingslope2), file = miscfile)
return 0
if resultingslope1 > 0.0:
print("Slope with sb 0.0 is positive, slope = {}\n".format(resultingslope1), file = miscfile)
return 0
for i in range(0, numberoftries):
if resultingslope1 < desiredslope and resultingslope2 > desiredslope:
return [currentSb1, currentSb2]
elif resultingslope2 <= desiredslope:
currentSb2 += factor
currentSb2name = str(currentSb2)
resultingslope2 = runsim(generations, popsize, seed, numberofchromosomes, chromosomesize, Ub, currentSb2, Ud, Sd, Ubname, currentSb2name, popsizename, generationsname, chromsizename, Udname, Sdname, seedname)
elif resultingslope1 >= desiredslope:
currentSb1 -= factor
currentSb1name = str(currentSb1)
resultingslope1 = runsim(generations, popsize, seed, numberofchromosomes, chromosomesize, Ub, currentSb1, Ud, Sd, Ubname, currentSb1name, popsizename, generationsname, chromsizename, Udname, Sdname, seedname)
print("Failed to bracket slope of {} in 10 tries.\n".format(desiredslope), file = miscfile)
return 0
def FindingSbValueWithGivenSlope(generations, popsize, seed, numberofchromosomes, chromosomesize, desiredslope, Ub, Sb1, Sb2, Ud, Sd, Ubname, Sb1name, Sb2name, popsizename, generationsname, chromsizename, Udname, Sdname, seedname, miscfile):
accuracy = 0.00005
maxtries = 30
currentSb1 = Sb1
currentSb2 = Sb2
currentSb1name = Sb1name
currentSb2name = Sb2name
currentfactor = Sb2 - Sb1
currentroot = Sb1
if currentfactor < 0:
print("Sb2 smaller than Sb1: Sb1 = {}, Sb2 = {}.\n".format(currentSb1, currentSb2), file = miscfile)
return 0
currentslope1 = runsim(generations, popsize, seed, numberofchromosomes, chromosomesize, Ub, currentSb1, Ud, Sd, Ubname, currentSb1name, popsizename, generationsname, chromsizename, Udname, Sdname, seedname)
currentslopemid = runsim(generations, popsize, seed, numberofchromosomes, chromosomesize, Ub, currentSb2, Ud, Sd, Ubname, currentSb2name, popsizename, generationsname, chromsizename, Udname, Sdname, seedname)
#Checks to make sure that the desired slope is properly bracketed.
#The only way I'm really worried about this happening is with significant stochasticity between simulations,
#which so far has not been observed.
#These also specifically check that slope1 is less than the desired slope and slopemid is larger than the desired slope.
#This makes the root-finding algorithm less general but lets me skip some steps I don't want to do.
if ((currentslope1 - desiredslope)*(currentslopemid - desiredslope)) >= 0.0:
print("Slope root not bracketed properly, with starting slopes {} and {}\n".format(currentslope1, currentslopemid), file = miscfile)
return 0
if currentslope1 > desiredslope:
print("Slope 1 larger than desired slope. Slope 1 = {}, desired slope = {}\n".format(currentslope1, desiredslope), file = miscfile)
return 0
if currentslopemid < desiredslope:
print("Slope 2 smaller than desired slope. Slope 2 = {}, desired slope = {}\n".format(currentslope2, desiredslope), file = miscfile)
return 0
for i in range(0, maxtries):
currentfactor *= 0.5
currentSbmid = currentroot + currentfactor
currentSbmidname = str(currentSbmid)
currentslopemid = runsim(generations, popsize, seed, numberofchromosomes, chromosomesize, Ub, currentSbmid, Ud, Sd, Ubname, currentSbmidname, popsizename, generationsname, chromsizename, Udname, Sdname, seedname)
if currentslopemid <= desiredslope:
currentroot = currentSbmid
if currentfactor < accuracy or currentSbmid == 0.0:
return currentroot
print("Root not found. Root after 30 tries was: {}\n".format(currentroot), file = miscfile)
#The actual main body starts below!
if __name__ == "__main__":
#Following lines load input parameters.
generations = int(sys.argv[1])
popsize = int(sys.argv[2])
deleteriousmutationrate = float(sys.argv[3]) #this is the genome-wide rate, so not compatible with the c version
Sd = float(sys.argv[4])
chromosomesize = int(sys.argv[5])
numberofchromosomes = int(sys.argv[6]) #this is number of chromosomes, not ploidy -- all individuals are diploid
beneficialmutationrate = float(sys.argv[7]) #genome-wide again I guess
Sb = float(sys.argv[8])
typeofrun = sys.argv[9]
slopeforcontourline = float(sys.argv[10])
randomnumberseed = int(sys.argv[11])
beneficialson = bool(sys.argv[12])
#Following lines save some parameters as characters for naming data files later.
generationsname = sys.argv[1]
popsizename = sys.argv[2]
Udname = sys.argv[3]
Sdname = sys.argv[4]
chromosomesizename = sys.argv[5]
Ubname = sys.argv[7]
Sbname = sys.argv[8]
seedname = sys.argv[11]
#Following lines open some files to which to print debugging data.
#There might be a better way to do debugging in python, like with exceptions and stuff, but I don't know how to do it yet.
miscfile = open("miscellaneous", "w")
verbosefile = open("verbose", "w")
#Following lines make a new directory to store data from the simulations and changes directories into it. (I think)
#The final directory has slightly different names depending on what type of run is called.
if typeofrun == 'single':
if beneficialson:
directoryname = 'dataforUb' + sys.argv[7] + 'Sb' + sys.argv[8] + 'popsize' + sys.argv[2] + 'chromosomesize' + sys.argv[5] + 'Ud' + sys.argv[3] + 'Sd' + sys.argv[4] + 'seed' + sys.argv[11]
os.mkdir(directoryname)
os.chdir(directoryname)
else:
directoryname = 'datafornobeneficialspopsize' + sys.argv[2] + 'chromosomesize' + sys.argv[5] + 'Ud' + sys.argv[3] + 'Sd' + sys.argv[4] + 'seed' + sys.argv[11]
os.mkdir(directoryname)
os.chdir(directoryname)
#Now that we're in the right directory, just run the simulation
runsim(generations, popsize, randomnumberseed, numberofchromosomes, chromosomesize, beneficialmutationrate, Sb, deleteriousmutationrate, Sd, sys.argv[7], sys.argv[8], sys.argv[2], sys.argv[1], sys.argv[5], sys.argv[3], sys.argv[4], sys.argv[11])
elif typeofrun == 'root':
if beneficialson:
directoryname = 'dataforUb' + sys.argv[7] + 'slope' + sys.argv[10] + 'popsize' + sys.argv[2] + 'chromosomesize' + sys.argv[5] + 'seed' + sys.argv[11]
os.mkdir(directoryname)
os.chdir(directoryname)
else:
directoryname = 'datafornobeneficialsslope' + sys.argv[10] + 'popsize' + sys.argv[2] + 'chromosomesize' + sys.argv[5] + 'seed' + sys.argv[11]
os.mkdir(directoryname)
os.chdir(directoryname)
#Now that we're in the right directory, start the root-finding algorithm
Sb1 = 0.0
Sb2 = Sb
Sb1name = "0.0"
Sb2name = Sbname
#Bracket Sb values on either side of the desired slope.
bracketedSbs = BracketRootforSb(generations, popsize, randomnumberseed, numberofchromosomes, chromosomesize, slopeforcontourline, beneficialmutationrate, Sb1, Sb2, deleteriousmutationrate, Sd, sys.argv[7], Sb1name, Sb2name, sys.argv[2], sys.argv[1], sys.argv[5], sys.argv[3], sys.argv[4], sys.argv[11], miscfile)
Sb1 = bracketedSbs[0]
Sb2 = bracketedSbs[1]
#Find an Sb value that produces desired slope.
rootSb = FindingSbValueWithGivenSlope(generations, popsize, randomnumberseed, numberofchromosomes, chromosomesize, slopeforcontourline, beneficialmutationrate, Sb1, Sb2, deleteriousmutationrate, sys.argv[7], Sb1name, Sb2name, sys.argv[2], sys.argv[1], sys.argv[5], sys.argv[3], sys.argv[4], sys.argv[11], miscfile)
finaldatafilename = 'final' + directoryname
finaldatafile = open(finaldatafilename, "w")
print("The value of Sb required to produce an average slope of log fitness of {} is {}.\n".format(slopeforcontourline, rootSb), file = finaldatafile)
finaldatafile.close
else:
print("That type of run is not currently supported.", file = miscfile)
#One day maybe I'll have more types of runs ...
miscfile.close
verbosefile.close