# plotdata.py

# -*- coding: utf-8 -*-
"""
Created on Mon Jul 17 13:27:18 2017
@author: Kevin Gomez (Masel Lab)
Script to process data generated by simulations in Mathematica code.
"""

# ************************************************************************************
# ************************************************************************************
# ************************************************************************************
# 1) Times series data for classes, abundances of 2d wave (mathematica)
# ************************************************************************************
# ************************************************************************************
# ************************************************************************************

# import packages needed to process data from mathematica simulations
import pickle
import scipy as sp
import numpy as np
import copy as cpy

# set parameters of simulation and create required variables
[N,s1,s2,U1,U2] = [1e9,1e-2,1e-2,1e-5,1e-5]

# calculate desai and fisher (2007) theoretical "v" and "tau_est"
vU_thry = s1*s1*(2*np.log(N*s1)-np.log(s1/(1*U1)))/((np.log(s1/(1*U1)))**2)
v2U_thry = 0.5*s1*s1*(2*np.log(N*s1)-np.log(s1/(2*U1)))/((np.log(s1/(2*U1)))**2)
tau_est = 0.5*s1/v2U_thry

# section of code for processing new data from Mathematica simulations
data_name = '_N-10p09_c1-0d01_c2-0d01_U1-1x10pn5_U2-1x10pn5_exp1'
#folder_location = 'Documents/kgrel2d/'  # use this location in linux
folder_location = ''     # use this location if windows
[times,genotypes,abundances] = [[],[],[]]

# get simulation data and store genotypes as lists since they vary in dimensions over time
# (the "with" blocks close each file even when reading fails part-way)
with open('./'+folder_location+'data/pythondata/times'+data_name+'.dat') as data_file:
    # list(map(...)) rather than a bare map object: on Python 3 np.array(map(...))
    # would wrap the iterator in a 0-d object array instead of building a float array
    times = np.array(list(map(float,data_file.read().splitlines())))

with open('./'+folder_location+'data/pythondata/genotypes'+data_name+'.dat') as data_file:
    genotypes = data_file.read().splitlines()

with open('./'+folder_location+'data/pythondata/abundances'+data_name+'.dat') as data_file:
    abundances = data_file.read().splitlines()

del data_file
num_pts = len(times)

# clean up mathematica data's format and convert loaded data into lists of arrays
# NOTE(review): eval() executes whatever text is in the .dat files; only run this
# on simulation output you generated yourself.
for i in range(num_pts):
    # tab-separated {..} groups become a comma-separated list of [..] rows
    genotypes[i] = genotypes[i].replace('\t',',').replace('{','[').replace('}',']')
    genotypes[i] = eval('np.array(['+genotypes[i]+'])')
    # abundances are wrapped twice so each entry is a single-row 2d array
    abundances[i] = eval('np.array([['+abundances[i].replace('\t',',')+']])')

# times is array, genotypes and abundances are lists of arrays
pickle_file_name = './'+folder_location+'data/pythondata/timesGenosAbund'+data_name+'.pickle'
with open(pickle_file_name,'wb') as pickle_file:
    pickle.dump([times,genotypes,abundances],pickle_file,pickle.HIGHEST_PROTOCOL)

# compute data for use in plots
rel_fit = cpy.deepcopy(genotypes)
freq = cpy.deepcopy(abundances)
mean_fit = np.zeros((num_pts,2))
fit_var = np.zeros((num_pts,2))
fit_cov = np.zeros(num_pts)
pop_load = np.zeros(num_pts)
dcov_dt = np.zeros(num_pts)

#del genotypes, abundances

for i in range(num_pts):
    # normalize abundances to frequencies (1.0 keeps this a true division on Python 2)
    freq[i] = freq[i]*(1.0/np.sum(freq[i]))
    mean_fit[i] = freq[i].dot(rel_fit[i])[0]

    # center every genotype on the mean (broadcast over rows), then scale the
    # two columns by s1 and s2 respectively
    rel_fit[i] = (rel_fit[i]-mean_fit[i])*np.array([s1,s2])

    # frequency-weighted second and third moments; the trailing [0] extracts the
    # scalar/row from the single-row product instead of assigning a size-1 array
    fit_var[i] = freq[i].dot(rel_fit[i]**2)[0]
    fit_cov[i] = freq[i].dot(rel_fit[i][:,0]*rel_fit[i][:,1])[0]
    dcov_dt[i] = freq[i].dot(rel_fit[i][:,0]**2*rel_fit[i][:,1]+rel_fit[i][:,1]**2*rel_fit[i][:,0])[0]

    # largest row sum after adding s1 to the first column (L1) or s2 to the second (L2)
    L1 = np.amax((rel_fit[i]+np.array([s1,0])).dot(np.array([[1],[1]])))
    L2 = np.amax((rel_fit[i]+np.array([0,s2])).dot(np.array([[1],[1]])))
    pop_load[i] = max([L1,L2])

# dump data into a pickle files
pickle_file_name = './'+folder_location+'data/pythondata/distrStats'+data_name+'.pickle'
with open(pickle_file_name,'wb') as pickle_file:
    pickle.dump([times,mean_fit,fit_var,fit_cov,pop_load,dcov_dt,vU_thry,v2U_thry],pickle_file,pickle.HIGHEST_PROTOCOL)

# clear this section's working names so later sections start clean
del N, s1, s2, U1, U2, L1, L2, rel_fit, freq
del vU_thry, v2U_thry, tau_est
del times, mean_fit, fit_var, fit_cov, pop_load, dcov_dt
del pickle_file_name, folder_location, data_name

# ************************************************************************************
# ************************************************************************************
# ************************************************************************************
# 2) Data for time-averaged statistics of 2d wave with varying parameters (mathematica)
# ************************************************************************************
# ************************************************************************************
# ************************************************************************************

# import packages needed for script
import pickle
import scipy as sp
import numpy as np
import copy as cpy

# configuration and result buffers for the multi-experiment pass
num_exp = 33       # number of experiments/files

#folder_location = 'Documents/kgrel2d/'  # use this location in linux
folder_location = ''     # use this location if windows

# per-experiment inputs start out empty and are refilled for every file set
times, genotypes, abundances, parameters = [], [], [], []

# one (num_exp x 1) column of ones per summary statistic
var = np.ones((num_exp, 1))
cov = np.ones((num_exp, 1))
vUthry = np.ones((num_exp, 1))
v2Uthry = np.ones((num_exp, 1))

# same layout for the second set of columns (the names ending in "p")
varp = np.ones((num_exp, 1))
covp = np.ones((num_exp, 1))
vUthryp = np.ones((num_exp, 1))
v2Uthryp = np.ones((num_exp, 1))

# one independent empty list per experiment
NsUparam = [[] for _ in range(num_exp)]

def get_sample_window(times,start_time,end_time):
    """Return [start_indx, end_indx] locating start_time and end_time in times.

    Each index is the position of the first entry of ``times`` (scanning from
    the front) that is not ``<=`` the corresponding threshold. When every
    entry passes the test the index equals ``len(times)``.
    """
    def first_past(threshold):
        # walk forward until an entry fails the "<= threshold" test
        for position, stamp in enumerate(times):
            if not (stamp <= threshold):
                return position
        return len(times)

    return [first_past(start_time), first_past(end_time)]
    
# For every experiment: load the raw Mathematica output, cache it as a pickle,
# compute the per-time-point distribution statistics, cache those too, and
# record time-averaged summaries in the preallocated (num_exp x 1) arrays.
for k in range(num_exp):
    print(k+1)
    # get simulation data and store genotypes as lists since they vary in dimensions over time
    # (the "with" blocks close each file even when reading fails part-way)
    with open('./'+folder_location+'data/pythondata/times_exp'+str(k+1)+'.dat') as data_file:
        # list(map(...)) rather than a bare map object: on Python 3 np.array(map(...))
        # would wrap the iterator in a 0-d object array instead of building a float array
        times = np.array(list(map(float,data_file.read().splitlines())))

    with open('./'+folder_location+'data/pythondata/genotypes_exp'+str(k+1)+'.dat') as data_file:
        genotypes = data_file.read().splitlines()

    with open('./'+folder_location+'data/pythondata/abundances_exp'+str(k+1)+'.dat') as data_file:
        abundances = data_file.read().splitlines()

    with open('./'+folder_location+'data/pythondata/parameters_exp'+str(k+1)+'.dat') as data_file:
        parameters = data_file.read().splitlines()

    del data_file
    num_pts = len(times)

    # clean up mathematica data's format and convert loaded data into lists of arrays
    # NOTE(review): eval() executes whatever text is in the .dat files; only run this
    # on simulation output you generated yourself.
    for i in range(num_pts):
        # tab-separated {..} groups become a comma-separated list of [..] rows
        genotypes[i] = genotypes[i].replace('\t',',').replace('{','[').replace('}',']')
        genotypes[i] = eval('np.array(['+genotypes[i]+'])')
        # abundances are wrapped twice so each entry is a single-row 2d array
        abundances[i] = eval('np.array([['+abundances[i].replace('\t',',')+']])')

    # clean up for parameters variable (the 1.0* prefix forces a float result)
    for i in range(len(parameters)):
        parameters[i] = eval('1.0*'+parameters[i])

    # times is array, genotypes and abundances are lists of arrays
    pickle_file_name = './'+folder_location+'data/pythondata/data_exp'+str(k+1)+'.pickle'
    with open(pickle_file_name,'wb') as pickle_file:
        pickle.dump([times,genotypes,abundances,parameters],pickle_file,pickle.HIGHEST_PROTOCOL)

    # compute data for use in plots
    [N,s,U] = parameters
    vU_thry = s*s*(2*np.log(N*s)-np.log(s/(1*U)))/((np.log(s/(1*U)))**2)
    v2U_thry = 0.5*s*s*(2*np.log(N*s)-np.log(s/(2*U)))/((np.log(s/(2*U)))**2)
    tau_est = 0.5*s/v2U_thry

    rel_fit = cpy.deepcopy(genotypes)
    freq = cpy.deepcopy(abundances)
    mean_fit = np.zeros((num_pts,2))
    fit_var = np.zeros((num_pts,2))
    fit_cov = np.zeros(num_pts)
    pop_load = np.zeros(num_pts)
    dcov_dt = np.zeros(num_pts)

    #del genotypes, abundances

    for i in range(num_pts):
        # normalize abundances to frequencies (1.0 keeps this a true division on Python 2)
        freq[i] = freq[i]*(1.0/np.sum(freq[i]))
        mean_fit[i] = freq[i].dot(rel_fit[i])[0]

        # center every genotype on the mean (broadcast over rows), then scale by s
        rel_fit[i] = (rel_fit[i]-mean_fit[i])*np.array([s,s])

        # frequency-weighted second and third moments; the trailing [0] extracts the
        # scalar/row from the single-row product instead of assigning a size-1 array
        fit_var[i] = freq[i].dot(rel_fit[i]**2)[0]
        fit_cov[i] = freq[i].dot(rel_fit[i][:,0]*rel_fit[i][:,1])[0]
        dcov_dt[i] = freq[i].dot(rel_fit[i][:,0]**2*rel_fit[i][:,1]+rel_fit[i][:,1]**2*rel_fit[i][:,0])[0]

        # largest row sum after adding s to the first column (L1) or to the second (L2)
        L1 = np.amax((rel_fit[i]+np.array([s,0])).dot(np.array([[1],[1]])))
        L2 = np.amax((rel_fit[i]+np.array([0,s])).dot(np.array([[1],[1]])))
        pop_load[i] = max([L1,L2])

    # dump data into a pickle files
    pickle_file_name = './'+folder_location+'data/pythondata/stats_exp'+str(k+1)+'.pickle'
    with open(pickle_file_name,'wb') as pickle_file:
        pickle.dump([times,mean_fit,fit_var,fit_cov,pop_load,dcov_dt,vU_thry,v2U_thry],pickle_file,pickle.HIGHEST_PROTOCOL)

    # average the statistics over the sampling window [10000, 1000000] and
    # store both the raw values and the values divided by vU_thry
    [start_indx,end_indx] = get_sample_window(times,10000,1000000)
    fit_var = fit_var[start_indx:end_indx]
    fit_cov = fit_cov[start_indx:end_indx]
    var[k] = np.mean(fit_var[:,0])
    cov[k] = np.mean(fit_cov)
    vUthry[k] = vU_thry
    v2Uthry[k] = v2U_thry
    varp[k] = var[k]/vU_thry
    covp[k] = cov[k]/vU_thry
    vUthryp[k] = vU_thry/vU_thry
    v2Uthryp[k] = v2U_thry/vU_thry
    NsUparam[k] = [N,s,U]

# save the per-experiment summaries collected above
pickle_file_name = './'+folder_location+'data/pythondata/sumdata_exp5.pickle'
with open(pickle_file_name,'wb') as pickle_file:
    pickle.dump([var, cov, vUthry, v2Uthry, varp, covp, vUthryp, v2Uthryp,NsUparam],pickle_file,pickle.HIGHEST_PROTOCOL)

# ************************************************************************************
# ************************************************************************************
# ************************************************************************************
# 3) Estimates of timescales for variances and covariances
# ************************************************************************************
# ************************************************************************************
# ************************************************************************************

# import packages needed for script
import pickle
import scipy as sp
import numpy as np
import copy as cpy

# section of code for processing new data from Mathematica simulations
num_exp = 33       # number of experiments/files

#folder_location = 'Documents/kgrel2d/'  # use this location in linux
folder_location = ''     # use this location if windows

# For every experiment: reload the cached raw data and statistics, estimate a
# covariance timescale at each time point, and save the results.
for k in range(num_exp):
    print(k+1)

    # NOTE(review): pickle.load executes code embedded in the file; only load
    # pickles produced by this script.
    pickle_file_name = './'+folder_location+'data/pythondata/data_exp'+str(k+1)+'.pickle'
    with open(pickle_file_name,'rb') as pickle_file:
        [times,genotypes,abundances,parameters] = pickle.load(pickle_file)

    # load time series data of distrStats from plotdata.py output
    pickle_file_name = './'+folder_location+'data/pythondata/stats_exp'+str(k+1)+'.pickle'
    with open(pickle_file_name,'rb') as pickle_file:
        [times,mean_fit,fit_var,fit_cov,pop_load,dcov_dt,vU_thry,v2U_thry] = pickle.load(pickle_file)

    # compute both medians and time scale of tau_q's
    # cov_times[i] = |0.1*mean_cov / dcov_dt[i]|, evaluated for all points at once
    mean_cov = np.mean(fit_cov)
    cov_times = np.abs((0.1*mean_cov)/np.asarray(dcov_dt,dtype=float))

    # np.median: the bare name "median" is not defined anywhere in this file
    median_cov_time = np.median(cov_times)
    tau_q = (1/vU_thry)*parameters[1]

    pickle_file_name = './'+folder_location+'data/pythondata/timescales_exp'+str(k+1)+'.pickle'
    with open(pickle_file_name,'wb') as pickle_file:
        pickle.dump([tau_q,median_cov_time,mean_cov,dcov_dt,cov_times,parameters],pickle_file,pickle.HIGHEST_PROTOCOL)

# ************************************************************************************
# ************************************************************************************
# ************************************************************************************
# 4) Data on correlation between front and bulk covariance (WIP)
# ************************************************************************************
# ************************************************************************************
# ************************************************************************************

# import packages needed for script
import pickle
import scipy as sp
import numpy as np
import copy as cpy
import matplotlib.pyplot as plt
import plotfunctions as pltfun
    
[N,U,s] = [10**9, 2*10**(-5),1*10**(-2)]      #here U is double the single trait mutation rate
tau_q = ((np.log(s/U))**2)/(s*(2*np.log(N*s)-np.log(s/U)))
q = (2*np.log(N*s))/(np.log(s/U))

# NOTE(review): pickle.load executes code embedded in the file; only load
# pickles produced by this script.

# get 2d fitness distribution data
#pickle_file_name = './data/pythondata/timesGenosAbund_N-10p09_c1-0d01_c2-0d01_U1-1x10pn5_U2-1x10pn5_exp1.pickle'   #old mathematica data
pickle_file_name = './data/2dwave_data_time_series_distr_ml-01.pickle'    #new matlab data
with open(pickle_file_name,'rb') as pickle_file:
    [times,genotypes,abundances] = pickle.load(pickle_file)

# get bulk covariance data
#pickle_file_name = './data/pythondata/distrStats_N-10p09_c1-0d01_c2-0d01_U1-1x10pn5_U2-1x10pn5_exp1.pickle'         #old mathematica data
pickle_file_name = './data/2dwave_data_time_series_stats_ml-01.pickle'         #new matlab data
with open(pickle_file_name,'rb') as pickle_file:
    [times,mean_fit,fit_var,fit_cov,pop_load,dcov_dt,vU_thry,v2U_thry] = pickle.load(pickle_file)

# compute data for use in plots
num_pts = len(times)

# compute covariances for each line of const fitness
# (built in one pass; repeated list concatenation was quadratic in num_pts)
lead_cov = [pltfun.get_cov_by_fitness_line(genotypes[i],abundances[i],10**(-2)) for i in range(num_pts)]

# third field of the last entry returned for each time point
# NOTE(review): presumably the covariance on the leading fitness line - confirm
# against plotfunctions.get_cov_by_fitness_line
nose_cov = [lead_cov[i][-1][2] for i in range(len(lead_cov))]
# np.mean: the bare name "mean" is not defined anywhere in this file
tau_fix_avg = (np.mean(pop_load[10000:-1])/s)*tau_q
times2 = [times[i]+np.floor(tau_fix_avg) for i in range(len(times))]

# get cross-covariances from bulk and nose as function of offset
[t_off,t_cov,new_times,new_covs,new_ncovs]= pltfun.get_cov_cov(times,nose_cov,fit_cov,N,s,U)

# dump data into a pickle files
#pickle_file_name = './data/2dwave_data_time_series_corr_mm-01.pickle'       #old output using mathematica data
pickle_file_name = './data/2dwave_data_time_series_corr_ml-01.pickle'       #new output using matlab data
with open(pickle_file_name,'wb') as pickle_file:
    pickle.dump([tau_fix_avg,t_off,t_cov],pickle_file,pickle.HIGHEST_PROTOCOL)

# ************************************************************************************
# ************************************************************************************
# ************************************************************************************
# 5) Process new data on timeseries for 2d wave
# ************************************************************************************
# ************************************************************************************
# ************************************************************************************

# import packages needed for script
import pickle
import scipy as sp
import numpy as np
import copy as cpy
import matplotlib.pyplot as plt
# module name only: "import plotfunctions.py" would look for a submodule named
# "py" inside a package called plotfunctions
import plotfunctions as pltfun

# open mathematica file containing simulation data for var-cov, etc
with open('./data/pythondata/sumdata_exp6.dat') as data_file:
    data = data_file.read().splitlines()

del data_file
num_pts = len(data)

# clean up mathematica data's format and convert loaded data into lists of arrays
# NOTE(review): eval() executes whatever text is in the .dat files; only run this
# on simulation output you generated yourself.
for i in range(num_pts):
    data[i] = data[i].replace('\t',',').replace('{','[').replace('}',']')
    data[i] = eval('np.array(['+data[i]+'])')

# stack the rows and drop the first column
data = np.asarray(data)
data = data[:,1:]

with open('./data/pythondata/sumparam_exp6.dat') as data_file:
    NsUparam = data_file.read().splitlines()

num_pts = len(NsUparam)

# clean up mathematica data's format and convert loaded data into lists of arrays
for i in range(num_pts):
    NsUparam[i] = NsUparam[i].replace('\t',',').replace('{','[').replace('}',']')
    # "*1.0/" makes every division in the file a float division on Python 2
    NsUparam[i] = NsUparam[i].replace('/','*1.0/')
    NsUparam[i] = eval('np.asarray(['+NsUparam[i]+'])')

NsUparam = np.asarray(NsUparam)

# get_vNsU lives in plotfunctions (it is called as pltfun.get_vNsU in the other
# sections of this file); the unqualified name is not defined here
vU_thry = np.asarray([pltfun.get_vNsU(NsUparam[i,0],NsUparam[i,1],NsUparam[i,2]) for i in range(num_pts)])
v2U_thry = np.asarray([0.5*pltfun.get_vNsU(NsUparam[i,0],NsUparam[i,1],2*NsUparam[i,2]) for i in range(num_pts)])
var = data[:,2]
cov = data[:,4]
# the "p" arrays are the corresponding quantities divided by vU_thry
varp = np.asarray([var[i]/vU_thry[i] for i in range(num_pts)])
covp = np.asarray([cov[i]/vU_thry[i] for i in range(num_pts)])
vU_thryp = np.asarray([vU_thry[i]/vU_thry[i] for i in range(num_pts)])
v2U_thryp = np.asarray([v2U_thry[i]/vU_thry[i] for i in range(num_pts)])

pickle_file_name = './data/pythondata/sumdata_exp6.pickle'
with open(pickle_file_name,'wb') as pickle_file:
    pickle.dump([var, cov, vU_thry, v2U_thry, varp, covp, vU_thryp, v2U_thryp, NsUparam],pickle_file,pickle.HIGHEST_PROTOCOL)

del var, cov, vU_thry, v2U_thry, varp, covp, vU_thryp, v2U_thryp, NsUparam

# ************************************************************************************
# ************************************************************************************
# ************************************************************************************
# 6) Process new data for time-averaged statistics of 2d wave
# ************************************************************************************
# ************************************************************************************
# ************************************************************************************

# import packages needed for script
import pickle
import scipy as sp
import numpy as np
import copy as cpy
import matplotlib.pyplot as plt
import plotfunctions as pltfun

# load existing data of variances and covariances.py output
# NOTE(review): pickle.load executes code embedded in the file; only load
# pickles produced by this script.
pickle_file_name = './data/pythondata/sumdata_exp6.pickle'
with open(pickle_file_name,'rb') as pickle_file:
    [var, cov, vUthry, v2Uthry, varp, covp, vUthryp, v2Uthryp, NsUparam] = pickle.load(pickle_file)

# import new data from additional simulations "parameters" and add to existing datafile
with open('./data/pythondata/NsUparam.dat') as data_file:
    data_par = data_file.read().splitlines()

num_pts = len(data_par)

# clean up mathematica data's format and convert loaded data into lists of arrays
# NOTE(review): eval() executes whatever text is in the .dat files; only run this
# on simulation output you generated yourself.
for i in range(num_pts):
    data_par[i] = data_par[i].replace('\t',',').replace('{','[').replace('}',']')
    data_par[i] = eval('np.array(['+data_par[i]+'])')

data_par = np.asarray(data_par)

# compute new theory and 2U theory arrays
new_vUthry = np.asarray([pltfun.get_vNsU(data_par[i][0],data_par[i][1],data_par[i][2]) for i in range(num_pts)])
new_v2Uthry = np.asarray([0.5*pltfun.get_vNsU(data_par[i][0],data_par[i][1],2*data_par[i][2]) for i in range(num_pts)])
new_vUthryp = np.asarray([new_vUthry[i]/new_vUthry[i] for i in range(num_pts)])
new_v2Uthryp = np.asarray([new_v2Uthry[i]/new_vUthry[i] for i in range(num_pts)])

# import new data from additional simulations "cov data" and add to existing datafile
with open('./data/pythondata/results.dat') as data_file:
    data = data_file.read().splitlines()

num_pts = len(data)

# clean up mathematica data's format and convert loaded data into lists of arrays
for i in range(num_pts):
    data[i] = data[i].replace('\t',',')
    # Mathematica writes exponents as "*^" and appends "`16." precision marks
    data[i] = data[i].replace('*^','e').replace('`16.','')
    data[i] = data[i].replace('{','[').replace('}',']')
    data[i] = eval('np.array(['+data[i]+'])')

# columns 3 and 5 of the first row of each record; the "p" versions are divided
# by the matching (not yet merged) new_vUthry entry
new_var = np.asarray([data[i][0][3] for i in range(num_pts)])
new_cov = np.asarray([data[i][0][5] for i in range(num_pts)])
new_varp = np.asarray([data[i][0][3]/new_vUthry[i] for i in range(num_pts)])
new_covp = np.asarray([data[i][0][5]/new_vUthry[i] for i in range(num_pts)])

# straighten data
num_exp = len(NsUparam)
num_exp2 = len(data_par)

# existing data is cut into thirds; floor division keeps the bounds integers
# (plain "/" yields floats on Python 3, which are not valid slice indices)
[start1,start2,start3] = [0,num_exp//3,2*num_exp//3]
[end1,end2,end3] = [num_exp//3,2*num_exp//3,num_exp]

# new data is cut at the fixed positions 10 and 20
[start21,start22,start23] = [0,10,20]
[end21,end22,end23] = [10,20,40]

def _interleave(old, new):
    """Alternate the three segments of `old` with the three segments of `new`."""
    return np.concatenate((old[start1:end1],new[start21:end21],old[start2:end2],new[start22:end22],old[start3:end3],new[start23:end23]), axis=0)

new_NsUparam = _interleave(NsUparam,data_par)
new_vUthry = _interleave(vUthry,new_vUthry)
new_v2Uthry = _interleave(v2Uthry,new_v2Uthry)
new_vUthryp = _interleave(vUthryp,new_vUthryp)
new_v2Uthryp = _interleave(v2Uthryp,new_v2Uthryp)

new_var = _interleave(var,new_var)
new_cov = _interleave(cov,new_cov)
new_varp = _interleave(varp,new_varp)
new_covp = _interleave(covp,new_covp)

# save the merged data set
pickle_file_name = './data/pythondata/2dwave_data_time_avg_stats_mm-01.pickle'
with open(pickle_file_name,'wb') as pickle_file:
    pickle.dump([new_var, new_cov, new_vUthry, new_v2Uthry, new_varp, new_covp, new_vUthryp, new_v2Uthryp, new_NsUparam],pickle_file,pickle.HIGHEST_PROTOCOL)

# ************************************************************************************
# ************************************************************************************
# ************************************************************************************
# 7) Data on time-series for G stability
# ************************************************************************************
# ************************************************************************************
# ************************************************************************************

# import packages needed for script
import pickle
import scipy as sp
import numpy as np
import copy as cpy
import matplotlib.pyplot as plt
import plotfunctions as pltfun

# figure 3: plot of rate of adaptation, variances, covariance and their means
# This section reloads the section-1 statistics, restricts them to a time
# window, and derives eigenvalues and an orientation angle of the 2x2 matrix
# [[var1, cov], [cov, var2]] at every time point.
parameters = [1e9,1e-2,1e-5]
[N,s,U] = parameters
[sim_start,sim_end,snapshot] = [5e3,4e4,1.313e4]

# load time series data of distrStats from plotdata.py output
# NOTE(review): pickle.load executes code embedded in the file; only load
# pickles produced by this script.
pickle_file_name = './data/pythondata/distrStats_N-10p09_c1-0d01_c2-0d01_U1-1x10pn5_U2-1x10pn5_exp1.pickle'
pickle_file = open(pickle_file_name,'rb') 
[times,mean_fit,fit_var,fit_cov,pop_load,dcov_dt,vU_thry,v2U_thry] = pickle.load(pickle_file)
pickle_file.close()

# select interval of simulation data that will be used for plot
# reduce loaded data to subset corresponding to selected interval
[start_indx,end_indx] = pltfun.get_sample_window(times,sim_start,sim_end)
times = times[start_indx:end_indx]
fit_var = fit_var[start_indx:end_indx]
fit_cov = fit_cov[start_indx:end_indx]
pop_load = pop_load[start_indx:end_indx]
dcov_dt = dcov_dt[start_indx:end_indx]
var_diff = (fit_var[:,0]-fit_var[:,1])
n1 = len(fit_cov)

# trace, determinant and the matrix itself for every time point
trG = fit_var[:,0]+fit_var[:,1] 
detG =  np.asarray([fit_var[i,0]*fit_var[i,1]-fit_cov[i]**2 for i in range(n1)])
Gmatr = [np.asarray([[fit_var[i,0],fit_cov[i]],[fit_cov[i],fit_var[i,1]]]) for i in range(n1)]
# reference directions: columns are (1,1)/sqrt(2) and (1,-1)/sqrt(2)
Xmatr = np.asarray([[1/np.sqrt(2),1/np.sqrt(2)],[1/np.sqrt(2),-1/np.sqrt(2)]])

# closed-form eigenvalues of a 2x2 matrix from trace and determinant:
# lambda1 takes the minus root (smaller), lambda2 the plus root (larger)
lambda1 = np.asarray([0.5*(trG[i]-np.sqrt(trG[i]**2-4*detG[i])) for i in range(n1)])
lambda2 = np.asarray([0.5*(trG[i]+np.sqrt(trG[i]**2-4*detG[i])) for i in range(n1)])

[Gvec,Gval,Gang] = [[],[],[]]

# compute the eigenvalues of the G matrix
# np.linalg.eig returns (eigenvalues, eigenvectors-as-columns) in no guaranteed
# order, so each result is reordered to put the eigenvalue nearest lambda1 first
for i in range(n1):
    A = np.linalg.eig(Gmatr[i])
    if(abs(A[0][0]-lambda2[i])<abs(A[0][0]-lambda1[i])):
        # first eigenvalue is the larger one: swap the values and the vector columns
        Gval = Gval+[np.asarray([A[0][1],A[0][0]])]
        Gvec = Gvec+[np.fliplr(A[1])]
    else:
        Gval = Gval+[A[0]]
        Gvec = Gvec+[A[1]]

# angle of the first eigenvector (the one paired with lambda1) relative to the
# first column of Xmatr
for i in range(n1):
    # sign(x)*x is |x|, so this is the angle between (|v0|,|v1|) and (1,1)/sqrt(2)
    Ang1 = np.arccos((np.sign(Gvec[i][0,0])*Gvec[i][0,0]*Xmatr[0,0]+np.sign(Gvec[i][1,0])*Gvec[i][1,0]*Xmatr[1,0])/np.linalg.norm(Gvec[i][:,0]))
    # the angle is positive when |v1| > |v0| and negative when |v1| < |v0|
    Ang1 = np.sign(np.sign(Gvec[i][1,0])*Gvec[i][1,0]-np.sign(Gvec[i][0,0])*Gvec[i][0,0])*Ang1
    # express the angle as a fraction of pi/2
    Gang = Gang + [Ang1*2/np.pi]

# convert list to array
Gval = np.asarray(Gval)
Gang = np.asarray(Gang)

# save the windowed statistics, eigenvalues and angles (this writes the file;
# Gval and Gvec are not part of the output)
pickle_file_name = './data/pythondata/2dwave_data_time_series_stab_mm-1.pickle'
pickle_file = open(pickle_file_name,'wb') 
pickle.dump([times,fit_var,fit_cov,vU_thry,v2U_thry,lambda1,lambda2,Gang,parameters],pickle_file,pickle.HIGHEST_PROTOCOL)
pickle_file.close()

# ************************************************************************************
# ************************************************************************************
# ************************************************************************************
# 8) New data for time-averaged statistics of 2d wave with varying parameters (matlab)
# ************************************************************************************
# ************************************************************************************
# ************************************************************************************

import pickle
import numpy as np
import plotfunctions as pltfun

# Need to change this
#1. add more points
#2. decrease space between plots
#3. change dots to one dot in legend
#------------------------------------------------------------------------------------

# import new data from Pearce Fisher simulations
# (each file gets its own "with" block; previously the parameter file was never
# closed because data_file.close() was called a second time in its place)
with open('./data/2dwave_data_time_avg_stats_ml-01-1.dat') as data_file:
    mydata = data_file.read().splitlines()

with open('./data/2dwave_data_time_avg_stats_ml-01-0.dat') as param_file:
    NsUparam = param_file.read().splitlines()

del data_file, param_file
num_pts = len(mydata)

# convert every comma-separated text row into an array
# NOTE(review): eval() executes whatever text is in the .dat files; only run this
# on simulation output you generated yourself.
for i in range(num_pts):
    mydata[i] = eval('np.array(['+mydata[i]+'])')
    NsUparam[i] = eval('np.array(['+NsUparam[i]+'])')

mydata = np.asarray(mydata)
NsUparam = np.asarray(NsUparam)

# columns 3 and 5 of the results; these are views, so the rescaling in the loop
# below also updates mydata in place
var = mydata[:,3]
cov = mydata[:,5]

vUthry = []
v2Uthry = []
vUthryp = []
v2Uthryp = []
varp = []
covp = []

for i in range(num_pts):
    vUthry.append(pltfun.get_vNsU(NsUparam[i][0],NsUparam[i][1],NsUparam[i][2]))
    # NOTE(review): the other sections of this file multiply the doubled-U
    # value by 0.5; this one does not - confirm which is intended.
    v2Uthry.append(pltfun.get_vNsU(NsUparam[i][0],NsUparam[i][1],2*NsUparam[i][2]))
    vUthryp.append((1/vUthry[i])*vUthry[i])
    v2Uthryp.append((1/vUthry[i])*v2Uthry[i])
    # rescale by the square of the second parameter column
    var[i] = NsUparam[i,1]**2*var[i]
    cov[i] = NsUparam[i,1]**2*cov[i]
    varp.append((1/vUthry[i])*var[i])
    covp.append((1/vUthry[i])*cov[i])

vUthry = np.asarray(vUthry)
v2Uthry = np.asarray(v2Uthry)
vUthryp = np.asarray(vUthryp)
v2Uthryp = np.asarray(v2Uthryp)
varp = np.asarray(varp)
covp = np.asarray(covp)

# save the processed statistics
pickle_file_name = './data/pythondata/2dwave_data_time_avg_stats_ml-01.pickle'
with open(pickle_file_name,'wb') as pickle_file:
    pickle.dump([var, cov, vUthry, v2Uthry, varp, covp, vUthryp, v2Uthryp,NsUparam],pickle_file,pickle.HIGHEST_PROTOCOL)

# ************************************************************************************
# ************************************************************************************
# ************************************************************************************
# 9) New data on time-series using matlab code ()
# ************************************************************************************
# ************************************************************************************
# ************************************************************************************

#The code below takes data generated from Pearce and Fisher's matlab code and changes it
#into the format used for graphs.

# fixing data from pearce fisher code to generate new plots

# import packages needed for script
import pickle
import scipy as sp
import numpy as np
import copy as cpy
import matplotlib.pyplot as plt
import plotfunctions as pltfun

# import new data from additional simulations "parameters" and add to existing datafile

# read the four text files one at a time; each "with" block closes its file
# even if reading fails (previously all four were opened before any was read)
with open('./data/2dwave_data_time_series_stats_ml-01-0.txt') as data_file1:
    data_parameters = data_file1.read().splitlines()

with open('./data/2dwave_data_time_series_stats_ml-01-1.txt') as data_file2:
    data_2dwave = data_file2.read().splitlines()

with open('./data/2dwave_data_time_series_stats_ml-01-2.txt') as data_file3:
    data_classes = data_file3.read().splitlines()

with open('./data/2dwave_data_time_series_stats_ml-01-3.txt') as data_file4:
    data_abundances = data_file4.read().splitlines()

# NOTE(review): eval() executes whatever text is in the data files; only run
# this on simulation output you generated yourself.

# only the first line of the parameter file is used
data_parameters = eval('np.asarray(['+data_parameters[0]+'])')
parameters = data_parameters
num_pts = len(data_2dwave)

# convert every text row into an array
for i in range(num_pts):
    data_2dwave[i] = eval('np.array(['+data_2dwave[i]+'])')
    # [:-1] drops the final character of each line before parsing
    # (presumably a trailing separator - verify against the matlab output)
    data_classes[i] = eval('np.array(['+data_classes[i][:-1]+'])')
    # abundances are wrapped twice so each entry is a single-row 2d array
    data_abundances[i] = eval('np.array([['+data_abundances[i][:-1]+']])')

data_2dwave = np.asarray(data_2dwave)

# construct arrays for times genotypes and abundances
times       = data_2dwave[:,0]
genotypes   = data_classes
abundances  = data_abundances

# Summary of data_2dwave columns: 
# timestep,sigmax2,sigmay2,sigmaxy,front_cov,pop_load,L(2,2),L(1,1),Gang,meanfitness,meanfitx,meanfity

# construct arrays for times, mean_fit, fit_var, fit_cov, pop_load, dcov_dt, vU_thry, v2U_thry
# (variances/covariance are scaled by parameters[1]**2, means and load by parameters[1])
fit_var     = parameters[1]**2*data_2dwave[:,1:3]
fit_cov     = parameters[1]**2*data_2dwave[:,3]
mean_fit    = parameters[1]*data_2dwave[:,9]
pop_load    = parameters[1]*data_2dwave[:,5]
# NOTE(review): this is the sum of the first variance and the covariance, not a
# time derivative - confirm whether it is only a placeholder for the pickle layout.
dcov_dt     = fit_var[:,0]+fit_cov[:]
vU_thry     = pltfun.get_vNsU(parameters[0],parameters[1],parameters[2])
v2U_thry    = 0.5*pltfun.get_vNsU(parameters[0],parameters[1],2*parameters[2])

lambda1     = data_2dwave[:,6]
lambda2     = data_2dwave[:,7]
# the Gang column is divided by 90
Gang        = (1/90.0)*data_2dwave[:,8]

# output new data for time series of times genotypes and abundances
pickle_file_name = './data/2dwave_data_time_series_distr_ml-01.pickle'
with open(pickle_file_name,'wb') as pickle_file:
    pickle.dump([times,genotypes,abundances],pickle_file,pickle.HIGHEST_PROTOCOL)

# output new data for time series of 2d wave stats
pickle_file_name = './data/2dwave_data_time_series_stats_ml-01.pickle'
with open(pickle_file_name,'wb') as pickle_file:
    pickle.dump([times,mean_fit,fit_var,fit_cov,pop_load,dcov_dt,vU_thry,v2U_thry],pickle_file,pickle.HIGHEST_PROTOCOL)

# output new data for time series of the eigenvalue and angle columns
pickle_file_name = './data/2dwave_data_time_series_stab_ml-01.pickle'
with open(pickle_file_name,'wb') as pickle_file:
    pickle.dump([times,fit_var,fit_cov,vU_thry,v2U_thry,lambda1,lambda2,Gang,parameters],pickle_file,pickle.HIGHEST_PROTOCOL)