/
script for daniel (competion between valley crossing and uphill mutations in asexual population).py
148 lines (136 loc) · 6.94 KB
/
script for daniel (competion between valley crossing and uphill mutations in asexual population).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# packages
import numpy as np
import csv
import sys
import time
from numpy import zeros
from matplotlib.pyplot import (legend, plot, show,
title, xlabel, ylabel)
# global variables
pop_size = 1.e6 # population size
num_gen = 1000000 # number of generations
muta = .001 # A mutation rate
mutb = 0#.00001 # B mutation rate
mutc =.00000001 # C mutation rate
sa = 0.08 # Fitness effect of a mutation A
sb = 0.01 # Fitness effect of a mutation B
sc = sb # Fitness effect of a mutation C
count_x = 0 # number of zero zy slices of the simulation grid starting at x = 0
numexp = 1;
simulation_grid = np.zeros((30,100,200)) # grid on which the simulation occurs
num_Amut = np.zeros((num_gen)) # array to save number of available A-mutations
num_Amut_hist = np.zeros((num_gen,30)) # array to save a histogram of A-mutations
increment = 1;
# get fitness of a grid point.
def getFitness(ka,kb,kc):
return (kb*sb+kc*sc-ka*sa)
# incoming -outgoing (x,y,z)
def freq_lineage(simulation_grid,x,y,z):
const = simulation_grid[x,y,z]*(1+getFitness(x,y,z))*(1-muta-mutb-mutc*x)
if x>0 : const_x=simulation_grid[x-1,y,z]*(1+getFitness(x-1,y,z))*muta
else: const_x = 0
if y>0: const_y=simulation_grid[x,y-1,z]*(1+getFitness(x,y-1,z))*mutb
else: const_y=0
if z>0 and x+1<simulation_grid.shape[0]: const_z=simulation_grid[x+1,y,z-1]*(1+getFitness(x+1,y,z-1))*(x+1)*mutc
else: const_z=0
return const+const_x+const_y+const_z
# Compute probablities of offspring given the current size of various lineages
# Note that x=ka y=kb z=kc in the implementation of this function
def probability_offspring(simulation_grid,xl,xh,yl,yh,zl,zh):
sim_grid = np.zeros((xh+1-xl)*(yh+1-yl)*(zh+1-zl))
i=0
for x in range(xl,xh+1):
for y in range(yl,yh+1):
for z in range(zl,zh+1):
sim_grid[i] = freq_lineage(simulation_grid,x,y,z)
i=i+1
sim_grid = sim_grid/sum(sim_grid)
return np.random.multinomial(pop_size, sim_grid)
# Update the simulation grid to reflect a time step
def update_grid(simulation_grid,xl,xh,yl,yh,zl,zh):
i=0
offspring_prob = probability_offspring(simulation_grid,xl,xh,yl,yh,zl,zh)
for x in range(xl,xh+1):
for y in range(yl,yh+1):
for z in range(zl,zh+1):
simulation_grid[x][y][z] = offspring_prob[i]
i=i+1
return
# make sure that the simulation goes out of bounds by increasing the array size appropriately if need be.
def boundConditions(xmax,ymax,zmax):
global simulation_grid,increment
if xmax+1==simulation_grid.shape[0]:
simulation_grid = np.pad(simulation_grid,((0,10*increment),(0,0),(0,0)),mode='constant', constant_values=0)
if ymax+1==simulation_grid.shape[1]:
simulation_grid = np.pad(simulation_grid,((0,0),(0,10*increment),(0,0)),mode='constant', constant_values=0)
if zmax+1==simulation_grid.shape[2]:
simulation_grid = np.pad(simulation_grid,((0,0),(0,0),(0,5*increment)),mode='constant', constant_values=0)
return
#main function which runs the code
def main():
global mutc,simulation_grid,num_Amut
vals = np.zeros((numexp,3))
name = sys.argv[1]
#mutcArr = np.array([1.e-7,10**(-6.67),10**(-6.33),10**(-6),10**(-5.67),10**(-5.33),10**(-5),10**(-4.67),10**(-4.33),10**(-4)])
mutcArr = np.array([0,1.e-6,10**(-5),10**(-4),10**(-3)])
sizes = np.array([[25,180,15],[25,180,15],[25,180,30],[25,165,45],[25,140,80],[24,140,80],[25,90,170],[25,70,250],[25,35,350],[25,35,450]])
mutc = mutcArr[int(sys.argv[1])]
for k in range(0,numexp):
find_time = 0
simulation_grid = np.zeros(sizes[int(sys.argv[1])])
simulation_grid[0][0][0]=pop_size
for i in range(0,num_gen):
#define a time start variable to be used to compute actual run time of simulation in seconds.
start1 = time.time()
#obtain the coords(genomic composition) in the grid where there is a nonzero number of individuals.
nz = np.nonzero(simulation_grid)
# Find the minimum coords and the minimum,maximum coords (smallest/largest number of mutations of each type(A,B,C))
vectm = map(min,nz)
vectx = map(max,nz)
#computing boundary conditions
boundConditions(vectx[0],vectx[1],vectx[2])
#updating the grid
update_grid(simulation_grid,vectm[0],vectx[0]+1,vectm[1],vectx[1]+1,vectm[2],vectx[2]+1)
# Computing the number of available A-mutations , histograms of lineages with x A-mutations.
for x in range(vectm[0],vectx[0]+1):
for y in range(vectm[1],vectx[1]+1):
for z in range(vectm[2],vectx[2]+1):
num_Amut[i] += simulation_grid[x,y,z]*x
num_Amut_hist[i][x]+=simulation_grid[x,y,z];
#compute end of simulation time
end1 = time.time()
#compute total run time
find_time = find_time+ end1 - start1
#obtain the coords(genomic composition) in the grid where there is a nonzero number of individuals.
nz = np.nonzero(simulation_grid)
# Find the minimum coords and the minimum,maximum coords (smallest/largest number of mutations of each type(A,B,C))
vect = map(min,nz)
vals[k][0] = vect[0] # A mutations shared fixed in the population
vals[k][1] = vect[1] # B mutations shared fixed in the population
vals[k][2] = vect[2] # C mutations shared fixed in the population
print vect[0],vect[1],vect[2]
#create an index array (to save time in generations)
index = zeros((num_gen))
#Save the histogram of A-mutations in the population in a comma seperated values file.
np.savetxt("histe6.csv", num_Amut_hist, delimiter=",")
for i in range(0,num_gen):
index[i] = i+1 # populate the index array (to be used as the time-axis in the plot)
# write the number of A mutation as a function of time.
with open(name+'6,Ub=0,Uc=0'+'.csv', 'w') as csvfile: #open a csv file
writer = csv.writer(csvfile, delimiter = ',', lineterminator='\n')
fieldnames = ['time','A muts'] # two columns (time, number of available A-mutations in the population)
writer.writerow(fieldnames) # write the field names in the first row of the csv file
for k in range(0,num_gen):
writer.writerow([k,num_Amut[k]]) # write the (time, number of A-mutations) in the k+1 row
# plotting data
plot(index, num_Amut,'r-',label ='number of A mutations')
legend(loc='upper center', shadow=True)
xlabel('Time (generations)')
ylabel('Available A mutations')
title('Fluctuations',fontsize = 'large')
show()
start = time.time()
main()
end = time.time()
t = (end - start)/60 # time in minutes
print 'total time', t