-
Notifications
You must be signed in to change notification settings - Fork 0
/
fit_timeseries_edu_positive_nucleoids.m
228 lines (192 loc) · 9.49 KB
/
fit_timeseries_edu_positive_nucleoids.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
function fit_timeseries_edu_positive_nucleoids()
% This script fits the measured time series of EdU-positive nucleoids
% with different kinetic models and computes important model constants and
% their errors. Reproduces values given Supplementary Table 1 and Fig. 1f in the
% manuscript.
%
% Required that the nucleoids' positions are determined before and combined
% to give EdU active fraction. Here simply stored in a csv file in the data
% folder.
%
% Time internally is in hours [0-120], displayed also in days
%
% Part of "The TFAM to mtDNA ratio defines inner-cellular nucleoid
% populations with distinct activity levels"
%
% Jan Keller-Findeisen, Dep. NanoBiophotonics, MPI Biophysical Chemsitry,
% Göttingen, Germany
%% parameters
rng(0);
Nbootstrp = 400; % number of repetitions of the fit for bootstraping
% fit options
options = optimoptions('fmincon', 'Display', 'off', 'OptimalityTolerance', 1e-8);
% load experimental data
m = readmatrix('data/edu_positive_nucleoids_timeseries.csv');
data.x = m(:, 1);
data.y = m(:, 2);
data.err = m(:, 3);
x = (0:120).'; % for display a fine grid for the first 120 hours
x2 = (0:0.01:30)*24; % compute model for a long time (30 days) to be able to calculate 95% positive EdU time interval
ci_level = 0.05; % 95% quantiles for the confidence interval level
% average effective growth rate
d = log(2) / (7*24); % division rate set to 7 days
fprintf('division rate: %.2f days\n',d*24);
%% Model A - One subpopulation (fit only first 18h)
fprintf('Model A - one subpopulation\n');
% free model parameter: growth rate
model = @(p, t) 1 - exp(-2*p(1)*t);
ix = data.x <= 18;
minimizer = @(x) sum((model(x, data.x(ix)) - data.y(ix)).^2); % least squares estimation
p0 = 0.05; % initial value
lb = d; % lower bounds (growth rates must at least be the division rate)
ub = Inf;
% minimization
p = fmincon(minimizer, p0, [], [], [], [], lb, ub, [], options);
yA = model(p, x);
%% Model B - Independency model fit
fprintf('Model B - Independency model\n\n');
% free model parameters: fraction in slow pop. (gamma), slow growth rate
% (alpha_s), fast growth rate (alpha_f) corresponding to Eq. 7 in the
% manuscript
model = @(p, t) 1 - p(1)*exp(-2*p(2)*t) -(1-p(1))*exp(-2*p(3)*t);
minimizer = @(x) sum((model(x, data.x) - data.y).^2); % least squares estimation
p0 = [0.5, 0.005, 0.05]; % initial values
lb = [0, d, d]; % lower bounds (both growth rates must at least be the division rate)
ub = [1, Inf, Inf];
% minimization
p = fmincon(minimizer, p0, [], [], [], [], lb, ub, [], options);
yB = model(p, x);
% now the error part
ex = zeros(Nbootstrp, numel(p));
ey = zeros(Nbootstrp, numel(data.y)); % values of all the random models at the data points
ey2 = zeros(Nbootstrp, numel(x2)); % values of all the random models for the first 30 hours
for i = 1 : Nbootstrp
ye = draw_pseudo_data(data.y, data.err);
minimizer = @(x) sum((model(x, data.x) - ye).^2); % LSE
pi = fmincon(minimizer, p0, [], [], [], [], lb, ub, [], options);
ex(i, :) = pi;
ey(i, :) = model(pi, data.x);
ey2(i, :) = model(pi, x2);
end
ci = quantile(ex, [ci_level/2,1-ci_level/2]);
ytt = quantile(ey2, [ci_level/2, 0.5, 1-ci_level/2]);
fprintf('Fast replication (active) population\n');
fprintf(' Size %.0f%% (%.0f%% - %.0f%%)\n', (1-p(1))*100, (1-ci(2, 1))*100, (1-ci(1, 1))*100);
fprintf(' Replication rate (per nucleoid, per day) %.2f (%.2f - %.2f)\n', p(3) * 24, ci(1, 3) * 24, ci(2, 3) * 24);
fprintf(' Degradation rate (per nucleoid, per day) %.2f (%.2f - %.2f)\n', (p(3) - d) * 24, (ci(1, 3) - d) * 24, (ci(2, 3) - d) * 24);
fprintf('\nSlow replication (inactive) population\n');
fprintf(' Size %.0f%% (%.0f%% - %.0f%%)\n', p(1)*100, ci(1, 1)*100, ci(2, 1)*100);
fprintf(' Replication rate (per nucleoid, per day) %.2f (%.2f - %.2f)\n', p(2) * 24, ci(1, 2) * 24, ci(2, 2) * 24);
fprintf(' Degradation rate (per nucleoid, per day) %.2f (%.2f - %.2f)\n', (p(2) - d) * 24, (ci(1, 2) - d) * 24, (ci(2, 2) - d) * 24);
fprintf('\n\n');
% time until 50%/90% EdU positive
tis = [0.5, 0.9];
for i = 1 : numel(tis)
ti = tis(i);
ti1 = log(1 - ti) / (-2 * p(3) * 24);
ti2 = log(1 - ti) / (-2 * ci(2, 3) * 24);
ti3 = log(1 - ti) / (-2 * ci(1, 3) * 24);
fprintf('Time until %.1f%% of the fast replicating (active) population is EdU-labeled: %.2f days (%.2f - %.2f days)\n', ti*100, ti1, ti2, ti3);
ti1 = log(1 - ti) / (-2 * p(2) * 24);
ti2 = log(1 - ti) / (-2 * ci(2, 2) * 24);
ti3 = log(1 - ti) / (-2 * ci(1, 2) * 24);
fprintf('Time until %.1f%% of the slow replicating (inactive) population is EdU-labeled: %.2f days (%.2f - %.2f days)\n', ti*100, ti1, ti2, ti3);
[~, idx] = min(abs(ytt(2, :) - ti)); ti1 = x2(idx) / 24;
[~, idx] = min(abs(ytt(3, :) - ti)); ti2 = x2(idx) / 24;
[~, idx] = min(abs(ytt(1, :) - ti)); ti3 = x2(idx) / 24;
fprintf('Time until %.1f%% of all nucleoids are EdU-labeled: %.2f days (%.2f - %.2f days)\n', ti*100, ti1, ti2, ti3);
end
%% Model C - Inactivation model fit
fprintf('\n\nModel C - Inactivation model\n\n');
% equivalent to the complete model with alpha_s = 0, delta_s = 0 (Eq. 8)
% two free parameters Delta_f (=Alpha_f-Beta_f), Alpha_f
model = @(p, t) 1 - ((p(1)-d)*2*p(2)*exp(-d*t)+d*(2*p(2)-p(1))*exp(-2*p(2)*t))/(p(1)*(2*p(2)-d));
minimizer = @(x) sum((model(x, data.x) - data.y).^2); % least squares estimation
p0 = [0.005, 0.03]; % initial values
lb = [d, d]; % both must be at least the division rate
ub = [Inf, Inf];
p = fmincon(minimizer, p0, [], [], [], [], lb, ub, [], options);
yC = model(p, x);
% derived parameter values
alpha_f = p(2);
beta_f = p(2) - p(1);
tau = p(1) - d; % transfer rate fast to slow is Delta_f - d
gamma = tau / p(1); % fast growing fraction
% now the error part
ex = zeros(Nbootstrp, numel(p));
exf = zeros(Nbootstrp, 4);
ey = zeros(Nbootstrp, numel(data.y));
ey2 = zeros(Nbootstrp, numel(x2));
eeysp = zeros(Nbootstrp, numel(x2));
model_sp = @(x, t) 1 - ((x(4) * (x(1) + x(2)) + x(3))*exp(-d*t)-x(3)*(1-x(4))*exp(-(x(1)+x(2)+x(3)+d)*t))/x(4)/(x(1)+x(2)+x(3)); % model for the slow replicating ones only with parameter x = alpha_f, beta_f, tau, gamma
for i = 1 : Nbootstrp
ye = draw_pseudo_data(data.y, data.err);
minimizer = @(x) sum((model(x, data.x) - ye).^2); % LSE
pi = fmincon(minimizer, p0, [], [], [], [], lb, ub, [], options);
ex(i, :) = pi;
xif = [pi(2), pi(2)-pi(1), pi(1) - d, (pi(1) - d) / pi(1)]; % alpha_f, beta_f, tau, gamma
exf(i, :) = xif;
ey(i, :) = model(pi, data.x);
ey2(i, :) = model(pi, x2);
eeysp(i, :) = model_sp(xif, x2);
end
cif = quantile(exf, [ci_level/2,1-ci_level/2]);
ytt = quantile(ey2, [ci_level/2, 0.5, 1-ci_level/2]);
yttsp = quantile(eeysp, [ci_level/2, 0.5, 1-ci_level/2]);
fprintf('Fast replication (active) population\n');
fprintf(' Size %.0f%% (%.0f%% - %.0f%%)\n', gamma*100, cif(1, 4)*100, cif(2, 4)*100);
fprintf(' Replication rate (per nucleoid, per day) %.2f (%.2f - %.2f)\n', alpha_f * 24, cif(1, 1) * 24, cif(2, 1) * 24);
fprintf(' Degradation rate (per nucleoid, per day) %.2f (%.2f - %.2f)\n', beta_f * 24, cif(1, 2) * 24, cif(2, 2) * 24);
fprintf('\nSlow replication (inactive) population\n');
fprintf(' Size %.0f%% (%.0f%% - %.0f%%)\n', (1-gamma)*100, (1-cif(2, 4))*100, (1-cif(1, 4))*100);
fprintf('\nTransfer from fast to slow\n');
fprintf(' (per nucleoid per day) %.2f (%.2f - %.2f)\n', tau * 24, cif(1, 3) * 24, cif(2, 3) * 24);
fprintf('\n\n');
% fprintf('alpha_f %.2f/d (%.2f - %.2f) / day\n', alpha_f * 24, cif(1, 1) * 24, cif(2, 1) * 24);
% fprintf('beta_f %.2f/d (%.2f - %.2f) / day\n', beta_f * 24, cif(1, 2) * 24, cif(2, 2) * 24);
% fprintf('tau %.2f/d (%.2f - %.2f) / day\n', tau * 24, cif(1, 3) * 24, cif(2, 3) * 24);
% fprintf('gamma %.2f/d (%.2f - %.2f)\n', gamma, cif(1, 3), cif(2, 4));
% fprintf('1-gamma %.2f/d (%.2f - %.2f)\n', 1-gamma, 1-cif(1, 4), 1-cif(2, 3));
% time until 50% labeled, 90% labeled
tx = [0.5, 0.9];
for i = 1 : 2
ti = tx(i);
ti1 = log(1 - ti) / (-2 * alpha_f * 24);
ti2 = log(1 - ti) / (-2 * cif(2, 1) * 24);
ti3 = log(1 - ti) / (-2 * cif(1, 1) * 24);
fprintf('Time until %.1f%% of the fast replicating (active) population is EdU-labeled: %.2f days (%.2f - %.2f days)\n', ti*100, ti1, ti2, ti3);
[~, idx] = min(abs(yttsp(2, :) - ti)); ti1 = x2(idx) / 24;
[~, idx] = min(abs(yttsp(3, :) - ti)); ti2 = x2(idx) / 24;
[~, idx] = min(abs(yttsp(1, :) - ti)); ti3 = x2(idx) / 24;
fprintf('Time until %.1f%% of the slow replicating (inactive) population is EdU-labeled: %.2f days (%.2f - %.2f days)\n', ti*100, ti1, ti2, ti3);
[~, idx] = min(abs(ytt(2, :) - ti)); ti1 = x2(idx) / 24;
[~, idx] = min(abs(ytt(3, :) - ti)); ti2 = x2(idx) / 24;
[~, idx] = min(abs(ytt(1, :) - ti)); ti3 = x2(idx) / 24;
fprintf('Time until %.1f%% of all nucleoids are EdU-labeled: %.2f days (%.2f - %.2f days)\n', ti*100, ti1, ti2, ti3);
end
%% recreate Figure 1f from the manuscript
lw = 1.5;
fig = figure();
hold on;
plot(x, yA*100, 'k--', 'LineWidth', lw);
plot(x, yB*100, 'r-', 'LineWidth', lw);
plot(x, yC*100, 'b-', 'LineWidth', lw);
errorbar(data.x, data.y*100, data.err*100, 'ko');
xlabel('EdU-Incubation [h]');
xlim([0, 120]);
xticks(0:20:120);
ylabel('EdU-positive nucleoids (%)');
ylim([0, 100]);
yticks(0:20:100);
box on;
grid on;
title('reproduction of Fig. 1f');
end
function ye = draw_pseudo_data(y, e)
% draws random pseudo data given a mean (measured mean value) and an error
% assuming that the measured data would follow a normal distribution with
% this mean and standard deviation (used for artificial data generation in
% the bootstraping procedure)
ye = y + randn(size(e)) .* e;
% ye = max(ye, 0); % not necessary because LSE
end