-
Notifications
You must be signed in to change notification settings - Fork 9
/
GLUE.~
287 lines (226 loc) · 12.1 KB
/
GLUE.~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
####The main function to automatically realize the GLUE procedure for parameter estimation for DSSAT model.####
#################Step 1: Get the fundamental information for GLUE procedure.##############
ncol(finf <- file.info(dir()))# at least six
## Not run: finf # the whole list
## Those that are more than 100 days old :
finf[difftime(Sys.time(), finf[,"mtime"], units="days") > 100 , 1:4]
file.info("no-such-file-exists")
## (1) Get the name of the batch file that was generated with GenSelect.
#CultivarBatchFile<-"Prisma_GC_Avg.MZC";
CultivarBatchFile<-"F_MIX.PNC";
##Which batch file generated by GenSelect should be used? This information should be given to GLUE when
##model users decide to use this program.
## (2) Get the information about the working directory of GLUE program (WD),
#model output storage directory (OD) and DSSAT (DSSATD).
WorkDirectory<-getwd();
WorkDirectorySubstrings<-unlist(strsplit(WorkDirectory,"DSSAT48"));
DSSATD<-paste(WorkDirectorySubstrings[1],"DSSAT48", sep=""); #Get the working directory of DSSAT.
GLUED<-"/Tools/GLUE";#This one is the postion of GLUE under DSSAT and fixed by model developer.
OutputD<-"/GLWork"; #This one is the position of DSSAT output and fixed by model developer.
GenotypeD<-"/Genotype"; #This one is the position of Genotype file.
WD<-paste(DSSATD, GLUED, sep="");
OD<-paste(DSSATD, OutputD, sep="");
GD<-paste(DSSATD, GenotypeD, sep="");
eval(parse(text=paste("ModelRunIndicatorPath='",OD,"/ModelRunIndicator.txt'",sep = '')));
##Path of the model run indicator file, which indicates which component of GLUE is finished so far.
##WD represents working directory. This is very important, because it is used to tell the main funtion where
##the sub-functions are. DSSATD represents the DSSAT directory. GLUED represents the GLUE directory.
##OutputD represents the output directory under DSSAT, while OD means the final output directory.
##GD ir the directory of genotype files in DSSAT.
## (3) Read the number of model runs and GLUE control flag from the "Simulation Control" file.
#library(xlsReadWrite);
#eval(parse(text = paste("SimulationControl<-read.xls('",WD,
#"/SimulationControl.xls',sheet = 'Sheet 1', rowNames = T, colNames=T)",sep = '')));
eval(parse(text = paste("SimulationControl<-read.csv('",WD,
"/SimulationControl.csv', header=T)",sep = '')));
newRownames <- SimulationControl[ , 1];
rownames(SimulationControl) <- newRownames;
NumberOfModelRun<-as.numeric(SimulationControl["NumberOfModelRun", "Value"]);
write(c("The number of model runs = ",NumberOfModelRun), file = ModelRunIndicatorPath, ncolumns=2, append = F);
#print(NumberOfModelRun);
#Read the number of model running.
GLUEFlag<-as.numeric(SimulationControl[2,"Value"]);
#Set the flag for whole GLUE procedure. If GLUEFlag==1, it means coefficients relative both to phenology and
#growth will be evaluated; GLUEFlag==2, only phenology will be evaluated; GLUEFlag==3, only growth will be evaluated.
## (4) Get the number of round of GLUE.
if (GLUEFlag==1)
{
StartRoundOfGLUE=1
TotalRoundOfGLUE=2;
write("GLUE will run for both phenology and growth.", file = ModelRunIndicatorPath, append = T);
} else if (GLUEFlag==2)
{
StartRoundOfGLUE=1
TotalRoundOfGLUE=1;
write("GLUE will run for phenology only.", file = ModelRunIndicatorPath, append = T);
} else
{
StartRoundOfGLUE=2
TotalRoundOfGLUE=2;
write("GLUE will run for growth only.", file = ModelRunIndicatorPath, append = T);
}
#write("Which round of GLUE to be done is known...", file = ModelRunIndicatorPath, append = T);
#In default, totally two rounds of GLUE will be conducted. In the first round, only the genetic coefficients (P1, P2, p5, PHINT)
#that can influence phenology such as anthesis date and maturity date will be estimated, while other parameters (G2, G3)
#will be fixed at their mean values derived from DSSAT database.In the second round of GLUE, G2 and G3 will be estimated based on the likelihood values derived from growth outputs
#such LAIX, HWAM, CWAM. Finally, the two partial parameter sets will be conbined together to give us a final optimal parameter set.
#If only estimated phenology dates, then only the first round GLUE will be conducted; if only estimated growth, then only the
#second round GLUE will be conducted.
## (5) Get the name of the genotype file of current crop and the name of current model
eval(parse(text=paste("BatchFile<-readLines('",OD,"/",CultivarBatchFile,"',n=-1)",sep = '')));
CropNameAddress<-grep('BATCH', BatchFile);
CropNameStart<-18; #
CropNameEnd<-19; #
CropName<-substr(BatchFile[CropNameAddress], CropNameStart, CropNameEnd);
write(c("Crop =",CropName), file = ModelRunIndicatorPath, ncolumns=2, append = T);
#Get the crop name in this model run.
CultivarIDStart<-20; #
CultivarIDEnd<-25; #Get the Cultivar ID number such as, "IB0001";
CultivarNameStart<-20;
CultivarNameEnd<-nchar(BatchFile[CropNameAddress]); #Get the Cultivar name such as, "ZA0002 Prisma GC Avg";
CultivarID<-substr(BatchFile[CropNameAddress], CultivarIDStart, CultivarIDEnd);
CultivarName<-substr(BatchFile[CropNameAddress], CultivarNameStart, CultivarNameEnd);
write(c("Cultivar ID =",CultivarName), file = ModelRunIndicatorPath, ncolumns=2, append = T);
#Get the cultivar ID and name.
## "$BATCH(CULTIVAR):MZIM0003 APPOLO" is an example to show how to get the crop name and cultivar ID.
##From this line, we can get to know the crop name "MZ" (18-19), the cultivar ID "IM0003" (20-25).
# Model name
DSSATPro = readLines(paste0(DSSATD,"/DSSATPRO.v48"))
LineNo = grep(paste0("M",CropName),DSSATPro)
LineSplit = unlist(strsplit(DSSATPro[LineNo]," "))
ModelSelect = LineSplit[length(LineSplit)]
write(c("Model Name =",ModelSelect), file = ModelRunIndicatorPath, ncolumns=2, append = T);
# Get the genotype file name
GenotypeFilePath<-GD;
CurrentGenotypeFile<-paste0(GD, "/",CropName,substr(ModelSelect,3,8),".CUL")
#Get the names of the genotype file template that will be used, which shoud start with crop name such as "MZ",
#and end with extension name ".CUL". Since there are two genotype files starting with "MZ" and ending with
#".CUL" under the "Genotype" folder of DSSAT, it was set as "MZCER048.CUL" as default value.
StringLength<-nchar(CurrentGenotypeFile);
GenotypeFileNameStartPosition<-(StringLength-(8+4)+1);
GenotypeFileNameEndPosition<-StringLength-4;
#Where 4 is the length of character ".CUL",
#while 8 is the length of cultivar file name, such as "MZCER048".
GenotypeFileName<-substr(CurrentGenotypeFile, GenotypeFileNameStartPosition, GenotypeFileNameEndPosition);
write(c("Genotype file name =",GenotypeFileName), file = ModelRunIndicatorPath, ncolumns=2, append = T);
#Get the name of the genotype file used currently.
## (6) Set up batch file.
##Only copy the information below @FILEX in the genotype file generated by GenSelect, such as "APPOLO.MZC",
##to the batch file template "DSSBatch.template". Thus a new batch file can be generated and save it as
##"DSSBatch.v48" in the output directory.
eval(parse(text=paste("source('",WD,"/BatchFileSetUp.r')",sep = '')));
BatchFileSetUp(WD, OD, CultivarBatchFile);
write(c("DSSAT batch file =",CultivarBatchFile), file = ModelRunIndicatorPath, ncolumns=2, append = T);
## (7) Get the parameter property file (miminum, maximum, and flg values) and the number of parameters.
CulFile.origin= readLines(paste0(GD,"/",GenotypeFileName,".CUL"))
caliLine = CulFile.origin[which(substr(CulFile.origin,1,12) == "!Calibration")]
CulFile = CulFile.origin[-which(substr(CulFile.origin,1,1) == "!")] #ignore lines starting with !
CulFile = c(CulFile,caliLine)
#locate parameter properties
LineNo.title = grep("@VAR#",CulFile)[1]
LineNo.min = grep("999991 MINIMA",CulFile)
LineNo.max = grep("999992 MAXIMA",CulFile)
LineNo.cal = grep("!Calibration",CulFile)
Lineno.thiscul = grep(CultivarID,CulFile)
LineNo.all = c(LineNo.cal[1],LineNo.title,LineNo.min,LineNo.max,Lineno.thiscul)
if(length(LineNo.cal)>1){
LineNo.cal = grep("!Calibration",CulFile)[1]
warning("Please use the first !Calibration line to define the calibration switch!")
}
CulFile = CulFile[LineNo.all]
write(c("Parameter property:",CulFile ), file = ModelRunIndicatorPath, append = T);
# discard content after !, and remove the extra space at the end of lines
# LineNo.exc = grep("!",CulFile)
#
#
# # if (length(LineNo.exc)>1){
# #
# # for (itemp in 1:length(LineNo.exc)-1){
# #
# # ExcSplit = unlist(strsplit(CulFile[LineNo.exc[itemp]],"!"))
# #
# # }
#
#
# # for (itemp in 1:length(LineNo.exc)-1){
# #
# # ExcSplit = unlist(strsplit(CulFile[LineNo.exc[itemp]],"!"))
# # CulFile[LineNo.exc[itemp]] = ExcSplit[1]
# #
# # Loc.space = unlist(stringr::str_locate_all(ExcSplit[1]," "))
# #
# # for (iLoc in length(Loc.space):1) {
# #
# # if(Loc.space[iLoc]-Loc.space[iLoc-1] > 1){
# #
# # No.space = length(Loc.space) - iLoc + 1
# # break
#
# } else {
#
# No.space = 0
#
# }
#
# }
# CulFile[LineNo.exc[itemp]] = substr(ExcSplit[1],1,nchar(ExcSplit[1])-No.space)
#
# # print(No.space)
# # print(CulFile)
# }
#
# }
# convert text to dataframe
CulFile.df = paste0(substr(CulFile,1,6), substr(CulFile,30,nchar(CulFile)[1]))
header = unlist(strsplit(CulFile.df[2]," "))
header = header[-which(nchar(header)<1)]
CulData = read.table(textConnection(CulFile.df[-c(1,2)]),header=F)
colnames(CulData) = header
Cali = unlist(strsplit(CulFile[1]," "))
Cali.reshape = paste(Cali[1],"placeholder",Cali[2],sep = " ")
Cali.df = read.table(textConnection(Cali.reshape),header = F)
colnames(Cali.df) = header
CulData = rbind(CulData,Cali.df)
#CulData
ncol.predefined = which(header=="ECO#")
TotalParameterNumber = ncol(CulData) - ncol.predefined #Get the total number of the parameters.
ParameterNames = header[-c(1:ncol.predefined)]
write(c("Parameters =",ParameterNames), file = ModelRunIndicatorPath, append = T)
#################Step 2: Begin the GLUE procedure.#################
for (i in StartRoundOfGLUE:TotalRoundOfGLUE)
{
RoundOfGLUE<-i;
## (1) Generate random values for the paramter set concerned.
eval(parse(text = paste("source('",WD,"/RandomGeneration.r')",sep = '')));
RandomMatrix<-RandomGeneration(WD,CulData, TotalParameterNumber, ncol.predefined,NumberOfModelRun, RoundOfGLUE, GLUEFlag);
write("Random parameter sets have been generated...", file = ModelRunIndicatorPath, append = T);
write("Model runs are starting...", file = ModelRunIndicatorPath, append = T);
## (2) Create new genotype files with the generated parameter sets and run the DSSAT model with them.
eval(parse(text = paste("source('",WD,"/ModelRun.r')",sep = '')));
ModelRun(WD, OD, DSSATD, GD, CropName, GenotypeFileName, CultivarID, RoundOfGLUE, TotalParameterNumber, NumberOfModelRun, RandomMatrix);
write("Model run is finished...", file = ModelRunIndicatorPath, append = T)
write("Likelihood calculation is starting...", file = ModelRunIndicatorPath, append = T);
## (3) Calculate the likelihood values for each parameter set.
eval(parse(text = paste("source('",WD,"/LikelihoodCalculation.r')",sep = '')));
LikelihoodCalculation(WD, OD, CropName, ParameterNames, RoundOfGLUE);
write("Likelihood calculation is finished...", file = ModelRunIndicatorPath, append = T);
write("Starting calculation of posterior distribution...", file = ModelRunIndicatorPath, append = T);
## (4) Derivation of posterior distribution.
eval(parse(text = paste("source('",WD,"/PosteriorDistribution.r')",sep = '')));
PosteriorDistribution(WD, OD, ParameterNames, ParameterProperty, CropName, RoundOfGLUE);
write("Posterior distribution is derived...", file = ModelRunIndicatorPath, append = T);
## (5) Indicator of model running to show the round of GLUE is finished.
if (RoundOfGLUE==1)
{
Indicator<-'The first round of GLUE is finished.';
write(Indicator, file = ModelRunIndicatorPath, append = T);
} else
{
Indicator<-'The second round of GLUE is finished.';
write(Indicator, file = ModelRunIndicatorPath, append = T);
}
}
#################Step 3: Get a final optimal parameter set.##############
eval(parse(text = paste("source('",WD,"/OptimalParameterSet.r')",sep = '')));
OptimalParameterSet(GLUEFlag, OD, DSSATD, CropName, CultivarID, CultivarName, GenotypeFileName, TotalParameterNumber);
options(show.error.message=T)