# .Rhistory
install.packages('tidyverse')
install.packages('sommer')
install.packages('devtools')
install.packages('factoextra')
# Read the raw mutation table
x = read.csv('C:/Users/zl/Downloads/Raw.csv')
library(tidyverse)
# Split each Mutation code: A = first character; split the remainder (B)
# into C (everything but the last character) and D (last character);
# drop D and keep distinct rows
x1 = x %>%
  separate(Mutation, c('A', 'B'), sep = 1) %>%
  separate(B, c('C', 'D'), sep = -1) %>%
  select(-D) %>%
  distinct()
View(x1)
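# Illustrative sketch (assumption, not from the original history): if each
# Mutation code looks like e.g. "A123T", sep = 1 splits off the first residue
# letter and sep = -1 splits off the last one.
toy = tibble(Mutation = c("A123T", "G45C"))
toy %>%
  separate(Mutation, c('A', 'B'), sep = 1) %>%   # A = "A",   B = "123T"
  separate(B, c('C', 'D'), sep = -1)             # C = "123", D = "T"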
# Collapse the first-character column A into a single string
seq = paste(unlist(x1$A), collapse = "")
seq
install.packages('randomForest')
install.packages('clusterGeneration')
library(tidyverse)
library(randomForest)
library(clusterGeneration)
library(MASS)   # mvrnorm() comes from MASS
set.seed(2)
# Simulate 10,000 observations of 8 correlated predictors
num.vars <- 8
num.obs <- 10000
cov.mat <- genPositiveDefMat(num.vars, covMethod = "unifcorrmat")$Sigma
rand.vars <- mvrnorm(num.obs, rep(0, num.vars), Sigma = cov.mat)
# Build a noisy linear response and rescale it to [0, 1]
parms <- runif(num.vars, -10, 10)
y <- rand.vars %*% matrix(parms) + rnorm(num.obs, sd = 20)
y <- data.frame((y - min(y)) / (max(y) - min(y)))
names(y) <- 'y'
rand.vars <- data.frame(rand.vars)
View(y)
View(rand.vars)
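# Hedged sketch (not in the original history): randomForest is loaded above but
# never fit to the simulated data here; one plausible next step is a fit to
# inspect variable importance. rf_sim is an illustrative name.
sim_df <- cbind(y, rand.vars)
rf_sim <- randomForest(y ~ ., data = sim_df, importance = TRUE, ntree = 500)
importance(rf_sim)   # %IncMSE and IncNodePurity per simulated predictor
varImpPlot(rf_sim)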
# Clear the workspace and release memory before the next analysis
rm(list = ls(all.names = TRUE))
gc(reset = TRUE)
library(tidyverse)
library(neuralnet)
setwd('E:/ALLFED/WheatYield_LOS/')
##1. read in the cleaned file, and select the variables
df = read.csv('data/dwheat_fit_elim.csv')
df_rf = df %>% dplyr::select(Y, n_total, irrigation_tot,
                             mechanized, pesticides_H, thz_class,
                             mst_class, soil_class)
# Scale the data to [0, 1]
scale01 = function(x){(x - min(x)) / (max(x) - min(x))}
train_rf = df_rf %>% mutate_all(scale01)
View(train_rf)
# Build Neural Network
n = names(train_rf)
f = as.formula(paste("Y ~", paste(n[!n %in% "Y"], collapse = " + ")))
print(f)
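# Hedged sketch (assumption, not in the original history): the saved model read
# back below was presumably fit and stored along these lines; the hidden-layer
# sizes c(5, 3) are illustrative guesses, not taken from the source.
nn = neuralnet(f, data = train_rf, hidden = c(5, 3), linear.output = TRUE)
saveRDS(nn, 'output/NeuralNetworkResults.rds')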
nn = readRDS('output/NeuralNetworkResults.rds')
## Model performance on the training set
pr.nn <- compute(nn,train_rf[,-1])
# Compute mean squared error
pred_train = pr.nn$net.result * (max(train_rf$Y) - min(train_rf$Y)) + min(train_rf$Y)
obs_train = (train_rf$Y) * (max(train_rf$Y) - min(train_rf$Y)) + min(train_rf$Y)
MSE.nn = sum((obs_train - pred_train)^2) / nrow(train_rf)
print(MSE.nn)
# Plot the neural network
plot(nn)
# Plot regression line/Check the model performance
reg_train = lm(train_rf$Y~pred_train)
r2 = summary(reg_train)$adj.r.squared
r2
png(filename="output/Figure1a-ModelPerformanceNN_train.png")
plot(train_rf$Y, pred_train, xlab='Wheat Production (Kg/ha)',ylab='Wheat Prediction (Kg/ha)',
xlim = c(0,1), ylim=c(0,1))
abline(0, 1, lwd = 2)
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(x = 0.5, y = 0.8, labels = mylabel)
dev.off()
require(devtools)
# import 'gar.fun' (Garson's algorithm for NN variable importance) from a GitHub gist
source_gist('6206737')
par(mar=c(3,4,1,1),family='serif')
plotdata = gar.fun('Y',nn)
plotdata + ylim(-1,1)+ylab('Relative Importance')
ggsave('output/Figure2-VariableImportanceNN.png')
rm(list=ls())
library(tidyverse)
library(randomForest)
library(psych)
setwd('E:/ALLFED/WheatYield_LOS/')
##1. read in the cleaned file, and select the variables under consideration
df = read.csv('data/dwheat_fit_elim.csv')
df_rf = df %>% dplyr::select(Y, n_total, p_fertilizer, irrigation_tot,
mechanized, pesticides_H, thz_class,
mst_class, soil_class)
## Check the data type of each column; factor or character columns
## need to be converted to numeric before model fitting
str(df_rf)
train_rf = df_rf %>% dplyr::select(-p_fertilizer) %>% filter(complete.cases(.))
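# Hedged sketch (assumption: the history ends before the model fit; a call along
# these lines is one plausible next step, assuming the *_class columns are
# already numeric per the str() check above). rf_fit is an illustrative name.
set.seed(2)
rf_fit = randomForest(Y ~ ., data = train_rf, importance = TRUE, ntree = 500)
print(rf_fit)
varImpPlot(rf_fit, main = 'Variable importance: wheat yield')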