-
Notifications
You must be signed in to change notification settings - Fork 0
/
reclassFunction_final.R
186 lines (130 loc) · 6.28 KB
/
reclassFunction_final.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
## FUNCTION to make a reclassification matrix given Albeke model output data
# parameters: df = dataframe of "is" data
# parameters: classColumn: text string of identifying column where "is" data is
# OUTPUT: reclassification matrix with two columns: "is" and "becomes"
reclassFunction <- function(df, classColumn = "TimeSeriesClass") {
# split all integers
strMat <- str_extract_all(df[, classColumn], "", simplify = TRUE)
## FUNCTION to define the counts of each code
countVec <- function(vec, dfID) {
# fill dataframe with the count of unique codes
countDF <- data.frame(row.names = dfID,
count1 = length(which(vec == "1")),
count2 = length(which(vec == "2")),
count3 = length(which(vec == "3")),
count4 = length(which(vec == "4")),
count5 = length(which(vec == "5")),
count7 = length(which(vec == "7")))
return(countDF)
}
# define output matrix
finMat <- matrix(0, nrow = nrow(df), ncol = 2,
dimnames = list(NULL, c("is", "becomes")))
## iterate every row of df and determine what that row becomes
for(j in 1:nrow(df)) {
# get the unique set of codes
vec <- strMat[j, ]
vec <- vec[!vec == ""]
# get the counts of each integer
cv <- countVec(vec, dfID = df[j, "id"])
## take everything through ONE "if" statement
if(all(sapply(vec, FUN = identical, vec[1])) == TRUE) { # TEST FOR UNIFORMITY
# finalize
finMat[j, "is"] <- as.numeric(df[j, "TimeSeriesClass"])
finMat[j, "becomes"] <- as.numeric(vec[1])
} else if(any(cv > (length(vec) * .65 ))) { # TEST FOR 65$ MAJORITY
# finalize by majority
finMat[j, "is"] <- as.numeric(df[j, "TimeSeriesClass"])
# get column value of the vector majority
finMat[j, "becomes"] <- as.numeric(
strsplit(names(cv[which(cv > (length(vec) * 0.65))]), "t")[[1]][2])
} else if(all(sapply(vec, FUN = identical, vec[1])) == FALSE &
any(cv > (length(vec) * .65 )) == FALSE) {
# check for absence of bare/fallow/crop
if(!any(vec %in% c(1, 2, 3))) { # TEST FOR ABSENCE OF BARE/FALLOW/CROP
# if absent: finalize by majority
cv.sort <- sort(colSums(cv), decreasing = TRUE)
finMat[j, "is"] <- df[j, "TimeSeriesClass"]
finMat[j, "becomes"] <- as.numeric(
strsplit(names(cv.sort[1]), "t")[[1]][2]
)
} else {
# get the end of the vector
endVec <- vec[length(vec) - 0]
# test for presence of crop or fallow
if(any(vec %in% c(2, 3))) {
if(endVec == 2 | vec[1] == 2) { # TEST FOR ENDING IN CROP OR STARTNG IN CROP
# If the last integer or first integer is CROP, finalize as crop
finMat[j, "is"] <- as.numeric(df[j, "TimeSeriesClass"])
finMat[j, "becomes"] <- 2
} else if(length(which(vec == 2)) > 2) { # TEST FOR MORE THAN ONE CROP
# If there are more than 2 CROP integers, finalize as crop
finMat[j, "is"] <- as.numeric(df[j, "TimeSeriesClass"])
finMat[j, "becomes"] <- 2
} else if(vec[1] %in% c(1, 3, 7) & any(vec == 2)) { # TEST FOR STARTING AS FALLOW, BARE, OR WATER FOLLOWED BY CROP
# If the first integer is fallow or water followed by crop, finalize as crop
finMat[j, "is"] <- as.numeric(df[j, "TimeSeriesClass"])
finMat[j, "becomes"] <- 2
} else if(all(vec != 2) & any(vec == 3)) { # TEST FOR NO CROP BUT FALLOW EXISTS
# if crop does not exist but fallow does, finalize as fallow
finMat[j, "is"] <- as.numeric(df[j, "TimeSeriesClass"])
finMat[j, "becomes"] <- 3
} else {
# if none of the above, finalize by majority
cv.sort <- sort(colSums(cv), decreasing = TRUE)
finMat[j, "is"] <- df[j, "TimeSeriesClass"]
finMat[j, "becomes"] <- as.numeric(
strsplit(names(cv.sort[1]), "t")[[1]][2]
)
}
} else {
# if there is NO crop or fallow
if(length(which(cv >= 1)) > 2) {
# if there are more than 2 seasonal changes and no
# finalize as "confused"
finMat[j, "is"] <- as.numeric(df[j, "TimeSeriesClass"])
finMat[j, "becomes"] <- 99
} else {
# if none of the above, finalize by majority
cv.sort <- sort(colSums(cv), decreasing = TRUE)
finMat[j, "is"] <- df[j, "TimeSeriesClass"]
finMat[j, "becomes"] <- as.numeric(
strsplit(names(cv.sort[1]), "t")[[1]][2]
)
}
}
}
}
# quality check: is there are NAs, stop the function
if(any(is.na(finMat[j,]))) {
stop(paste0("NAs induced at iteration ", j))
}
# print every 500 iterations
iteration.count <- 1000
if(j %% iteration.count == 0) cat("loop 1: iteration ", j, "complete\n")
} # close the loop
# print status check: how many are confused?
cat("\ntotal percent of confused tiles:" ,
round(
length(which(finMat[, "becomes"] == 99)) / nrow(df) * 100, 2))
# print status check: how many are reclassed
cat("\npercent of data reclassed:" ,
round(
length(which(finMat[, "is"] != 0)) / nrow(df) * 100, 2))
# return the finalized reclassification matrix
return(finMat)
} # close the function
### --- 1984 data ----
require(tidyverse)
# read file of model output
input.df <- read.table(
file = "C:/Users/emily/OneDrive - University of Wyoming/Summer2019/MajorityMergedClasses1994.txt",
header = TRUE,
stringsAsFactors = FALSE)
input.df <- input.df %>% mutate(id = row.names(input.df))
# make lookup table of definitions
luClasses <- data.frame(
cover = c("bareSoil", "crop", "fallow", "forest", "grassShrub", "water"),
code = c(1, 2, 3, 4, 5, 7)
)
reclass1984 <- reclassFunction(df = input.df, classColumn = "TimeSeriesClass")