-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrsRemoveEmpty.R
65 lines (64 loc) · 1.93 KB
/
rsRemoveEmpty.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Remove variables (columns) from a data frame df that contain no data (only NA)
# and, if removeConstant is TRUE (default), variables that hold the same
# constant value in every row.
# A message listing the removed variables is emitted if anything was removed.
# @param data.frame df
# @param logical removeConstant Not only remove NA-only variables, but also variables with constant data
# @returns data.frame df without the removed variables
removeEmpty = function(df, removeConstant = TRUE) {
  n = nrow(df)
  if (n < 1) {
    # Without rows nothing can be judged; hand the input back untouched
    return(df)
  }
  if (n < 2) {
    # Constant values can only be identified with at least two rows
    removeConstant = FALSE
  }

  # TRUE if the column is NA-only or (when requested) constant
  isRemovable = function(column) {
    if (anyNA(column)) {
      # A column containing any NA never counts as constant;
      # it is removable only if it is NA throughout
      return(all(is.na(column)))
    }
    removeConstant && all(column == column[[1L]])
  }

  # Address columns by position rather than by name, so that duplicated
  # column names are each inspected (and removed) individually
  toDrop = vapply(
    seq_along(names(df)),
    function(idx) isRemovable(df[[idx]]),
    logical(1)
  )
  removed = names(df)[toDrop]

  # Delete from the back so the remaining positions stay valid
  for (idx in rev(which(toDrop))) {
    df[[idx]] = NULL
  }

  if (length(removed) > 0) {
    message(
      "removeEmpty() removed ", length(removed),
      " Variables from data.frame: ", paste(removed, collapse = ", ")
    )
  }
  return(df)
}