# GenerateHU12.R
###############################################
#load spectral clustering functions
source("main.R")
###############################################
# Read data
dataTerr <- read.csv("data/terrData.csv", header=T)
dataFW <- read.csv("data/freshData.csv", header=T)
i <- which(colnames(dataFW)=="hu12_states")
dataTerrFW <- merge(dataTerr, dataFW[-i], by.x="zoneid", by.y="zoneid")
NB18876 <- read.csv("data/NB_18876.csv", header=T)
islands <- read.csv("data/islandIdx.csv", header=T)
latLong18876 <- read.csv("data/latLong18876.csv", header=T)
rm(i)
##################################################
# function definition
NBindex <- function(index){
id <- which(index)
NB <- data.frame()
for( i in 1:nrow(NB18876) ){
if( (NB18876[i,"row"] %in% id) &
(NB18876[i,"neighbor"] %in% id)){
NB <- rbind(NB,NB18876[i,c("row","neighbor")])
}
}
hash <- 1:length(id)
names(hash) <- id
for(i in 1:nrow(NB)){
NB[i,1] <- hash[as.character(NB[i,1])]
NB[i,2] <- hash[as.character(NB[i,2])]
}
return( NB)
}
generateData <-function(type, islandsIn =F , states= vector(), conFactor=1 ){
# Generate the data for clustering
#
#
# Args:
# type: three options "dataTerr", "dataFW", and "dataTerrFW"
# islandIn: if T the islands will be included
# states: a vector of states names that have to be included
# conFactor: contiguity constraint factor
# Returns:
# out: a list with three elements: data, conMatrix, and latLong
#
# Error handeling
#type
if(!identical(type, dataFW)&
!identical(type, dataTerrFW) &
!identical(type, dataTerr) ){
stop("Wrong input for type variable: Make sure to choose one of
dataFW, dataTerr, or dataTerrFW
")
}
#islandsIn
if(!is.logical(islandsIn)){
stop("islandsIn must be logical variable.")
}
#states
allStates <- levels(type$hu12_states)
allStates <- allStates[nchar(allStates)==2]
if(!is.vector(states)){
print(allStates)
stop("The state variable must be a vectore containing a subset of
the above state list")
}
if(sum(states %in% allStates)!= length(states)){
stop("The state variable must be a vectore containing a subset of
the above state list")
}
####################################################
#############################################
# finding the row index
index <- rep(T, nrow(type))
# make index of islands False if islands are not included
if(islandsIn==F){
index [c(islands)$x] <- F
}
if(length(states)>0){
id <- rep(F, nrow(type))
outStates <- allStates[!allStates %in% states]
for(i in 1:length(outStates)){
state <- outStates[i]
id <- grepl(state,type$hu12_states)|id
}
index <- index & !id
}
#############################################
# generate the output data
data <- type[index,-c(1,2)]
n <- nrow(data)
m <- ncol(data)
data <- as.matrix(data)
data <- as.numeric(data)
data <- matrix(data,nrow=n,ncol=m)
# Delete the constant columns
colSum <- apply(data,2,var)
constants <- which(colSum==0)
if(length(constants)!=0){
data <- data[,-constants]
}
#
latLong <- latLong18876[index,]
NB <- NBindex(index)
conMatrix <- neighborMatrix(NB,conFactor = conFactor)
out <- list(data=data, conMatrix = conMatrix, latLong = latLong)
return(out)
}
# Second Load sepectralClustering file
save.image(file="HU12SpectralClustering.RData")