-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_guiz1_data.r
62 lines (55 loc) · 3.06 KB
/
get_guiz1_data.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
## Load hw1_data.csv from the local copy of the quiz 1 zip archive.
## https is not supported, so the download steps stay commented out.
# temp <- tempfile()
# download.file("https://d396qusza40orc.cloudfront.net/rprog%2Fdata%2Fquiz1_data.zip",temp)
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned,
# while `TRUE` is a reserved word.
data <- read.csv(unz("dat/quiz1_data.zip", "hw1_data.csv"), header = TRUE)
## Class of each of the six columns (results as noted by the original author).
class(data[[1]]) # integer
class(data[[2]]) # integer
class(data[[3]]) # numeric
class(data[[4]]) # integer
class(data[[5]]) # integer
class(data[[6]]) # integer
# unlink(temp)
## ---- Basic row / column subsetting ----
print(data[1:6, ])           # rows 1 through 6
print(data[1:2])             # columns 1 and 2 (single index selects columns)
print(data[1:2, ])           # rows 1 and 2

## ---- Rows without any missing value ----
good <- complete.cases(data) # TRUE for every row that contains no NA
print(good)
print(data[good, ][1:6, ])   # first six complete rows
print(head(data[good, ]))    # head of the complete rows
print(tail(data[good, ]))    # tail of the complete rows

## ---- Chained and named indexing ----
print(data[1:6][1:5, ])      # first index picks columns, second picks rows
print(data[1][1:5, ])        # first five values of the first (Ozone) column
print(data[1][47, ])         # 47th value of the first column
print(data[good, ])          # all complete rows
print(data[good, 1])         # first column, complete rows only
print(data["Ozone"])         # the Ozone column selected by name
print(data[47, "Ozone"])     # row 47 of Ozone

## ---- Dimensions and names ----
print(nrow(data))            # number of rows
print(ncol(data))            # number of columns
print(rownames(data))        # row names
print(colnames(data))        # column names
print(tail(data, n = 2))     # last two rows

## ---- Ozone values taken from complete rows ----
data[good, "Ozone"]          # Ozone values of the complete rows
length(data[good, "Ozone"])  # how many such values there are
# Rows dropped by complete.cases(); this is not the number of NA in Ozone,
# which has to be counted another way.
nrow(data) - length(data[good, "Ozone"])
## Count the missing Ozone values directly with is.na().
bad.ozone <- is.na(data["Ozone"]) # one-column logical matrix, TRUE where NA
print(bad.ozone)
# `bad` is the row mask used below. It was never assigned in this script
# (only `bad.ozone` was), so these lines stopped with "object 'bad' not
# found". It is built from the column vector so it is a plain logical vector.
bad <- is.na(data[, "Ozone"])
data[bad, "Ozone"] # only the NA entries are kept
length(data[bad, "Ozone"]) # number of NA in Ozone (37 per the original note)
## Complete-row count plus NA count (148 per the original note):
length(data[good, "Ozone"]) + length(data[bad, "Ozone"])
length(data["Ozone"]) # 1: a one-column data frame, so columns are counted
length(data[, "Ozone"]) # number of values in the column
# Mean of the Ozone values from complete rows, rounded to one decimal.
round(mean(data[good, "Ozone"]), 1)
## Filter rows by value and summarise single columns.
ozone.data <- data[, "Ozone"]
ozone.data[ozone.data > 31] # NA entries come through too: NA > 31 is NA
good.data <- data[good, ] # keep only the complete rows
good.data[good.data$Ozone > 31, ]
good.data[good.data$Temp > 90, ]
# Rows where both conditions hold; which() turns the mask into row numbers.
hot.high.ozone <- which(good.data$Ozone > 31 & good.data$Temp > 90)
filtered.good.data <- good.data[hot.high.ozone, ]
print(filtered.good.data)
mean(filtered.good.data[, "Solar.R"])
# What is the mean of "Temp" when "Month" is equal to 6?
mean(data[data$Month == 6, ][, "Temp"])
# Same question on the complete rows only (the original author notes that
# this filtering is not needed here).
mean(good.data[good.data$Month == 6, ][, "Temp"])
## What was the maximum ozone value in the month of May?
# max() returns NA when any input is NA. Taking the maximum from `good.data`
# avoids that, but it also drops May rows whose Ozone is present while
# another column is NA. Skipping only the missing Ozone values keeps every
# valid reading.
max(data[data$Month == 5, "Ozone"], na.rm = TRUE)