From d8d2edb421145201fe5a2b1732d1fa27231dee40 Mon Sep 17 00:00:00 2001 From: absolutoslo Date: Sun, 12 Apr 2015 15:20:57 +0200 Subject: [PATCH 1/2] intermediateupdate --- R w-1 assignment.mdown | 68 +++++++++++++++++++++++++++++++++++++++++ diet_data.zip | Bin 1124 -> 0 bytes 2 files changed, 68 insertions(+) create mode 100644 R w-1 assignment.mdown delete mode 100644 diet_data.zip diff --git a/R w-1 assignment.mdown b/R w-1 assignment.mdown new file mode 100644 index 0000000..3314943 --- /dev/null +++ b/R w-1 assignment.mdown @@ -0,0 +1,68 @@ +#R assignment w-1 + +## Assignment Week 1 +https://github.com/absolutoslo/practice_assignment/blob/master/practice_assignment.rmd +## Unzip Specdata in R working directory +## check files +list.files("specdata") +[1] "001.csv" "002.csv" "003.csv" "004.csv" "005.csv" "006.csv" "007.csv" "008.csv" + [9] "009.csv" "010.csv" "011.csv" "012.csv" "013.csv" "014.csv" "015.csv" "016.csv" + [17] "017.csv" "018.csv" "019.csv" "020.csv" "021.csv" "022.csv" "023.csv" "024.csv" + ## inspect one file + read.csv("specdata/001.csv") + 1458 2006-12-28 NA NA 1 +1459 2006-12-29 NA NA 1 +1460 2006-12-30 NA NA 1 +1461 2006-12-31 NA NA 1 +## assing name to file +first=read.csv("specdata/001.csv") +## look for columns name +names(first) +## check length of the table (nrow) +length(first$Date) +[1] 1461 +## check dim (nrow,ncol) +dim(first) +[1] 1461 4 +summary(first) + Date sulfate nitrate ID + 2003-01-01: 1 Min. : 0.613 Min. :0.1180 Min. :1 + 2003-01-02: 1 1st Qu.: 2.210 1st Qu.:0.2835 1st Qu.:1 + 2003-01-03: 1 Median : 2.870 Median :0.4530 Median :1 + 2003-01-04: 1 Mean : 3.881 Mean :0.5499 Mean :1 + 2003-01-05: 1 3rd Qu.: 4.730 3rd Qu.:0.6635 3rd Qu.:1 + 2003-01-06: 1 Max. :19.100 Max. :1.8300 Max. :1 + (Other) :1455 NA's :1344 NA's :1339 + str(first) +'data.frame': 1461 obs. of 4 variables: + $ Date : Factor w/ 1461 levels "2003-01-01","2003-01-02",..: 1 2 3 4 5 6 7 8 9 10 ... + $ sulfate: num NA NA NA NA NA NA NA NA NA NA ... + $ nitrate: num NA NA NA NA NA NA NA NA NA NA ... + $ ID : int 1 1 1 1 1 1 1 1 1 1 ... + ## check first value of a column in data.frame(first) + first[1, "Date"] +[1] 2003-01-01 +1461 Levels: 2003-01-01 2003-01-02 2003-01-03 2003-01-04 2003-01-05 ... 2006-12-31 +## check final date +first[1461, "Date"] +[1] 2006-12-31 +1461 Levels: 2003-01-01 2003-01-02 2003-01-03 2003-01-04 2003-01-05 ... 2006-12-31 +## create a subset of data +first[which(first$Date == 2006-12-31), "sulfate"] +## or +first[which(first[, "Date"] == 2006-12-31), "sulfate"] +numeric(0) +## or +subset() function +subset(...) + +## back to all files - assign vector to list of files +> files = list.files("specdata") +> files + [1] "001.csv" "002.csv" "003.csv" "004.csv" "005.csv" "006.csv" "007.csv" "008.csv" + [9] "009.csv" "010.csv" "011.csv" "012.csv" "013.csv" "014.csv" "015.csv" "016.csv" + [17] "017.csv" "018.csv" "019.csv" "020.csv" "021.csv" "022.csv" "023.csv" "024.csv" + > files[1] +[1] "001.csv" +> files[1:5] +[1] "001.csv" "002.csv" "003.csv" "004.csv" "005.csv" diff --git a/diet_data.zip b/diet_data.zip deleted file mode 100644 index 7c5e1649c98b59376c24713cfbfa1b83f6857188..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1124 zcmWIWW@Zs#U|`^2_!hXq#n$uv+8QA53nNH`fx$5^rBW}sxGeUZHs4_bftJL$mE|uO z#csSd?+J;SDjL0NDvQbqmF@it(+@QG-*Eu;N`zwt?Tx)1+)|vUf=rg;;&y)5wG6Y zM7`Q>FUYIKSo9~c?Na|2^(5|KQtpvgJ)le!1hb_2*-+qY;c*O`dO?B=*Y9s;PlGR$OF6ef+8q5 zT>>G&c_iDqEMe)Qrd2vO4tOz55}16)uFaTRY>#XFXZ_sO?$v)!#Xa2{GV}GeHNJLF ztL*kYuj*K}L*%I8)R!|GC1)xd8yXiINj7gZ4ol9L=~--e91 z9fA`DBpnZ{e6)SUhH#%HFfg0ga`h{K?t?|QZ)SEXEV`Wx1>6$-56MLw=qUR&Yoba7 z2Vbn=P7{_8-rf7d%jyb`JU_SjeB%Df$}j6LmKs;xym4=G-rst=yT^}fuy-km*xod* zd^XkV=;kUj|BoT(B@PIldB75P{mY65?H!G*YwpQ_1h&lTG~NLfc=_WtBf@pgK-Zm{ z_i=p}Qd9?*q?W;*he(ZV71#T3YzQh0UAf>!!c?{`C%UHGK3>AE^TU3_tZ@0+e~-Vt zS>N_M>&K04b?kC~cf{R0T;_Z(;H*H_(!O=VLZ!>})+vj<^18c1)!;`f*GFMiAxWX; zcu9rUgKwv>t>o!-Xl}MTf6z$Y*rn~s0*B4bm8}Q$eJ`B4!0{-iiYdUGkx7IBcm4so z9tae`BqBE<>%x|%A<99z76CClg(GXjmbwty{DJ8Wme!DUVT*HwuJ=G)uoQr-3tLqs6yA=* From e950b7866644631dccdccfbb2fccc570009a2941 Mon Sep 17 00:00:00 2001 From: absolutoslo Date: Sun, 12 Apr 2015 15:36:40 +0200 Subject: [PATCH 2/2] update --- R w-1 assignment.mdown | 50 +++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/R w-1 assignment.mdown b/R w-1 assignment.mdown index 3314943..aee2fc4 100644 --- a/R w-1 assignment.mdown +++ b/R w-1 assignment.mdown @@ -4,27 +4,27 @@ https://github.com/absolutoslo/practice_assignment/blob/master/practice_assignment.rmd ## Unzip Specdata in R working directory ## check files -list.files("specdata") +```list.files("specdata") [1] "001.csv" "002.csv" "003.csv" "004.csv" "005.csv" "006.csv" "007.csv" "008.csv" [9] "009.csv" "010.csv" "011.csv" "012.csv" "013.csv" "014.csv" "015.csv" "016.csv" - [17] "017.csv" "018.csv" "019.csv" "020.csv" "021.csv" "022.csv" "023.csv" "024.csv" - ## inspect one file - read.csv("specdata/001.csv") + [17] "017.csv" "018.csv" "019.csv" "020.csv" "021.csv" "022.csv" "023.csv" "024.csv"``` +## inspect one file + ```read.csv("specdata/001.csv") 1458 2006-12-28 NA NA 1 1459 2006-12-29 NA NA 1 1460 2006-12-30 NA NA 1 -1461 2006-12-31 NA NA 1 +1461 2006-12-31 NA NA 1``` ## assing name to file -first=read.csv("specdata/001.csv") +```first=read.csv("specdata/001.csv")``` ## look for columns name -names(first) +```names(first)``` ## check length of the table (nrow) -length(first$Date) -[1] 1461 +```length(first$Date) +[1] 1461``` ## check dim (nrow,ncol) -dim(first) -[1] 1461 4 -summary(first) +```dim(first) +[1] 1461 4``` +```summary(first) Date sulfate nitrate ID 2003-01-01: 1 Min. : 0.613 Min. :0.1180 Min. :1 2003-01-02: 1 1st Qu.: 2.210 1st Qu.:0.2835 1st Qu.:1 @@ -32,36 +32,36 @@ summary(first) 2003-01-04: 1 Mean : 3.881 Mean :0.5499 Mean :1 2003-01-05: 1 3rd Qu.: 4.730 3rd Qu.:0.6635 3rd Qu.:1 2003-01-06: 1 Max. :19.100 Max. :1.8300 Max. :1 - (Other) :1455 NA's :1344 NA's :1339 - str(first) + (Other) :1455 NA's :1344 NA's :1339 ``` + ```str(first) 'data.frame': 1461 obs. of 4 variables: $ Date : Factor w/ 1461 levels "2003-01-01","2003-01-02",..: 1 2 3 4 5 6 7 8 9 10 ... $ sulfate: num NA NA NA NA NA NA NA NA NA NA ... $ nitrate: num NA NA NA NA NA NA NA NA NA NA ... - $ ID : int 1 1 1 1 1 1 1 1 1 1 ... + $ ID : int 1 1 1 1 1 1 1 1 1 1 ...``` ## check first value of a column in data.frame(first) - first[1, "Date"] + ```first[1, "Date"] [1] 2003-01-01 -1461 Levels: 2003-01-01 2003-01-02 2003-01-03 2003-01-04 2003-01-05 ... 2006-12-31 +1461 Levels: 2003-01-01 2003-01-02 2003-01-03 2003-01-04 2003-01-05 ... 2006-12-31``` ## check final date -first[1461, "Date"] +```first[1461, "Date"] [1] 2006-12-31 -1461 Levels: 2003-01-01 2003-01-02 2003-01-03 2003-01-04 2003-01-05 ... 2006-12-31 +1461 Levels: 2003-01-01 2003-01-02 2003-01-03 2003-01-04 2003-01-05 ... 2006-12-31``` ## create a subset of data -first[which(first$Date == 2006-12-31), "sulfate"] +```first[which(first$Date == 2006-12-31), "sulfate"]``` ## or -first[which(first[, "Date"] == 2006-12-31), "sulfate"] -numeric(0) +```first[which(first[, "Date"] == 2006-12-31), "sulfate"] +numeric(0)``` ## or subset() function -subset(...) +```subset(...)``` ## back to all files - assign vector to list of files -> files = list.files("specdata") +```> files = list.files("specdata") > files [1] "001.csv" "002.csv" "003.csv" "004.csv" "005.csv" "006.csv" "007.csv" "008.csv" [9] "009.csv" "010.csv" "011.csv" "012.csv" "013.csv" "014.csv" "015.csv" "016.csv" - [17] "017.csv" "018.csv" "019.csv" "020.csv" "021.csv" "022.csv" "023.csv" "024.csv" + [17] "017.csv" "018.csv" "019.csv" "020.csv" "021.csv" "022.csv" "023.csv" "024.csv"``` > files[1] [1] "001.csv" > files[1:5]