From bb5b73beffefe0a8fce036786d1988afd42a459b Mon Sep 17 00:00:00 2001
From: jkc2155 <jkc2155@columbia.edu>
Date: Thu, 29 Sep 2016 12:11:10 -0400
Subject: [PATCH 1/2] Playing with data functions

---
 .gitignore               |  4 ++++
 Class 7 Instructions.Rmd | 20 ++++++++++++--------
 2 files changed, 16 insertions(+), 8 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5b6a065
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
diff --git a/Class 7 Instructions.Rmd b/Class 7 Instructions.Rmd
index 5ae641a..8c06f6b 100644
--- a/Class 7 Instructions.Rmd	
+++ b/Class 7 Instructions.Rmd	
@@ -18,7 +18,7 @@ library(tidyr, dplyr)
 
 ##Upload wide format instructor data (instructor_activity_wide.csv)
 ```{r}
-data_wide <- read.table("~/Documents/NYU/EDCT2550/Assignments/Assignment 3/instructor_activity_wide.csv", sep = ",", header = TRUE)
+data_wide <- read.table("~/R/class7/instructor_activity_wide.csv", sep = ",", header = TRUE)
 
 #Now view the data you have uploaded and notice how its structure: each variable is a date and each row is a type of measure.
 View(data_wide)
@@ -38,7 +38,7 @@ The gather command requires the following input arguments:
 - ...: Names of source columns that contain values
 
 ```{r}
-data_long <- gather(data_wide, date, variables)
+data_long <- tidyr::gather(data_wide, date, variables)
 #Rename the variables so we don't get confused about what is what!
 names(data_long) <- c("variables", "date", "measure")
 #Take a look at your new data, looks weird huh?
@@ -53,13 +53,15 @@ The spread function requires the following input:
 - value: Name of column containing values
 
 ```{r}
-instructor_data <- spread(data_long, variables, measure)
+instructor_data <- tidyr::spread(data_long, variables, measure)
 ```
 
 ##Now we have a workable instructor data set!The next step is to create a workable student data set. Upload the data "student_activity.csv". View your file once you have uploaded it and then draw on a piece of paper the structure that you want before you attempt to code it. Write the code you use in the chunk below. (Hint: you can do it in one step)
 
 ```{r}
-
+Student_Data_Wide <- read.table("~/R/class7/student_activity.csv", sep = ",", header = TRUE)
+Student_Data_Spread <- tidyr::spread(data_long, variables, measure)
+View(Student_Data_Spread)
 ```
 
 ##Now that you have workable student data set, subset it to create a data set that only includes data from the second class. 
@@ -69,13 +71,14 @@ To do this we will use the dplyr package (We will need to call dplyr in the comm
 Notice that the way we subset is with a logical rule, in this case date == 20160204. In R, when we want to say that something "equals" something else we need to use a double equals sign "==". (A single equals sign means the same as <-).
 
 ```{r}
-student_data_2 <- dplyr::filter(student_data, date == 20160204)
+student_data_Spread_filt1 <- dplyr::filter(Student_Data_Spread, date == 'X20160204')
+View(student_data_Spread_filt1)
 ```
 
 Now subset the student_activity data frame to create a data frame that only includes students who have sat at table 4. Write your code in the following chunk:
 
 ```{r}
-
+student_data_wide_class2_t4 <- dplyr::filter(student_data_Spread_filt1, variable == 'table', measure == 4.00)
 ```
 
 ##Make a new variable
@@ -89,7 +92,8 @@ instructor_data <- dplyr::mutate(instructor_data, total_sleep = s_deep + s_light
 Now, refering to the cheat sheet, create a data frame called "instructor_sleep" that contains ONLY the total_sleep variable. Write your code in the following code chunk:
 
 ```{r}
-
+instructor_sleep1 <- instructor+data$total_sleep
+instructor_sleep <- data.frame(instructor_sleep1)
 ```
 
 Now, we can combine several commands together to create a new variable that contains a grouping. The following code creates a weekly grouping variable called "week" in the instructor data set:
@@ -100,7 +104,7 @@ instructor_data <- dplyr::mutate(instructor_data, week = dplyr::ntile(date, 3))
 
 Create the same variables for the student data frame, write your code in the code chunk below:
 ```{r}
-
+sdata_wide <- dplyr::mutate(sdata_wide, week = dplyr::ntile(date, 3))
 ```
 
 ##Sumaraizing

From 45dad7e2d0a3d112332d31e9cd333a7f15719db0 Mon Sep 17 00:00:00 2001
From: jkc2155 <jkc2155@columbia.edu>
Date: Thu, 29 Sep 2016 12:14:53 -0400
Subject: [PATCH 2/2] Committed for review

---
 Class 7 Instructions.Rmd | 28 ++++++++++++++++++++--------
 class7.Rproj             | 13 +++++++++++++
 2 files changed, 33 insertions(+), 8 deletions(-)
 create mode 100644 class7.Rproj

diff --git a/Class 7 Instructions.Rmd b/Class 7 Instructions.Rmd
index 8c06f6b..c8cb962 100644
--- a/Class 7 Instructions.Rmd	
+++ b/Class 7 Instructions.Rmd	
@@ -60,7 +60,7 @@ instructor_data <- tidyr::spread(data_long, variables, measure)
 
 ```{r}
 Student_Data_Wide <- read.table("~/R/class7/student_activity.csv", sep = ",", header = TRUE)
-Student_Data_Spread <- tidyr::spread(data_long, variables, measure)
+Student_Data_Spread <- tidyr::spread(Student_Data_Wide, variable, measure)
 View(Student_Data_Spread)
 ```
 
@@ -71,14 +71,14 @@ To do this we will use the dplyr package (We will need to call dplyr in the comm
 Notice that the way we subset is with a logical rule, in this case date == 20160204. In R, when we want to say that something "equals" something else we need to use a double equals sign "==". (A single equals sign means the same as <-).
 
 ```{r}
-student_data_Spread_filt1 <- dplyr::filter(Student_Data_Spread, date == 'X20160204')
+student_data_Spread_filt1 <- dplyr::filter(Student_Data_Spread, date == 20160204)
 View(student_data_Spread_filt1)
 ```
 
 Now subset the student_activity data frame to create a data frame that only includes students who have sat at table 4. Write your code in the following chunk:
 
 ```{r}
-student_data_wide_class2_t4 <- dplyr::filter(student_data_Spread_filt1, variable == 'table', measure == 4.00)
+student_data_wide_class2_t4 <- dplyr::filter(student_data_Spread_filt1, table == 4)
 ```
 
 ##Make a new variable
@@ -94,6 +94,7 @@ Now, refering to the cheat sheet, create a data frame called "instructor_sleep"
 ```{r}
-instructor_sleep1 <- instructor+data$total_sleep
+instructor_sleep1 <- instructor_data$total_sleep  # pull total_sleep out of instructor_data (was the typo `instructor+data`)
 instructor_sleep <- data.frame(instructor_sleep1)
+View(instructor_sleep)
 ```
 
 Now, we can combine several commands together to create a new variable that contains a grouping. The following code creates a weekly grouping variable called "week" in the instructor data set:
@@ -104,24 +105,26 @@ instructor_data <- dplyr::mutate(instructor_data, week = dplyr::ntile(date, 3))
 
 Create the same variables for the student data frame, write your code in the code chunk below:
 ```{r}
-sdata_wide <- dplyr::mutate(sdata_wide, week = dplyr::ntile(date, 3))
+sdata_wide <- dplyr::mutate(Student_Data_Spread, week = dplyr::ntile(date, 3))
+View(sdata_wide)
 ```
 
 ##Sumaraizing
 Next we will summarize the student data. First we can simply take an average of one of our student variables such as motivation:
 
 ```{r}
-student_data %>% dplyr::summarise(mean(motivation))
+Student_Data_Spread %>% dplyr::summarise(mean(motivation))
 
 #That isn't super interesting, so let's break it down by week:
 
-student_data %>% dplyr::group_by(date) %>% dplyr::summarise(mean(motivation))
+Student_Data_Spread %>% dplyr::group_by(date) %>% dplyr::summarise(mean(motivation))
 ```
 
 Create two new data sets using this method. One that sumarizes average motivation for students for each week (student_week) and another than sumarizes "m_active_time" for the instructor per week (instructor_week). Write your code in the following chunk:
 
 ```{r}
-
+student_week <- sdata_wide %>% dplyr::group_by(week) %>% dplyr::summarise(mean(motivation))  # one row per week: mean of motivation
+instructor_week <- instructor_data %>% dplyr::group_by(week) %>% dplyr::summarise(mean(m_active_time))  # one row per week: mean of m_active_time
 ```
 
 ##Merging
@@ -129,13 +132,22 @@ Now we will merge these two data frames using dplyr.
 
 ```{r}
 merge <- dplyr::full_join(instructor_week, student_week, "week")
+names(merge)<-c("week","mean_active_time","mean_motivation")
 ```
 
 ##Visualize
 Visualize the relationship between these two variables (mean motivation and mean instructor activity) with the "plot" command and then run a Pearson correlation test (hint: cor.test()). Write the code for the these commands below:
 
 ```{r}
-
+plot(merge$mean_active_time, merge$mean_motivation, xlab = "Mean instructor active time", ylab = "Mean student motivation")  # axis ranges are derived from the data
+cor.test(merge$mean_active_time, merge$mean_motivation)  # Pearson is cor.test's default method
 ```
 
 Fnally save your markdown document and your plot to this folder and comit, push and pull your repo to submit.
+
+## Alcoholic
+```{r}
+counts <- table(instructor_data$alcoholic_beverages)  # tally of each distinct value
+barplot(height = counts, xlab = "Number of 'help me forgets' to cope with students",
+        main = "Coping Distribution")
+```
\ No newline at end of file
diff --git a/class7.Rproj b/class7.Rproj
new file mode 100644
index 0000000..8e3c2eb
--- /dev/null
+++ b/class7.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX