From 7b4f50278255097213898325ae7bbb736d036c52 Mon Sep 17 00:00:00 2001
From: JR1990 <jg3821@tc.columbia.edu>
Date: Thu, 29 Sep 2016 12:16:52 -0400
Subject: [PATCH] Assignment 3 - jie gao

---
 Class 7 Instructions.Rmd | 52 ++++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 23 deletions(-)

diff --git a/Class 7 Instructions.Rmd b/Class 7 Instructions.Rmd
index 5ae641a..ec41f2e 100644
--- a/Class 7 Instructions.Rmd	
+++ b/Class 7 Instructions.Rmd	
@@ -9,7 +9,7 @@ date: "February 13, 2016"
 
 ##Install packages for manipulating data
 We will use two packages: tidyr and dplyr
-```{r}
+```{r, eval=FALSE}
 #Insall packages
 install.packages("tidyr", "dplyr")
 #Load packages
@@ -17,8 +17,8 @@ library(tidyr, dplyr)
 ```
 
 ##Upload wide format instructor data (instructor_activity_wide.csv)
-```{r}
-data_wide <- read.table("~/Documents/NYU/EDCT2550/Assignments/Assignment 3/instructor_activity_wide.csv", sep = ",", header = TRUE)
+```{r, eval=FALSE}
+data_wide <- read.table(file = "instructor_activity_wide.csv", header = TRUE, sep = ",")  # path is relative to the working directory
 
 #Now view the data you have uploaded and notice how its structure: each variable is a date and each row is a type of measure.
 View(data_wide)
@@ -37,7 +37,7 @@ The gather command requires the following input arguments:
 - value: Name of new value column
 - ...: Names of source columns that contain values
 
-```{r}
+```{r, eval=FALSE}
 data_long <- gather(data_wide, date, variables)
 #Rename the variables so we don't get confused about what is what!
 names(data_long) <- c("variables", "date", "measure")
@@ -52,14 +52,15 @@ The spread function requires the following input:
 - key: Name of column containing the new column names
 - value: Name of column containing values
 
-```{r}
+```{r, eval=FALSE}
 instructor_data <- spread(data_long, variables, measure)
 ```
 
 ##Now we have a workable instructor data set!The next step is to create a workable student data set. Upload the data "student_activity.csv". View your file once you have uploaded it and then draw on a piece of paper the structure that you want before you attempt to code it. Write the code you use in the chunk below. (Hint: you can do it in one step)
 
-```{r}
-
+```{r, eval=FALSE}
+student_activity <- read.table(file = "student_activity.csv", header = TRUE, sep = ",")  # long format, as read from disk
+student_data <- spread(data = student_activity, key = variable, value = measure)  # one column per distinct value of `variable`
 ```
 
 ##Now that you have workable student data set, subset it to create a data set that only includes data from the second class. 
@@ -68,45 +69,45 @@ To do this we will use the dplyr package (We will need to call dplyr in the comm
 
 Notice that the way we subset is with a logical rule, in this case date == 20160204. In R, when we want to say that something "equals" something else we need to use a double equals sign "==". (A single equals sign means the same as <-).
 
-```{r}
-student_data_2 <- dplyr::filter(student_data, date == 20160204)
+```{r, eval=FALSE}
+student_data_2 <- dplyr::filter(student_data, date == 20160204)  # subset the spread (wide) student_data, not the raw long student_activity
 ```
 
 Now subset the student_activity data frame to create a data frame that only includes students who have sat at table 4. Write your code in the following chunk:
 
-```{r}
-
+```{r, eval=FALSE}
+student_data_3 <- student_data %>% dplyr::filter(table == 4)  # keep only rows whose `table` value is 4
 ```
 
 ##Make a new variable
 
 It is useful to be able to make new variables for analysis. We can either apend a new variable to our dataframe or we can replace some variables with a new variable. Below we will use the "mutate" function to create a new variable "total_sleep" from the light and deep sleep variables in the instructor data.
 
-```{r}
+```{r, eval=FALSE}
 instructor_data <- dplyr::mutate(instructor_data, total_sleep = s_deep + s_light)
 ```
 
 Now, refering to the cheat sheet, create a data frame called "instructor_sleep" that contains ONLY the total_sleep variable. Write your code in the following code chunk:
 
-```{r}
-
+```{r, eval=FALSE}
+instructor_sleep <- instructor_data %>% dplyr::select(total_sleep)  # data frame holding only the total_sleep column
 ```
 
 Now, we can combine several commands together to create a new variable that contains a grouping. The following code creates a weekly grouping variable called "week" in the instructor data set:
 
-```{r}
+```{r, eval=FALSE}
 instructor_data <- dplyr::mutate(instructor_data, week = dplyr::ntile(date, 3))
 ```
 
 Create the same variables for the student data frame, write your code in the code chunk below:
-```{r}
-
+```{r, eval=FALSE}
+student_data <- student_data %>% dplyr::mutate(week = dplyr::ntile(date, 3))  # bucket rows into 3 groups by date
 ```
 
 ##Sumaraizing
 Next we will summarize the student data. First we can simply take an average of one of our student variables such as motivation:
 
-```{r}
+```{r, eval=FALSE}
 student_data %>% dplyr::summarise(mean(motivation))
 
 #That isn't super interesting, so let's break it down by week:
@@ -116,22 +117,27 @@ student_data %>% dplyr::group_by(date) %>% dplyr::summarise(mean(motivation))
 
 Create two new data sets using this method. One that sumarizes average motivation for students for each week (student_week) and another than sumarizes "m_active_time" for the instructor per week (instructor_week). Write your code in the following chunk:
 
-```{r}
-
+```{r, eval=FALSE}
+student_week <- dplyr::summarise(dplyr::group_by(student_data, week), mean(motivation))  # mean motivation per week
+instructor_week <- dplyr::summarise(dplyr::group_by(instructor_data, week), mean(m_active_time))  # mean m_active_time per week
 ```
 
 ##Merging
 Now we will merge these two data frames using dplyr. 
 
-```{r}
+```{r, eval=FALSE}
 merge <- dplyr::full_join(instructor_week, student_week, "week")
 ```
 
 ##Visualize
 Visualize the relationship between these two variables (mean motivation and mean instructor activity) with the "plot" command and then run a Pearson correlation test (hint: cor.test()). Write the code for the these commands below:
 
-```{r}
-
+```{r, eval=FALSE}
+names(instructor_week) <- c("week", "average_instructor")  # note: merge was already built, so this rename does not change merge
+names(student_week) <- c("week", "average_student")
+names(merge) <- c("week", "avg_instructor", "avg_student")  # full_join(instructor_week, student_week): instructor column comes before student column
+plot(merge$avg_instructor, merge$avg_student)  # x: mean instructor m_active_time, y: mean student motivation
+cor.test(merge$avg_instructor, merge$avg_student, method = "pearson")
 ```
 
 Fnally save your markdown document and your plot to this folder and comit, push and pull your repo to submit.