diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/YQ class activity 1.Rmd b/YQ class activity 1.Rmd new file mode 100644 index 0000000..0274a41 --- /dev/null +++ b/YQ class activity 1.Rmd @@ -0,0 +1,49 @@ +--- +title: "activity 1" +author: "YQ" +date: "9/29/2019" +output: html_document +--- + + +## class activity 1 + +```{r} +library(tidyr) +library(dplyr) + +DF1 <- read.csv("swirl-data.csv", header = TRUE) + +##5. Create a new data frame that only includes the variables `hash`, `lesson_name` and `attempt` called `DF2` +DF2 <- select(DF1, hash, lesson_name, attempt) + +##6. Use the `group_by` function to create a data frame that sums all the attempts for each `hash` by each `lesson_name` called `DF3` +DF3 <- DF2 %>% group_by(hash, lesson_name) %>% summarise(attempt = sum(attempt)) +## same as DF3 <- summarise(group_by(DF2, hash, lesson_name), attempt = sum(attempt)) + + +##8. Convert `DF3` to this format +DF3b <- spread(DF3, lesson_name, attempt) + +##9. Create a new data frame from `DF1` called `DF4` that only includes the variables `hash`, `lesson_name` and `correct` +DF4 <- select(DF1, hash, lesson_name, correct) + +##10. Convert the `correct` variable so that `TRUE` is coded as the **number** `1` and `FALSE` is coded as `0` +DF4$correct <- ifelse(DF4$correct == TRUE, 1, 0) + +##11. Create a new data frame called `DF5` that provides a mean score for each student on each course +DF5 <- + ##summarise(group_by(DF4, hash, lesson_name), mean(correct, na.rm = TRUE)) + DF4 %>% group_by(hash, lesson_name) %>% summarise(mean = mean(correct, na.rm = TRUE)) + +##12. **Extra credit** Convert the `datetime` variable into month-day-year format and create a new data frame (`DF6`) that shows the average correct for each day +DF6 <- select(DF1, correct, datetime) + +DF6$correct <- ifelse(DF6$correct == TRUE, 1, 0) + +DF6$datetime <- as.POSIXlt(DF6$datetime, origin = "1970-01-01 00:00.00 UTC") + +DF6$datetime <- strftime(DF6$datetime, format="%m-%d-%y") + +DF7 <- summarise(group_by(DF6, datetime), average = mean(correct, na.rm = TRUE)) +``` diff --git a/class activity 1.Rproj b/class activity 1.Rproj new file mode 100644 index 0000000..8e3c2eb --- /dev/null +++ b/class activity 1.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX