Reproducible Research: Peer Assessment 1

Loading and preprocessing the data

Read the data, assuming that the unzipped CSV file (activity.csv) is in the working directory.

# Load the activity data from the working directory. Strings are kept as
# character vectors (not factors); TRUE/FALSE are spelled out because the
# T/F shorthands are ordinary variables that can be reassigned.
dt <- read.csv("activity.csv", header = TRUE, stringsAsFactors = FALSE)
What is mean total number of steps taken per day?

# NOTE(review): summarise()/group_by() and ggplot()/geom_histogram() are not
# base R; this assumes dplyr and ggplot2 are already attached -- confirm.

# Total number of steps per date. na.rm is not set, so a date containing a
# missing reading gets an NA total rather than a partial sum.
# The result is named `daily.sum` (not `sum`) so base::sum() is not masked.
daily.sum <- summarise(group_by(dt, date), steps = sum(steps))

# Histogram of the daily totals.
ggplot(daily.sum, aes(x = steps)) + geom_histogram()

# Mean and median of the daily totals, skipping dates whose total is NA.
mean(daily.sum$steps, na.rm = TRUE)
median(daily.sum$steps, na.rm = TRUE)
What is the average daily activity pattern?

# Mean number of steps for each interval, taken over all dates and ignoring
# missing readings.
int <- summarise(group_by(dt, interval), steps = mean(steps, na.rm = TRUE))

# Re-encode the integer interval as a zero-padded four-character string so
# that it can be parsed as a clock time, e.g. 105 -> "0105".
int$interval <- sprintf("%04d", int$interval)

# Line plot of the average steps against the interval parsed as "%H%M".
plot(strptime(int$interval, format = "%H%M"), int$steps,
     type = "l", xlab = "interval", ylab = "average steps")

# Interval(s) with the highest average number of steps.
filter(int, steps == max(steps))
Imputing missing values

# Number of rows containing at least one missing value.
sum(!complete.cases(dt))

# Impute on a copy so the raw data in `dt` stays untouched.
filled.dt <- dt

# Replace every missing `steps` value with the mean of that row's own
# interval. Rows are paired with `int` through match() on the interval value
# rather than through vector recycling, which is only correct when the
# missing values cover whole days in interval order. as.integer() lets the
# lookup work whether int$interval is still an integer or has already been
# zero-padded into a string such as "0105".
na.rows <- is.na(filled.dt$steps)
filled.dt$steps[na.rows] <-
  int$steps[match(filled.dt$interval[na.rows], as.integer(int$interval))]

# Histogram of the total number of steps per date after imputation.
filled.sum <- summarise(group_by(filled.dt, date), steps = sum(steps))
ggplot(filled.sum, aes(x = steps)) + geom_histogram()

# Mean and median of the imputed daily totals. na.rm is not set, so any
# value that is still missing after imputation shows up as NA here.
mean(filled.sum$steps)
median(filled.sum$steps)
Here you can see that the median is now equal to the mean, which is a result of filling in the missing values.

Are there differences in activity patterns between weekdays and weekends?

# Convert the character dates to Date objects.
filled.dt$date <- as.Date(filled.dt$date)

# Classify each date as "weekday" or "weekend". The numeric POSIXlt day of
# week (0 = Sunday ... 6 = Saturday) is used instead of weekdays(), whose
# day names depend on the session locale. The levels are fixed explicitly so
# the labels stay correct even when only one kind of day is present.
isWeekend <- factor(
  as.POSIXlt(filled.dt$date)$wday %in% c(0L, 6L),
  levels = c(FALSE, TRUE),
  labels = c("weekday", "weekend")
)
filled.dt <- mutate(filled.dt, day = isWeekend)

# Mean steps per interval for one day type, with the interval re-encoded as
# a zero-padded "HHMM" string ready for strptime().
interval.profile <- function(data, day.type) {
  data %>%
    filter(day == day.type) %>%
    group_by(interval) %>%
    summarise(steps = mean(steps)) %>%
    mutate(interval = sprintf("%04d", interval))
}

weekday <- interval.profile(filled.dt, "weekday")
weekend <- interval.profile(filled.dt, "weekend")

# One line plot per day type.
plot(strptime(weekday$interval, format = "%H%M"), weekday$steps,
     type = "l", xlab = "Interval", ylab = "Number of steps", main = "Weekday")

plot(strptime(weekend$interval, format = "%H%M"), weekend$steps,
     type = "l", xlab = "Interval", ylab = "Number of steps", main = "Weekend")