.Rhistory

select(state, location, columns)
View(state_year_data)
# Show one state's "any" and "binge" drinking rates for a single year.
#
# Args:
#   name: State name compared against the `state` column of `all_drinking`.
#   year: Year (numeric or character) used as the column suffix, e.g. 2011
#         selects `any_both_sexes_2011`.
#
# Reads the global data frame `all_drinking`, keeps the rows for `name`,
# orders them by the combined-sex "any drinking" rate (highest first), opens
# the result in the data viewer and returns it invisibly.
export_state_year <- function(name, year) {
  year_given <- toString(year)
  sexes <- c("females_", "males_", "both_sexes_")
  # The joined table prefixes its columns with "any_" and "binge_"; nothing
  # in this file creates "given_" columns.
  columns <- c(
    paste0("any_", sexes, year_given),
    paste0("binge_", sexes, year_given)
  )
  arrange_col <- paste0("any_both_sexes_", year_given)
  state_year_data <- all_drinking %>%
    filter(state == name) %>%
    # Negating a column-name string is an error; look the column up by name
    # and sort it in descending order instead.
    arrange(desc(.data[[arrange_col]])) %>%
    select(state, location, all_of(columns))
  View(state_year_data)
  # write.csv(state_year_data, paste0('data/', paste0("drinking_", name, year)))
  invisible(state_year_data)
}
# 6
# Test function
export_state_year("Washington", 2011)
#################
#### PART 1 #####
#################
# Install dplyr only when it is missing, then attach it once.
# The package name has to be a quoted string: `install.packages(dplyr)`
# fails because R looks for an object called `dplyr`.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
# Show one state's "any" and "binge" drinking rates for a single year.
#
# Args:
#   name: State name compared against the `state` column of `all_drinking`.
#   year: Year (numeric or character) used as the column suffix, e.g. 2011
#         selects `any_both_sexes_2011`.
#
# Reads the global data frame `all_drinking`, keeps the rows for `name`,
# orders them by the combined-sex "any drinking" rate (highest first), opens
# the result in the data viewer and returns it invisibly.
export_state_year <- function(name, year) {
  year_given <- toString(year)
  sexes <- c("females_", "males_", "both_sexes_")
  # The joined table prefixes its columns with "any_" and "binge_"; nothing
  # in this file creates "given_" columns.
  columns <- c(
    paste0("any_", sexes, year_given),
    paste0("binge_", sexes, year_given)
  )
  sort_col <- paste0("any_both_sexes_", year_given)
  state_year_data <- all_drinking %>%
    filter(state == name) %>%
    # `-!!"column_name"` negates a string and errors; look the column up by
    # name and sort it in descending order instead.
    arrange(desc(.data[[sort_col]])) %>%
    select(state, location, all_of(columns))
  View(state_year_data)
  # write.csv(state_year_data, paste0('data/', paste0("drinking_", name, year)))
  invisible(state_year_data)
}
# 6
# Test function
export_state_year("Washington", 2011)
#################
#### PART 1 #####
#################
# install.packages("dplyr")
library(dplyr)

# Access and isolate data: 2012 "any drinking" rates plus the
# male-minus-female gap for every location
any_drink <- read.csv("data/any_drinking.csv", stringsAsFactors = FALSE)
any_drink_2012 <- any_drink %>%
  select(state, location, both_sexes_2012, males_2012, females_2012) %>%
  mutate(differ = males_2012 - females_2012)

# Number of locations in 2012 where females drink more than males
# (a negative gap means the female rate is the higher one)
print(paste(
  "Females drank more than males in",
  any_drink_2012 %>%
    filter(differ < 0) %>%
    nrow(),
  "locations"
))

# Where male and female drinking percentages are the closest.
# The gap is signed, so compare absolute values: min(differ) would pick the
# location where females exceed males the most, not the smallest gap.
closest <- any_drink_2012 %>%
  filter(abs(differ) == min(abs(differ))) %>%
  select(location, state)
print(paste0(
  "Drinking is most similar between genders in ",
  closest$location, ", ",
  closest$state
))

# 2012 State level observations (rows whose location is the state itself)
state_2012 <- any_drink_2012 %>%
  filter(state == location)

# State with highest drinking rate (both sexes combined) in 2012
high_both <- state_2012 %>%
  filter(both_sexes_2012 == max(both_sexes_2012)) %>%
  select(state, both_sexes_2012)
print(high_both)

# State with lowest drinking rate (both sexes combined) in 2012
low_both <- state_2012 %>%
  filter(both_sexes_2012 == min(both_sexes_2012)) %>%
  select(state, both_sexes_2012)
print(low_both)

# Difference in lowest and highest drinking rates (both sexes combined) in 2012
print(paste0(
  "Range in drinking rate was ",
  high_both$both_sexes_2012 - low_both$both_sexes_2012,
  "%"
))
#################
#### PART 2 #####
#################
# Load the binge-drinking table and keep county rows only, i.e. drop rows
# whose location is the state itself and rows whose state is "National"
binge_drink <- read.csv("data/binge_drinking.csv", stringsAsFactors = FALSE)
county_only_binge <- filter(
  binge_drink,
  !(state == location | state == "National")
)

# Sanity check: county rows plus state/national rows must add up to all rows
other_only_binge <- nrow(
  filter(binge_drink, state == location | state == "National")
)
nrow(binge_drink) == nrow(county_only_binge) + other_only_binge # confirm data is correct

# Add 3 columns holding the 2002-minus-2012 change in binge drinking rates
# for both sexes combined, males, and females
county_only_binge <- mutate(
  county_only_binge,
  change_both = both_sexes_2002 - both_sexes_2012,
  change_males = males_2002 - males_2012,
  change_females = females_2002 - females_2012
)

# 1
# Average rate of binge drinking (both sexes combined) in 2012; county only
print(summarize(county_only_binge, avg = mean(both_sexes_2012)))

# 2
# Lowest and highest county rate of both_sexes_2012 within each state,
# saved to disk
state_binge <- summarize(
  group_by(county_only_binge, state),
  min_rate = min(both_sexes_2012),
  max_rate = max(both_sexes_2012)
)
write.csv(state_binge, "data/state_binge_drinking.csv")

# Smallest of the per-state minimum rates
select(filter(state_binge, min_rate == min(min_rate)), min_rate)

# Largest of the per-state maximum rates
select(filter(state_binge, max_rate == max(max_rate)), max_rate)
# 3
# Percentage of counties who observed an increase in male binge drinking
# (2002 - 2012). The change column is 2002 minus 2012, so a negative value
# means the rate went up.
# The pipeline must end at filter(): a trailing %>% would feed the data frame
# into print() and reference nmale_increase before it has been assigned.
nmale_increase <- county_only_binge %>%
  filter(change_males < 0)
print(paste0(
  "Male binging increased in ",
  round((nrow(nmale_increase) / nrow(county_only_binge)) * 100, 0),
  "% of counties"
))
# 4
# Share of counties where female binge drinking rose between 2002 and 2012
# (the change column is 2002 minus 2012, so a negative value is an increase)
nfemale_increase <- filter(county_only_binge, change_females < 0)
print(paste0(
  "Female binging increased in ",
  round(nrow(nfemale_increase) / nrow(county_only_binge) * 100),
  "% of counties"
))

# 5
# Share of counties where female binge drinking rose while male binge
# drinking fell over the same period
f_incr_m_decr <- filter(nfemale_increase, change_males > 0)
print(paste0(
  "Female binging increased & male binging decreased in ",
  round(nrow(f_incr_m_decr) / nrow(county_only_binge) * 100),
  "% of counties"
))
# 6
# The state with the largest median increase in male binge drinking at county
# level (2002 - 2012).
# Summarise the median per state first: filtering rows that *equal* the
# median silently drops every state where no county sits exactly on it
# (e.g. states with an even number of counties).
# Changes are 2002 minus 2012, so the largest increase is the minimum value.
med_incr_male <- county_only_binge %>%
  group_by(state) %>%
  summarize(change_males = median(change_males)) %>%
  filter(change_males == min(change_males))
print(med_incr_male)

# 7
# The state with the largest median increase in female binge drinking at
# county level (2002 - 2012); only considers counties with an increase in
# female and a decrease in male rates.
# change_both is reported as the per-state median as well.
med_incr_female <- f_incr_m_decr %>%
  group_by(state) %>%
  summarize(
    change_females = median(change_females),
    change_both = median(change_both)
  ) %>%
  filter(change_females == min(change_females)) %>%
  select(state, change_females, change_both)
# NOTE(review): an earlier run gave 3.4% where roughly 4% was expected;
# re-check the result now that the true per-state median is used.
print(med_incr_female)
#################
#### PART 3 #####
#################
# Preparing data
# 1
# Prefix every column so the two tables stay distinguishable after the join
names(any_drink) <- paste0("any_", names(any_drink))
names(binge_drink) <- paste0("binge_", names(binge_drink))

# 2
# Join on state + location, then give the key columns their plain names back
all_drinking <- any_drink %>%
  left_join(
    binge_drink,
    by = c("any_state" = "binge_state", "any_location" = "binge_location")
  ) %>%
  rename(state = any_state, location = any_location)

# 1
# Average rate of non-binge drinking at county level in 2012
county_only_all <- filter(
  all_drinking,
  !(state == location | state == "National")
)
county_non_binge_avg <- summarize(
  county_only_all,
  non_binge_avg = mean(any_both_sexes_2012) - mean(binge_both_sexes_2012)
)
print(county_non_binge_avg)

# 2
# State with the smallest amount of non binge drinking in 2012; state level
state_only_all <- filter(all_drinking, state == location)
state_non_binge_min_state <- state_only_all %>%
  mutate(non_binge_2012 = any_both_sexes_2012 - binge_both_sexes_2012) %>%
  filter(non_binge_2012 == min(non_binge_2012)) %>%
  select(state, any_both_sexes_2012, binge_both_sexes_2012, non_binge_2012)
print(state_non_binge_min_state)
# 3
# State with the smallest amount of non binge drinking in 2012; county level.
# Each state's value is the mean over its county rows. summarize() already
# returns an ungrouped table here, so no ungroup() is needed afterwards
# (current dplyr rejects ungroup(<column>) on an ungrouped table).
state_non_binge_min_county <- county_only_all %>%
  mutate(non_binge_2012 = any_both_sexes_2012 - binge_both_sexes_2012) %>%
  group_by(state) %>%
  summarize(
    any_drinking = mean(any_both_sexes_2012),
    binge_drinking = mean(binge_both_sexes_2012),
    non_binge_drinking = mean(non_binge_2012)
  ) %>%
  filter(non_binge_drinking == min(non_binge_drinking)) %>%
  select(state, any_drinking, binge_drinking, non_binge_drinking)
print(state_non_binge_min_county)
# Questions 2 and 3 use different rows: question 2 reads the state-level rows
# (state == location), while question 3 averages the county rows of each state.

# 4
# State most likely to have individuals who, if they drank, also binge drank
# in 2012: the highest binge rate as a percentage of the any-drinking rate
state_only_all %>%
  mutate(
    perc_binge_of_all = (binge_both_sexes_2012 / any_both_sexes_2012) * 100
  ) %>%
  filter(perc_binge_of_all == max(perc_binge_of_all)) %>%
  select(state)
# 5
#
# Show one state's "any" and "binge" drinking rates for a single year.
#
# Args:
#   name: State name compared against the `state` column of `all_drinking`.
#   year: Year (numeric or character) used as the column suffix, e.g. 2011
#         selects `any_both_sexes_2011`.
#
# Reads the global data frame `all_drinking`, keeps the rows for `name`,
# orders them by the combined-sex "any drinking" rate (highest first), opens
# the result in the data viewer and returns it invisibly.
export_state_year <- function(name, year) {
  year_given <- toString(year)
  sexes <- c("females_", "males_", "both_sexes_")
  # The joined table prefixes its columns with "any_" and "binge_"; nothing
  # in this file creates "given_" columns.
  columns <- c(
    paste0("any_", sexes, year_given),
    paste0("binge_", sexes, year_given)
  )
  sort_col <- paste0("any_both_sexes_", year_given)
  state_year_data <- all_drinking %>%
    filter(state == name) %>%
    # `-!!"column_name"` negates a string and errors; look the column up by
    # name and sort it in descending order instead.
    arrange(desc(.data[[sort_col]])) %>%
    select(state, location, all_of(columns))
  View(state_year_data)
  # write.csv(state_year_data, paste0('data/', paste0("drinking_", name, year)))
  invisible(state_year_data)
}
# 6
# Test function
export_state_year("Washington", 2011)
#################
#### PART 4 #####
#################
# Your script for Part 4 goes here (and delete this comment!)
# Show one state's "any" and "binge" drinking rates for a single year.
#
# Args:
#   name: State name compared against the `state` column of `all_drinking`.
#   year: Year (numeric or character) used as the column suffix, e.g. 2011
#         selects `any_both_sexes_2011`.
#
# Reads the global data frame `all_drinking`, keeps the rows for `name`,
# orders them by the combined-sex "any drinking" rate (highest first), opens
# the result in the data viewer and returns it invisibly.
export_state_year <- function(name, year) {
  year_given <- toString(year)
  sexes <- c("females_", "males_", "both_sexes_")
  # The joined table prefixes its columns with "any_" and "binge_"; nothing
  # in this file creates "given_" columns.
  columns <- c(
    paste0("any_", sexes, year_given),
    paste0("binge_", sexes, year_given)
  )
  sort_col <- paste0("any_both_sexes_", year_given)
  state_year_data <- all_drinking %>%
    filter(state == name) %>%
    # `-!!"column_name"` negates a string and errors; look the column up by
    # name and sort it in descending order instead.
    arrange(desc(.data[[sort_col]])) %>%
    select(state, location, all_of(columns))
  View(state_year_data)
  # write.csv(state_year_data, paste0('data/', paste0("drinking_", name, year)))
  invisible(state_year_data)
}
# 6
# Test function
export_state_year("Washington", 2011)
# 2
# Replayed join: match the two tables on state and location. The key names
# in `by` require that both tables already carry their any_/binge_ column
# prefixes. No rename() follows here, so the result keeps the key columns
# under the names any_state / any_location.
all_drinking <- left_join(
any_drink,
binge_drink,
by = c("any_state" = "binge_state", "any_location" = "binge_location")
)
# 2
# Same join issued a second time; it overwrites all_drinking with the same
# result.
all_drinking <- left_join(
any_drink,
binge_drink,
by = c("any_state" = "binge_state", "any_location" = "binge_location")
)
# Access and isolate data
# Re-reading the CSV replaces any_drink, so its columns go back to the names
# stored in the file (without the "any_" prefix).
any_drink <- read.csv('data/any_drinking.csv', stringsAsFactors = FALSE)
# Access and isolate data
# Same read issued a second time.
any_drink <- read.csv('data/any_drinking.csv', stringsAsFactors = FALSE)
# Packages installed and attached during this session
install.packages("httr")
library("httr")
install.packages("jsonlite")
library("jsonlite")

# Launch the a8 Shiny app (path is relative to the working directory)
shiny::runApp('Desktop/a8-data-app-meghanfrisch')

# Inspect the source table, then build the per-county table for one
# year/state: the three rate columns, one row per county
View(counties_not_numeric)
data_table <- counties_not_numeric %>%
  filter(year == 2000, parent.location == "Alabama") %>%
  select(County, Poverty_Rate, Rent_Burden, Eviction_Rate) %>%
  distinct(County, .keep_all = TRUE)
View(data_table)
runApp('Desktop/a8-data-app-meghanfrisch')
runApp('Desktop/a8-data-app-meghanfrisch')
runApp('Desktop/a8-data-app-meghanfrisch')
runApp('Desktop/a8-data-app-meghanfrisch')

# Text layout experiments for the selected county
cat(
  selected$County,
  "\n Eviction Rate:", selected$Eviction_Rate,
  "\n Rent Burden:", selected$Rent_Burden,
  "\n Poverty Rate:", selected$Poverty_Rate
)
# No comma after the last argument: a trailing comma passes an empty
# argument to cat(), which is an error.
cat(
  "\n Eviction Rate:",
  "\n Rent Burden:",
  "\n Poverty Rate:"
)
cat(
  "\n", "Eviction Rate:",
  "\n", "Rent Burden:",
  "\n", "Poverty Rate:"
)
runApp('Desktop/a8-data-app-meghanfrisch')
runApp('Desktop/a8-data-app-meghanfrisch')
runApp('Desktop/a8-data-app-meghanfrisch')
runApp('Desktop/a8-data-app-meghanfrisch')
runApp('Desktop/a8-data-app-meghanfrisch')
?suppressWarnings
# set up time as numerical value (hour.minute)
# `if` only accepts a single TRUE/FALSE, so the AM/PM test is done for the
# whole column at once; no warning has to be suppressed any more.
# Assumes data$time looks like "HH:MM:SS AM" (AM/PM at characters 10-11)
# - TODO confirm against the raw file.
is_pm <- substring(data$time, 10, 11) == "PM"
# `%% 12` turns the 12 o'clock hour into 0 before the PM offset is added, so
# 12 AM becomes 0 and 12 PM becomes 12.
hour <- as.numeric(substring(data$time, 1, 2)) %% 12 + ifelse(is_pm, 12, 0)
# NOTE(review): setwd() ties this script to one machine; prefer running it
# from the project directory.
setwd("~/Desktop/info201project")
library(shiny)
library(ggplot2)
library(dplyr)
library(maps)

# set up data: split the clearance timestamp into date, year and clock time
data <- read.csv(
  "data/cleaned_seattle_police_data.csv",
  stringsAsFactors = FALSE
)
data <- data %>%
  mutate(
    date = substring(Event.Clearance.Date, 1, 10),
    year = substring(Event.Clearance.Date, 7, 10),
    time = substring(Event.Clearance.Date, 12, 22)
  )

# set up time as numerical value (hour.minute)
# `if` only accepts a single TRUE/FALSE, so the AM/PM test is applied to the
# whole column at once. Assumes the clock string looks like "HH:MM:SS AM"
# (AM/PM at characters 10-11) - TODO confirm against the raw file.
is_pm <- substring(data$time, 10, 11) == "PM"
# `%% 12` turns the 12 o'clock hour into 0 before the PM offset is added, so
# 12 AM becomes 0 and 12 PM becomes 12.
hour <- as.numeric(substring(data$time, 1, 2)) %% 12 + ifelse(is_pm, 12, 0)
data$time <- as.numeric(paste0(hour, ".", substring(data$time, 4, 5)))

# set up sector data: exclude sector H
data <- data %>%
  filter(District.Sector %in% c(
    "N", "L", "J", "B", "U", "O", "R", "S", "K",
    "M", "D", "Q", "C", "E", "G", "F", "W"
  ))

# QUESTION 4
accident_data <- data %>%
  filter(Event.Clearance.Group == "MOTOR VEHICLE COLLISION INVESTIGATION")

# WIDGETS
# times for accidents
time_range <- range(accident_data$time)
# districts: distinct sector codes as a plain vector
districts <- data %>%
  distinct(District.Sector)
districts <- districts$District.Sector
# set up time as numerical value (hour.minute)
# One working version of the conversion. Two statements cannot share a single
# suppressWarnings(...) call, the function name is lower-case
# `suppressWarnings`, and `if` cannot test a whole column - so the AM/PM
# test is vectorised and only the numeric coercion is wrapped.
# Expects data$time to still hold the clock string ("HH:MM:SS AM" assumed,
# TODO confirm); running this on an already converted column gives NA.
is_pm <- substring(data$time, 10, 11) == "PM"
# `%% 12` turns the 12 o'clock hour into 0 before the PM offset is added, so
# 12 AM becomes 0 and 12 PM becomes 12.
hour <- as.numeric(substring(data$time, 1, 2)) %% 12 + ifelse(is_pm, 12, 0)
data$time <- suppressWarnings(
  as.numeric(paste0(hour, ".", substring(data$time, 4, 5)))
)
runApp()
runApp()
shiny::runApp()
# Crimes per district. n() counts the rows of the current group directly;
# indexing `data` by the group's values only produced that count by accident.
# criminal_data must already exist in the session at this point.
district_crimes <- criminal_data %>%
  group_by(District.Sector) %>%
  summarize(total_crimes = n())
max_crime <- max(district_crimes$total_crimes)

# Keep only the major crime categories. `%in%` tests membership; `==` would
# recycle major_crimes element by element along the column and keep only the
# rows that happen to line up with it.
criminal_data <- data %>%
  filter(Event.Clearance.Group %in% major_crimes)

# Crimes per year and the rounded average number of crimes per year
crime_rate_data <- criminal_data %>%
  group_by(year) %>%
  summarize(total_crimes = n())
crime_rate <- round(
  sum(crime_rate_data$total_crimes) / nrow(crime_rate_data)
)

# Districts with the most and the fewest major crimes
district_crimes <- criminal_data %>%
  group_by(District.Sector) %>%
  summarize(total_crimes = n())
max_crime <- max(district_crimes$total_crimes)
max_district <- district_crimes %>%
  filter(total_crimes == max(total_crimes)) %>%
  select(District.Sector)
min_crime <- min(district_crimes$total_crimes)
min_district <- district_crimes %>%
  filter(total_crimes == min(total_crimes)) %>%
  select(District.Sector)
library(dplyr)
library(shiny)
library(ggplot2)
library(dplyr)
library(maps)
library(tidyr)
library(plotly)
library(DT)
library(leaflet)
library(shinythemes)
# Keep only the major crime categories. `%in%` tests membership; `==` would
# recycle major_crimes element by element along the column and keep only the
# rows that happen to line up with it.
criminal_data <- data %>%
  filter(Event.Clearance.Group %in% major_crimes)

# Crimes per year and the rounded average number of crimes per year.
# n() counts the rows of the current group directly.
crime_rate_data <- criminal_data %>%
  group_by(year) %>%
  summarize(total_crimes = n())
crime_rate <- round(
  sum(crime_rate_data$total_crimes) / nrow(crime_rate_data)
)

# Crimes per district, plus the districts with the most and the fewest
district_crimes <- criminal_data %>%
  group_by(District.Sector) %>%
  summarize(total_crimes = n())
max_crime <- max(district_crimes$total_crimes)
max_district <- district_crimes %>%
  filter(total_crimes == max(total_crimes)) %>%
  select(District.Sector)
min_crime <- min(district_crimes$total_crimes)
min_district <- district_crimes %>%
  filter(total_crimes == min(total_crimes)) %>%
  select(District.Sector)
# Preview the rows that belong to a major crime category. `%in%` tests
# membership; `==` would recycle major_crimes along the column.
# major_crimes must already exist in the session for this first preview.
data %>%
  filter(Event.Clearance.Group %in% major_crimes)

# Event clearance groups treated as major crimes
major_crimes <- c(
  "ASSAULTS",
  "BURGLARY",
  "HOMICIDE",
  "NARCOTICS COMPLAINTS",
  "PROSTITUTION",
  "PROPERTY DAMAGE",
  "ROBBERY"
)
data %>%
  filter(Event.Clearance.Group %in% major_crimes)
data %>%
  filter(Event.Clearance.Group %in% major_crimes)
data %>%
  filter(Event.Clearance.Group %in% major_crimes)

# Crimes per year; n() counts the rows of the current group directly
criminal_data %>%
  group_by(year) %>%
  summarize(total_crimes = n())
criminal_data <- data %>%
  filter(Event.Clearance.Group %in% major_crimes)
crime_rate_data <- criminal_data %>%
  group_by(year) %>%
  summarize(total_crimes = n())
runApp()
runApp()
shiny::runApp()
runApp()