-
Notifications
You must be signed in to change notification settings - Fork 0
/
creating plots, heart disease mortality grouped by social factors.R
65 lines (55 loc) · 3.21 KB
/
creating plots, heart disease mortality grouped by social factors.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
# ---- Load input tables ----
# Heart disease mortality table; later code indexes it by cnty_fips and by
# column position.
hrtDisease <- read_csv(
  file = "./formattedData/formattedAtlasDatawStateSorted.csv"
)
percentInsured <- read_csv(
  file = "./formattedData/percentInsured.csv"
)

# Social factor tables, one CSV per factor. Only the table assigned to
# socialFactor further down is actually used in a given run.
obesity <- read_csv(
  file = "./socialFactorData/obesity % 2019.csv"
)
internet <- read_csv(
  file = "./socialFactorData/% without broadband internet, 2016-2020 5year.csv"
)
diploma <- read_csv(
  file = "./socialFactorData/% without high school diploma 2016-2020 5year.csv"
)
diabetes <- read_csv(
  file = "./socialFactorData/diagnosed diabetes % 2019.csv"
)
cholesterol <- read_csv(
  file = "./socialFactorData/high cholesterol % 2019.csv"
)
incinequality <- read_csv(
  file = "./socialFactorData/income inequality (gini index), 2016-2020 5year.csv"
)
income <- read_csv(
  file = "./socialFactorData/median household income (grouped) 2020.csv"
)
inactivity <- read_csv(
  file = "./socialFactorData/physical inactivity % 2019.csv"
)
poverty <- read_csv(
  file = "./socialFactorData/poverty % 2020.csv"
)
smoker <- read_csv(
  file = "./socialFactorData/smoker status % 2019.csv"
)
urbanrural <- read_csv(
  file = "./socialFactorData/urban-rural status 2013.csv"
)
# ---- Select the social factor to analyse ----
# factorName is the label used in the plot title; socialFactor is the table
# that gets grouped.
factorName <- "% Obesity 2019"
socialFactor <- obesity

# Missing values are coded as -1, so keep only rows with a positive Value.
socialFactor <- filter(socialFactor, Value > 0)

# Restrict both tables to the counties they have in common (mostly drops
# counties that stopped existing, US territories, etc.).
socialFactor <- socialFactor %>%
  filter(cnty_fips %in% hrtDisease$cnty_fips)
hrtDisease <- hrtDisease %>%
  filter(cnty_fips %in% socialFactor$cnty_fips)

# Histogram of the social factor, to help decide how to group the counties.
ggplot(data = socialFactor, mapping = aes(x = Value)) +
  geom_histogram()

# Split the counties into 4 evenly sized groups by Value.
groups <- split(
  x = socialFactor,
  f = Hmisc::cut2(x = socialFactor$Value, g = 4)
)
# Alternative: 4 equal-width intervals instead of evenly sized groups ->
# groups <- split(socialFactor, cut(dig.lab = 10, socialFactor$Value, 4))
# Alternative for urban-rural status: one group per distinct Value ->
# groups <- split(socialFactor, as.factor(socialFactor$Value))
# Optional relabelling of the group names as "(low-high%)" ->
# names(groups) <- paste0("(", substr(names(groups), 2, 5), "-", substr(names(groups), 7, 10), "%)")
# Build the long-format series behind the line chart: one row per
# (group, year) holding the mean of that year's heart disease column over the
# counties that belong to the group.
# Columns 4:17 of hrtDisease are read as the yearly columns and relabelled
# 2006..2019 purely by position -- NOTE(review): confirm against the CSV layout.
years <- names(hrtDisease)[4:17]
yearsNormal <- as.character(2006:2019)
# Fail early if the table has fewer columns than expected or the labels no
# longer line up one-to-one with the selected columns.
stopifnot(!anyNA(years), length(years) == length(yearsNormal))

# One small data frame per group, bound together once at the end instead of
# growing plotSeries with rbind() on every loop iteration.
seriesByGroup <- lapply(seq_along(groups), function(i) {
  # The county filter is evaluated once per group, not once per (group, year).
  groupCounties <- filter(hrtDisease, cnty_fips %in% groups[[i]]$cnty_fips)
  data.frame(
    year = yearsNormal,
    mean = vapply(
      years,
      function(yearCol) mean(groupCounties[[yearCol]]),
      numeric(1),
      USE.NAMES = FALSE
    ),
    group = names(groups)[[i]]
  )
})

# Seeding with a typed empty frame keeps the year/mean/group columns (and a
# plain data.frame result) even when there are no groups at all.
plotSeries <- bind_rows(
  data.frame(year = character(), mean = numeric(), group = character()),
  seriesByGroup
)
# Colour-blind-friendly palette shared by the line colours and point fills.
cbPalette <- c(
  "#E69F00", "#56B4E9", "#009E73", "#CC79A7",
  "#F0E442", "#0072B2", "#D55E00", "#999999"
)

# Take the year range for the title from the data actually plotted, so the
# title cannot drift from the x-axis (it used to be hard-coded as 2006-2018
# while the series runs through 2019).
yearSpan <- range(as.integer(plotSeries$year))

# Line chart of mean heart disease mortality per group and year.
ggplot(plotSeries, aes(x = year, y = mean, group = group, color = group)) +
  geom_line(linewidth = 2.4) +
  geom_point(
    aes(fill = group),
    size = 5,
    shape = 21, # Point type that has both a border colour and a fill.
    colour = "#FFFFFF", # White border around every point.
    stroke = 1 # Width of the border.
  ) +
  labs(
    title = paste0(
      "Average HDM of County-Cohort Groups Based on ", factorName,
      ", ", yearSpan[1], "-", yearSpan[2]
    ),
    x = "Year",
    y = "Age-standardized Mortality Rate per 100,000"
  ) +
  theme(plot.title = element_text(size = 9)) +
  scale_fill_manual(values = cbPalette) +
  scale_colour_manual(values = cbPalette)