-
Notifications
You must be signed in to change notification settings - Fork 0
/
lazy population and employment graphs.R
116 lines (97 loc) · 5.45 KB
/
lazy population and employment graphs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
library(did)
library(dplyr)
library(readr)
library(Hmisc)
library(ggplot2)
# Load the prepared panel from disk.
didTable <- read_csv("./formattedData/DidTable.csv")

# Coerce the treatment indicator to numeric.
didTable[["TREATED"]] <- as.numeric(didTable[["TREATED"]])

# Exponentiate the LOG* columns to obtain level values
# (presumably natural logs of population / employment - confirm upstream).
didTable[["POP"]] <- exp(didTable[["LOGPOP"]])
didTable[["EMPLOY"]] <- exp(didTable[["LOGEMPLOY"]])

# Plain data.frame copy used by the rest of the script.
# NOTE: the name `table` shadows base::table as a variable.
table <- as.data.frame(didTable)
# Yearly summary statistics of POP and EMPLOY across all rows of `table`.
#
# Result: `statTable`, a data.frame with one row per year present in `table`
# (restricted to `years`) and the columns
#   year                         year as character (discrete x axis in ggplot)
#   meanPop, medianPop, sdPop    mean / median / sd of POP
#   meanEmploy, medianEmploy     mean / median of EMPLOY
#   group                        placeholder, always NA
years <- 2010:2019

# A single grouped summarise replaces the earlier per-year loop, which
# re-filtered `table` five times per year and grew the result row by row with
# add_row(). A year with no rows in `table` is simply absent from the result
# instead of producing a row of NaN/NA.
# dplyr::summarise is namespaced because Hmisc is attached after dplyr and
# masks summarize().
# An earlier, disabled variant bucketed counties by 2010 population using
# Hmisc::cut2; it is not reproduced here.
statTable <- table %>%
  filter(YEAR %in% years) %>%
  group_by(year = as.character(YEAR)) %>%
  dplyr::summarise(
    meanPop = mean(POP),
    medianPop = median(POP),
    sdPop = sd(POP),
    meanEmploy = mean(EMPLOY),
    medianEmploy = median(EMPLOY),
    .groups = "drop"
  ) %>%
  mutate(group = NA_character_) %>%
  arrange(year) %>%
  as.data.frame()

# Drop 2019 so that the figures built from statTable cover 2010-2018.
statTable <- statTable %>% filter(year != "2019")
# Figure 5a: mean POP per year from statTable, saved as a 5.95 x 3.5 inch PNG
# at 300 dpi, together with a CSV of the plotted values.
figure5a <- ggplot(statTable, aes(x = year, y = meanPop, group = 1)) +
  geom_line(linewidth = 1.5, color = "black") +
  geom_point(
    fill = "black",
    size = 3.5,
    pch = 21, # Type of point that allows us to have both color (border) and fill.
    colour = "#FFFFFF",
    stroke = 1 # The width of the border, i.e. stroke.
  ) +
  labs(
    title = "Average population of US counties, 2010-2018",
    x = "Year",
    y = "Average population"
  ) +
  theme(plot.title = element_text(size = 14)) +
  # Zoom the y axis without dropping data outside the window.
  coord_cartesian(ylim = c(70000, 120000))

png(filename = 'figures/final paper/figure5a.png', units = 'in', width = 5.95, height = 3.5, res = 300, type = c('cairo'))
# Print explicitly: a ggplot object is only drawn when printed, and top-level
# auto-printing does not happen under source(), which would leave the PNG blank.
print(figure5a)
dev.off()

# Data behind the figure.
write_csv(statTable %>% select('year', 'meanPop'), 'figures/final paper/figure5adata.csv')
# Median POP per year, drawn on the current graphics device (not saved).
# statTable has had 2019 removed before this point, so the title states
# 2010-2018 (it previously claimed 2010-2019).
medianPopPlot <- ggplot(statTable, aes(x = year, y = medianPop, group = 1)) +
  geom_line(linewidth = 2.4, color = "lightblue") +
  geom_point(
    fill = "lightblue",
    size = 5,
    pch = 21, # Type of point that allows us to have both color (border) and fill.
    colour = "#FFFFFF",
    stroke = 1 # The width of the border, i.e. stroke.
  ) +
  labs(
    title = "Median Population of US Counties, 2010-2018",
    x = "Year",
    y = "Median Population"
  ) +
  theme(plot.title = element_text(size = 14))

# Explicit print so the plot is also drawn when the script is run via source().
print(medianPopPlot)
# Figure 5b: mean EMPLOY per year from statTable, saved as a 5.95 x 3.5 inch
# PNG at 300 dpi, together with a CSV of the plotted values.
figure5b <- ggplot(statTable, aes(x = year, y = meanEmploy, group = 1)) +
  geom_line(linewidth = 1.5, color = "black") +
  geom_point(
    fill = "black",
    size = 3.5,
    pch = 21, # Type of point that allows us to have both color (border) and fill.
    colour = "#FFFFFF",
    stroke = 1 # The width of the border, i.e. stroke.
  ) +
  labs(
    title = "Average number of people employed in US counties, 2010-2018",
    x = "Year",
    y = "Average number employed"
  ) +
  theme(plot.title = element_text(size = 12)) +
  # Zoom the y axis without dropping data outside the window.
  coord_cartesian(ylim = c(70000, 110000))

png(filename = 'figures/final paper/figure5b.png', units = 'in', width = 5.95, height = 3.5, res = 300, type = c('cairo'))
# Print explicitly: a ggplot object is only drawn when printed, and top-level
# auto-printing does not happen under source(), which would leave the PNG blank.
print(figure5b)
dev.off()

# Data behind the figure.
write_csv(statTable %>% select('year', 'meanEmploy'), 'figures/final paper/figure5bdata.csv')
# Median EMPLOY per year, drawn on the current graphics device (not saved).
# statTable has had 2019 removed before this point, so the title states
# 2010-2018 (it previously claimed 2010-2019).
medianEmployPlot <- ggplot(statTable, aes(x = year, y = medianEmploy, group = 1)) +
  geom_line(linewidth = 2.4, color = "lightblue") +
  geom_point(
    fill = "lightblue",
    size = 5,
    pch = 21, # Type of point that allows us to have both color (border) and fill.
    colour = "#FFFFFF",
    stroke = 1 # The width of the border, i.e. stroke.
  ) +
  labs(
    title = "Median Number of People Employed of US Counties, 2010-2018",
    x = "Year",
    y = "Median Number Employed"
  ) +
  theme(plot.title = element_text(size = 14))

# Explicit print so the plot is also drawn when the script is run via source().
print(medianEmployPlot)
# Find how many counties went up in population, how many went down, and how
# many stayed the same between 2010 and 2019.
#
# Result: `change`, one row per county present in BOTH years, with
#   GEO_ID         county identifier
#   change         POP(2019) - POP(2010)
#   changePercent  change relative to POP(2010), in percent
# plus the scalar counts `up`, `down` and `theSame`.
startandend <- filter(table, YEAR == 2010 | YEAR == 2019)

# Pair the two years by GEO_ID instead of by row position. The earlier loop
# assumed rows i and i + 1 were the same county, which silently mispairs
# counties when the table is not sorted by county or a county lacks one of the
# years, and it failed outright on an empty table.
pop2010 <- startandend %>%
  filter(YEAR == 2010) %>%
  select(GEO_ID, popStart = POP)
pop2019 <- startandend %>%
  filter(YEAR == 2019) %>%
  select(GEO_ID, popEnd = POP)

change <- inner_join(pop2010, pop2019, by = "GEO_ID") %>%
  transmute(
    GEO_ID,
    change = popEnd - popStart,
    changePercent = change / popStart * 100
  )

# POP is rebuilt with exp(), so compare against zero with a tolerance rather
# than with ==.
unchanged <- dplyr::near(change$change, 0)
up <- sum(change$change > 0 & !unchanged, na.rm = TRUE)
down <- sum(change$change < 0 & !unchanged, na.rm = TRUE)
theSame <- sum(unchanged, na.rm = TRUE)
# Scatter plots of per-county population change, ranked from lowest to highest.

# Rank by absolute change. Inside arrange() the second `change` is the column.
change <- arrange(change, change)
# seq_len() stays valid for an empty table, where 1:length(...) gives c(1, 0).
change$num <- seq_len(nrow(change))
# Explicit print so the plots are also drawn when the script is run via source().
print(ggplot(change, aes(x = num, y = change)) + geom_point())

# Re-rank by percentage change and colour points on a diverging scale centred
# on zero (red = decline, green = growth).
change <- arrange(change, changePercent)
change$num <- seq_len(nrow(change))
percentChangePlot <- ggplot(change, aes(x = num, y = changePercent, color = changePercent)) +
  geom_point() +
  labs(
    title = "% Change of Population in Individual US Counties from 2010 to 2019 ",
    x = "Sorted Ranking of Change from Lowest Change to Highest Change",
    y = "Percent Change",
    color = "Percent Change of Population"
  ) +
  theme(plot.title = element_text(size = 14)) +
  scale_color_gradient2(low = "darkred", high = "darkgreen", mid = "lightyellow", midpoint = 0)
print(percentChangePlot)