-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdatacleaning.R
30 lines (25 loc) · 1.42 KB
/
datacleaning.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Lookup table of state names and codes; it is merged onto the population data
# further down through its "Code" column.
state_abbreviations <- read.csv(
  file = "./data/StateCodeData.csv",
  stringsAsFactors = TRUE
)

# Paths of every per-state population CSV.
# `pattern` is a regular expression, not a shell glob, so match a literal
# ".csv" extension at the end of the file name.
# The directory prefix is pasted on by hand (instead of full.names = TRUE) so
# the path strings keep exactly the same shape: extract_state() slices fixed
# character positions out of them.
state_population_dir <- "./data/State Populations/"
state_file_list <- paste0(
  state_population_dir,
  list.files(path = state_population_dir, pattern = "\\.csv$")
)
# Extract the two-character state code from a population file path.
#
# filepath: character vector of file paths.
# first, last: 1-based positions of the first and last character of the code
#   inside `filepath`. The defaults (65 and 66) reproduce the original
#   hard-coded behaviour.
# Returns a character vector the same length as `filepath`; an element is ""
#   when its path is shorter than `first`.
#
# NOTE(review): the positions are absolute offsets into the whole path string,
# so they depend on the directory prefix as well as on the file name. The
# prefix used in this script ('./data/State Populations/') is 25 characters
# long -- confirm the file names really carry the code at characters 65-66.
extract_state <- function(filepath, first = 65L, last = 66L) {
  substr(filepath, first, last)
}
# Read one state population CSV and return it as a data frame with two extra
# columns: `state` (the code sliced out of the file path) and `Year`.
#
# file: path to a single CSV file whose first row is a header and whose two
#   columns are renamed to Date and Population.in.1000s (read.csv makes the
#   second name syntactic).
# Returns the data frame with the added `state` and `Year` columns.
read_single_file <- function(file) {
  population <- read.csv(
    file = file,
    header = TRUE,
    col.names = c("Date", "Population in 1000s"),
    stringsAsFactors = TRUE
  )

  # The same state code applies to every row of the file.
  population$state <- extract_state(file)

  # Keep just the four-digit year of each observation date, as text.
  observation_dates <- as.Date(population$Date, "%Y-%m-%d")
  population$Year <- format(observation_dates, "%Y")

  population
}
# The pipeline below relies on dplyr (`%>%`, bind_rows(), select(), filter());
# no library() call for it is visible elsewhere in this script, so attach it
# here.
library(dplyr)

# Create a single data frame from all .csv files in the State Populations folder
aggregated_population_df <-
  lapply(state_file_list, read_single_file) %>%
  bind_rows()

# Attach the full state name by matching the extracted state code against the
# lookup table, keep only the year, population and state-name columns, and
# restrict the rows to 1980-2014.
aggregated_population_df <-
  merge(
    x = aggregated_population_df,
    y = state_abbreviations,
    by.x = "state",
    by.y = "Code"
  ) %>%
  select(Year, Population.in.1000s, State) %>%
  # Year is stored as text (it comes from format()), so compare it numerically
  # rather than relying on string ordering against the numeric bounds.
  filter(as.integer(Year) >= 1980L, as.integer(Year) <= 2014L)
# Write the cleaned data to a CSV file to be used in the Shiny App
write.csv(
  aggregated_population_df,
  file = "./data/state_populations_by_year.csv",
  row.names = FALSE
)

# View() is an interactive data viewer, so only open it when the script is run
# from an interactive session (and skip it under e.g. Rscript).
if (interactive()) {
  View(aggregated_population_df)
}