-
Notifications
You must be signed in to change notification settings - Fork 0
/
00_preparation.R
92 lines (67 loc) · 2.66 KB
/
00_preparation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
library(tidyverse)
getwd()
#GET DATA####
# Load the two raw survey exports. For both files the first line is skipped
# (skip = 1), so the following line supplies the column names, and the literal
# string "n/a" is read as NA.
#lac
lac <- read.csv(
  "data/00_LAC_Covid-19_survey_ES_2020_04_17_14_52_39_293232.csv",
  na.strings = "n/a",
  skip = 1
)
#na
na <- read.csv(
  "data/00_NA_MMC_NA_Covid19_20200404_03_2020_04_17_14_55_02_577375.csv",
  na.strings = "n/a",
  skip = 1
)
#inspect####
dim(lac) #158 526
dim(na) #357 393
# View() opens a viewer window, so only call it from an interactive session;
# this keeps the script runnable with Rscript / R CMD BATCH.
if (interactive()) {
  View(lac)
  View(na)
}
#SELECT VARS####
#lac
# Keep 179 columns of the lac export, picked by position, then add a constant
# Region factor column (180 columns in total). Adjacent positions are written
# as single ranges; the selected positions and their order are unchanged.
# NOTE(review): the positions are tied to the column layout of this specific
# export (526 columns) -- re-check them if the input file changes.
lac2 <- lac %>%
  select(c(
    1:3, 15:16, 27:28,
    83:100, 103:104, 106:111, 113:123, 125:126,
    328:340, 343:344, 346:351, 353:354, 356:362, 364:365, 367:374, 376,
    378:381, 383, 385:397, 400:401, 403:411, 413, 415:419, 422,
    424:429, 431:432, 434:445, 447, 449:455, 457:458, 460:471, 473,
    475:481, 483:484, 486, 488:489, 521
  )) %>%
  mutate(Region = as.factor("Latin America"))
dim(lac2) #158 180
# Only open the data viewer when a user is actually at the console.
if (interactive()) {
  View(lac2)
}
#na
# Keep 179 columns of the na export, picked by position, then add a constant
# Region factor column (180 columns in total). Adjacent positions are written
# as single ranges; the selected positions and their order are unchanged.
# na has no 'none' for Q c6, c10, c18, c20
# NOTE(review): the positions are tied to the column layout of this specific
# export (393 columns) -- re-check them if the input file changes.
na2 <- na %>%
  select(c(
    1:3, 15:16, 29:30,
    88:105, 107:108, 110:115, 117:127, 129:130,
    198:210, 212:213, 215:220, 222:223, 225:231, 233:234, 236:243, 245,
    247:250, 252, 254:266, 268:269, 271:279, 281, 283:287, 289,
    291:296, 298:299, 301:312, 314, 316:322, 324:325, 327:338, 340,
    342:348, 350:351, 353, 355:356, 388
  )) %>%
  mutate(Region = as.factor("North Africa"))
dim(na2) #357 180
# Only open the data viewer when a user is actually at the console.
if (interactive()) {
  View(na2)
}
#create df with colnames
# Side-by-side table of the selected column names: row i holds the i-th column
# name of lac2 and of na2. The table is written to CSV.
variables_names <- data.frame(lac = colnames(lac2), na = colnames(na2))
# Only open the data viewer when a user is actually at the console.
if (interactive()) {
  View(variables_names)
}
# Make sure the output folder exists (e.g. on a fresh checkout) before writing.
dir.create("data_outputs", showWarnings = FALSE, recursive = TRUE)
write.csv(variables_names, "data_outputs/00_variables_names_20200419.csv")
#harmonize colnames (using na as reference)
# Overwrite the names of lac2 columns 7, 10, 11 and 12 in a single assignment;
# the replacement names are listed in the same order as the positions.
names(lac2)[c(7, 10, 11, 12)] <- c(
  "Q9..Monitor.observation..Sex.of.the.respondent.",
  "Q32.How.far.do.you.agree.with.the.following.statement..I.am.worried.about.catching.coronavirus.and.its.impact.on.my.health.",
  "Q33.How.far.do.you.agree.with.the.following.statement..I.am.worried.about.transmitting.coronavirus.",
  "Q34.How.far.do.you.agree.with.the.following.statement..I.know.about.coronavirus.and.how.to.protect.myself.and.others."
)
#BIND####
# Stack both surveys into one data frame. rbind() matches data-frame columns by
# name, so first report any na2 column that has no counterpart in lac2; this
# gives a readable error instead of rbind's generic name-mismatch message.
unmatched_cols <- setdiff(colnames(na2), colnames(lac2))
if (length(unmatched_cols) > 0) {
  stop(
    "Column names of na2 not found in lac2: ",
    paste(unmatched_cols, collapse = ", "),
    call. = FALSE
  )
}
data <- rbind(lac2, na2)
dim(data) #515 obs, 180 vars
# Only open the data viewer when a user is actually at the console.
if (interactive()) {
  View(data)
}
#ADD DURATION####
# Derive Duration as X_duration divided by 60, then drop the source column.
# NOTE(review): presumably converts seconds to minutes -- confirm the unit of
# X_duration in the survey export.
data <- mutate(data, Duration = X_duration / 60)
data <- select(data, -X_duration)
#final vars names
# Character vector with the column names of the combined data frame, written
# to CSV.
vars_names <- colnames(data)
# Only open the data viewer when a user is actually at the console.
if (interactive()) {
  View(vars_names)
}
# Make sure the output folder exists (e.g. on a fresh checkout) before writing.
dir.create("data_outputs", showWarnings = FALSE, recursive = TRUE)
write.csv(vars_names, "data_outputs/00_final_vars_names_20200419.csv")
#CLEAN####
# Cleaning to-do:
# - date
# - colnames
# - level names
# - add survey duration
#SAVE DF####
# Persist the combined data frame twice: as an .rda file holding the object
# `data`, and as a CSV file.
# Make sure both output folders exist (e.g. on a fresh checkout) before
# writing; save() and write.csv() do not create missing directories.
dir.create("rda", showWarnings = FALSE, recursive = TRUE)
dir.create("data_outputs", showWarnings = FALSE, recursive = TRUE)
#rda
save(data, file = "rda/00_cleaned_data_20200419.rda")
#csv
write.csv(data, "data_outputs/00_cleaned_data_20200419.csv")