-
Notifications
You must be signed in to change notification settings - Fork 8
/
MCNA_dry_run.R
128 lines (87 loc) · 5.24 KB
/
MCNA_dry_run.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# setup
library(dplyr)
library(koboquest) # manage kobo questionnairs
library(kobostandards) # check inputs for inconsistencies
library(xlsformfill) # generate fake data for kobo
library(hypegrammaR) # simple stats 4 complex samples
library(composr) # horziontal operations
source("functions/to_alphanumeric_lowercase.R")
source("functions/analysisplan_factory.R")
#source("./pre-process_strata_names.R")
# load questionnaire inputs
# Both sheets are read as plain character data (no factors);
# check.names = FALSE keeps the CSV column headers exactly as written.
questions <- read.csv(
  "input/kobo_questions.csv",
  stringsAsFactors = FALSE,
  check.names = FALSE
)
choices <- read.csv(
  "input/kobo_choices.csv",
  stringsAsFactors = FALSE,
  check.names = FALSE
)
# generate data
# Fake responses are produced from the questionnaire definition.
# NOTE(review): the third argument (1000) is presumably the number of
# records to generate -- confirm against the xlsformfill documentation.
response <- xlsform_fill(questions, choices, 1000)
# Drop records that have no population group.
response_filtered <- filter(response, !is.na(population_group))
# add cluster ids
cluster_lookup_table <- read.csv(
  "input/combined_sample_ids.csv",
  stringsAsFactors = FALSE,
  check.names = FALSE
)
# Build the stratum name as "<district><population_group>". The district is
# looked up by matching each record's cluster_location_id against the
# new_ID column of the lookup table. Records without a matching new_ID get
# an NA district, which paste0() renders as the text "NA".
response_filtered_w_clusterids <- response_filtered %>%
  mutate(
    strata = paste0(
      cluster_lookup_table$district[
        match(cluster_location_id, cluster_lookup_table$new_ID)
      ],
      population_group
    )
  )
# horizontal operations / recoding
# Three per-household indicators are derived and then summed into
# score_livelihoods. All conditions refer to columns of the piped data
# rather than to an outer data frame, so they stay aligned with the rows
# being recoded.
r <- response_filtered_w_clusterids %>%
  # debt indicator: 0.25 where how_much_debt >= 505000, otherwise 0
  new_recoding(source = how_much_debt, target = hh_with_debt_value) %>%
  recode_to(0.25, where.num.larger.equal = 505000, otherwise.to = 0) %>%
  # unemployment indicator: 0 where both work questions were answered,
  # then 0.5 where the household is not working but actively seeking work
  new_recoding(target = hh_unemployed) %>%
  recode_to(0, where = !(is.na(work) | is.na(actively_seek_work))) %>%
  recode_to(0.5, where = (work == "no") & (actively_seek_work == "yes")) %>%
  # basic-needs indicator: 0.25 where any listed debt reason was selected,
  # otherwise 0
  new_recoding(source = reasons_for_debt, target = hh_unable_basic_needs) %>%
  recode_to(
    0.25,
    where.selected.any = c(
      "health", "food", "education", "basic_hh_expenditure"
    ),
    otherwise.to = 0
  ) %>%
  end_recoding
# NOTE(review): hh_unemployed has no otherwise.to, so rows with a missing
# work answer presumably stay NA and make the summed score NA -- confirm
# that this is intended.
r <- r %>%
  mutate(
    score_livelihoods = hh_with_debt_value + hh_unemployed +
      hh_unable_basic_needs
  )
# vertical operations / aggregation
### .. should/can this move up to dataset generation?
# Normalise the column names with the project helper before they are
# matched against the questionnaire.
r <- setNames(r, to_alphanumeric_lowercase(names(r)))
### .. should/can this move up to loading inputs?
questionnaire <- load_questionnaire(r, questions, choices)
# Analysis plan: every question as dependent variable, broken down by
# household type (population_group) and repeated for each governorate.
analysisplan <- make_analysisplan_all_vars(
  r,
  questionnaire,
  independent.variable = "population_group",
  repeat.for.variable = "governorate_mcna"
)
### .. should/can this move up to loading inputs?
samplingframe <- load_samplingframe("./input/Strata_clusters_population.csv")
# Collapse the cluster-level sampling frame to one row per stratum holding
# the summed population, as a plain data.frame.
samplingframe_strata <- samplingframe %>%
  group_by(stratum) %>%
  summarize(population = sum(population)) %>%
  as.data.frame()
# Records whose stratum is missing from the sampling frame cannot be
# weighted and are removed below. Report them first so the removal is no
# longer silent.
# could we instead kick out more specifically the impossible
# district/population group combos?
unmatched_strata <- setdiff(unique(r$strata), samplingframe_strata$stratum)
if (length(unmatched_strata) > 0) {
  warning(
    "Dropping ", sum(r$strata %in% unmatched_strata),
    " record(s) whose strata are not in the sampling frame: ",
    paste(unmatched_strata, collapse = ", "),
    call. = FALSE
  )
}
r <- r %>%
  filter(strata %in% samplingframe_strata$stratum)
# Weighting function that links the data column "strata" to the sampling
# frame column "stratum" and uses "population" as the population size.
strata_weight_fun <- map_to_weighting(
  sampling.frame = samplingframe_strata,
  sampling.frame.population.column = "population",
  sampling.frame.stratum.column = "stratum",
  data.stratum.column = "strata"
)
# Cluster identifier: cluster location id and population group joined
# by "_".
r[["cluster_id"]] <- paste(
  r[["cluster_location_id"]],
  r[["population_group"]],
  sep = "_"
)
# Run every row of the analysis plan against the data, using the stratum
# weights and the cluster ids defined above.
result <- from_analysisplan_map_to_output(
  r,
  analysisplan = analysisplan,
  weighting = strata_weight_fun,
  cluster_variable_name = "cluster_id",
  questionnaire
)
# Apply map_to_labeled() with the questionnaire to each individual result.
result_labeled <- lapply(result$results, map_to_labeled, questionnaire)
# exporting only small part of results for speed during testing:
# Select results 500 to 700. The mask is built from the actual number of
# results, so it always has exactly one entry per result, even when fewer
# than 700 results exist.
subset_of_results <- seq_along(result$results) %in% 500:700
some_results <- hypegrammaR:::results_subset(
  result,
  logical = subset_of_results
)
# not sure if this function should be "user facing" or have some wrappers
# (@Bouke thoughts?)
# essentially it handles all the looping over different column values as
# hierarchies. then each result is visualised by a function passed here
# that decides how to render each individual result
# see ?hypegrammaR:::map_to_generic_hierarchical_html
# The output location is named once so the report is opened from the same
# place it is written to.
output_dir <- "./output"
output_filename <- "summary_by_dependent_var_then_by_repeat_var.html"
hypegrammaR:::map_to_generic_hierarchical_html(
  some_results,
  render_result_with = hypegrammaR:::from_result_map_to_md_table,
  by_analysisplan_columns = c("dependent.var", "repeat.var.value"),
  by_prefix = c("", ""),
  level = 2,
  questionnaire = questionnaire,
  label_varnames = TRUE,
  dir = output_dir,
  filename = output_filename
)
# Open the report from the output directory it was written to.
browseURL(file.path(output_dir, output_filename))
# not sure this is working correctly.. next on agenda (:
# big_table <- hypegrammaR:::map_to_datamerge(result$results, questionnaire = questionnaire, rows = "repeat.var.value")