diff --git a/Data/iterating_rm_countries_filt.rdata b/Data/iterating_rm_countries_filt.rdata index 9809f5d..5b5575a 100644 Binary files a/Data/iterating_rm_countries_filt.rdata and b/Data/iterating_rm_countries_filt.rdata differ diff --git a/R/Synth_create_sps.R b/R/Synth_create_sps.R index 0ec8991..f662526 100644 --- a/R/Synth_create_sps.R +++ b/R/Synth_create_sps.R @@ -301,7 +301,9 @@ save( iterateCountries <- function(data = synthData_u18[,1:4], ccodes = u_18_ccodes, n=4, start = 1985, pred = "rate", ...){ -cc <- ccodes +require(dplyr) + require(Synth) + cc <- ccodes it_c <- list() @@ -338,7 +340,7 @@ return(it_c) } -itco_u18_sp <- iterateCountries(Margin.ipop=.005,Sigf.ipop=7,Bound.ipop=6) +itco_u18_sp <- iterateCountries(Margin.ipop=.005,Sigf.ipop=7,Bound.ipop=6, ccodes = u_18_ccodes_f) itco_u18_sp_1990 <- iterateCountries(data = synthData_u18[,1:4], ccodes = u_18_ccodes, @@ -358,8 +360,76 @@ save(itco_u18_sp, itco_u18_sp_1990, itco_u20_sp, file = "Data/iterating_rm_count itco_u18_filt <- iterateCountries(synthData_u18_filt, u_18_ccodes_f, start = 1990, pred = "rate") itco_u20_filt <- iterateCountries(synthData_u20_filt, u_20_ccodes_f, start = 1990, pred = "pRate") + + +# iterate remove and replace ---------------------------------------------- + + +iterate_singular <- function(data = synthData_u18_filt, + ccodes = u_18_ccodes_f, + predictors = NULL, + dependent = "rate", + spreds = sp_u18_filt) { + out <- tibble() + + for (i in ccodes$Code) { + if (ccodes$Country[ccodes$Code==i]=="England and Wales"){ + next + } else { + + cc <- ccodes[ccodes$Code != i,] + + dp <- dataprep( + foo = data, + predictors = predictors, + special.predictors = spreds, + time.predictors.prior = 1990:1998, + dependent = dependent, + unit.variable = "Code", + unit.names.variable = "Country", + time.variable = "Year", + treatment.identifier = cc$Code[cc$Country =="England and Wales"], + controls.identifier = cc$Code[cc$Country !="England and Wales"], + time.optimize.ssr = 1990:1998, + time.plot = 1990:2013 + ) + + # md <- predvalues_synth(dp, synth_outputs = FALSE, ...) + + so <- synth(dp) + st <- synth.tab(so, dp) + + synthC <- dp$Y0 %*% so$solution.w + + md <- tibble(Year = as.numeric(rownames(dp$Y1)), Treated = dp$Y1[,1], Synthetic = synthC[,1]) %>% + gather("Group", "Rate", -1) + + mspe <- so$loss.v[1,1] + + w <- st$tab.w + # v <- tibble(Pred = row.names(st$tab.v), v_weight = as.numeric(st$tab.v)) + + gaps <- md %>% + spread(Group, Rate) %>% + mutate(Gap = Treated - Synthetic) + +out <- out %>% bind_rows(tibble(w_weights = list(w), + gaps = list(gaps), + mspe = mspe, + removed = ccodes$Country[ccodes$Code==i])) + + } + } + return(out) +} + +it_u18_all_sing <- iterate_singular() +it_u20_all_sing <- iterate_singular(data= synthData_u20_filt, spreds = sp_u20_filt, dependent = "pRate", ccodes = u_20_ccodes_f) + save( itco_u18_filt, itco_u20_filt, + it_u18_all_sing, + it_u20_all_sing, file = "Data/iterating_rm_countries_filt.rdata" ) diff --git a/R/Synth_functions.R b/R/Synth_functions.R index d39a7f2..8c1b42a 100644 --- a/R/Synth_functions.R +++ b/R/Synth_functions.R @@ -13,18 +13,18 @@ gg_synth <- function(dp = NULL, md = NULL, yr = 1999, post = FALSE, mspe = NULL, if (is.null(md)&&!is.null(dp)) { - agegrp <- gsub("^.*(\\d{2}).*$","\\1", deparse(substitute(dp))) - so <- synth(dp) - synthC <- dp$Y0 %*% so$solution.w - - md <- tibble(Year = as.numeric(rownames(dp$Y1)), Treated = dp$Y1[,1], Synthetic = synthC[,1]) %>% - gather("Group", "Rate", -1) - if (is.null(mspe) & mspeOptim){ - mspe <- so$loss.v[1] - } + agegrp <- gsub("^.*(\\d{2}).*$","\\1", deparse(substitute(dp))) + so <- synth(dp) + synthC <- dp$Y0 %*% so$solution.w + + md <- tibble(Year = as.numeric(rownames(dp$Y1)), Treated = dp$Y1[,1], Synthetic = synthC[,1]) %>% + gather("Group", "Rate", -1) + if (is.null(mspe) & mspeOptim){ + mspe <- so$loss.v[1] + } } else { - agegrp <- gsub("^.*(\\d{2}).*$","\\1", deparse(substitute(md))) + agegrp <- gsub("^.*(\\d{2}).*$","\\1", deparse(substitute(md))) } if(post){ @@ -39,14 +39,14 @@ gg_synth <- function(dp = NULL, md = NULL, yr = 1999, post = FALSE, mspe = NULL, } if(!is.null(mspe)){ - mspe <- signif(mspe, 3) + mspe <- signif(mspe, 3) } else { - mspe <- pre_MSPE(md) + mspe <- pre_MSPE(md) } plot <- ggplot(md, aes(Year, Rate, col = Group)) + # no linetype change - # plot <- ggplot(md, aes(Year, Rate, col = Group, linetype = Group)) + + # plot <- ggplot(md, aes(Year, Rate, col = Group, linetype = Group)) + geom_line(size = 2) + theme_sphsu_light() + ylab(paste0("Under-", agegrp, " rate (per 1,000 women)")) + @@ -63,7 +63,7 @@ gg_synth <- function(dp = NULL, md = NULL, yr = 1999, post = FALSE, mspe = NULL, scale_x_continuous(limits = c(NA, xmax)) + scale_y_continuous(limits = c(0, NA)) + labs(subtitle = paste0("Pre-intervention MSPE = ", mspe)) - + return(plot) } @@ -72,23 +72,23 @@ predvalues_synth <- function(dp, synth_outputs = TRUE, yr = 1999, ...) { require(dplyr) require(Synth) -so <- synth(dp, ...) + so <- synth(dp, ...) synthC <- dp$Y0 %*% so$solution.w if (synth_outputs){ - so_name <- gsub("^[a-z]+_(.*$)", "so_\\1", deparse(substitute(dp))) - st_name <- gsub("^[a-z]+_(.*$)", "st_\\1", deparse(substitute(dp))) - print(paste("outputting", so_name, "and", st_name)) - st <- synth.tab(so, dp) - - assign(so_name, so, envir = .GlobalEnv) - assign(st_name, st, envir = .GlobalEnv) + so_name <- gsub("^[a-z]+_(.*$)", "so_\\1", deparse(substitute(dp))) + st_name <- gsub("^[a-z]+_(.*$)", "st_\\1", deparse(substitute(dp))) + print(paste("outputting", so_name, "and", st_name)) + st <- synth.tab(so, dp) + + assign(so_name, so, envir = .GlobalEnv) + assign(st_name, st, envir = .GlobalEnv) } df <- tibble(Year = as.numeric(rownames(dp$Y1)), Treated = dp$Y1[,1], Synthetic = synthC[,1]) %>% gather("Group", "Rate", -1) -return(df) + return(df) } @@ -111,8 +111,8 @@ printCoefficients <- function(md = NULL, model = NULL){ P = as.numeric(`Pr(>|t|)`)) %>% select(Coefficient, Estimate, SE, P) - print(coefs) - + print(coefs) + print("Confidence intervals:") coefs %>% transmute(Coefficient = Coefficient, @@ -129,7 +129,7 @@ generatePlacebos <- function(data, time.plot = 1985:2013, dependent = "rate", ...) { -require(dplyr) + require(dplyr) require(Synth) require(tidyr) @@ -144,22 +144,22 @@ require(dplyr) placebos <- data.frame() for(i in 1:nrow(ccodes)){ - + dp <- dataprep( - data, - predictors = predictors, - predictors.op = "mean", - special.predictors = special.predictors, - time.predictors.prior = time.plot[1]:1998, - dependent = dependent, - unit.variable = "Code", - unit.names.variable = "Country", - time.variable = "Year", - treatment.identifier = ccodes$Code[i], - controls.identifier = ccodes$Code[-i], - time.optimize.ssr = time.optimize.ssr, - time.plot = time.plot - ) + data, + predictors = predictors, + predictors.op = "mean", + special.predictors = special.predictors, + time.predictors.prior = time.plot[1]:1998, + dependent = dependent, + unit.variable = "Code", + unit.names.variable = "Country", + time.variable = "Year", + treatment.identifier = ccodes$Code[i], + controls.identifier = ccodes$Code[-i], + time.optimize.ssr = time.optimize.ssr, + time.plot = time.plot + ) # md <- predvalues_synth(dp, FALSE, ...) so <- synth(dp, ...) @@ -173,13 +173,13 @@ require(dplyr) gaps <- md %>% spread(Group, Rate) %>% - mutate(Country = ccodes$Country[i], - Gap = Treated - Synthetic, - pre_mspe = mspe) + mutate(Country = ccodes$Country[i], + Gap = Treated - Synthetic, + pre_mspe = mspe) placebos <- bind_rows(placebos, gaps) -} - return(placebos) + } + return(placebos) } pre_MSPE <- function (md){ @@ -194,9 +194,9 @@ pre_MSPE <- function (md){ sPredText <- function(dp) { paste("Prediction periods:", - map(dp$tag$special.predictors, pluck(2)) %>% - map(function(x) paste0(min(x), "-", max(x))) %>% - str_flatten(collapse = "; ") + map(dp$tag$special.predictors, pluck(2)) %>% + map(function(x) paste0(min(x), "-", max(x))) %>% + str_flatten(collapse = "; ") ) } @@ -234,9 +234,9 @@ gg_gaps <- function(md, pl, dp = NULL, mspe_limit = NULL, title = FALSE, subtitl rateDiff <- function(md, age = "under 18") { pop <- synthData %>% filter(Year > 1998, - Year< 2014, - Country == "England and Wales", - grepl(age, .$agegrp, ignore.case = TRUE)) %>% + Year< 2014, + Country == "England and Wales", + grepl(age, .$agegrp, ignore.case = TRUE)) %>% select(Year, sumPops) df <- md %>% @@ -251,7 +251,7 @@ rateDiff <- function(md, age = "under 18") { list <- list(tot_rate = round(df[[1]], 2), tot_diff = round(df[[2]], 0) %>% format(big.mark = ','), mean_pop = round(mean(pop$sumPops), 0) %>% format(big.mark = ',') - ) + ) return(list) } @@ -278,40 +278,40 @@ interpolateAb <- function(country, data = synth_data_plus_ab){ } testSynthIterations_single <- function(yrs = 1990:1998, - pred = "pRate", - data = synthData_u20, - ccodes = u_20_ccodes, - n = 5, - predictors = NULL, - time.optimise = 1990:1998, - ...) { - + pred = "pRate", + data = synthData_u20, + ccodes = u_20_ccodes, + n = 5, + predictors = NULL, + time.optimise = 1990:1998, + ...) { + require(gtools) require(dplyr) require(Synth) require(stringr) - + data <- data.frame(data) x <- 1:n - + combos <- combinations(length(x), length(yrs), x, repeats.allowed = TRUE) - + i <- 1 - + for (i in 1:nrow(combos)){ combos[i,] <- as.numeric(as.factor(as.character(combos[i,]))) } - + combos <- unique(combos) - + mspes <- c() sps <- c() - + i <- 1 - + for (i in 1:nrow(combos)) { print(paste0("iteration ", i, "/", nrow(combos))) - + special.preds <- list( a = list(pred, yrs = yrs[combos[i,] == 1], "mean"), b = list(pred, yrs = yrs[combos[i,] == 2], "mean"), @@ -319,7 +319,7 @@ testSynthIterations_single <- function(yrs = 1990:1998, d = list(pred, yrs = yrs[combos[i,] == 4], "mean"), e = list(pred, yrs = yrs[combos[i,] == 5], "mean") ) - + dp <- dataprep( foo = data.frame(data %>% filter(Year >= yrs[1])), predictors = predictors, @@ -334,39 +334,39 @@ testSynthIterations_single <- function(yrs = 1990:1998, time.optimize.ssr = time.optimise, time.plot = yrs ) - + # md <- predvalues_synth(dp, synth_outputs = FALSE, ...) - + so <- synth(dp, ...) synthC <- dp$Y0 %*% so$solution.w - + md <- tibble(Year = as.numeric(rownames(dp$Y1)), Treated = dp$Y1[,1], Synthetic = synthC[,1]) %>% gather("Group", "Rate", -1) - + mspes[i] <- so$loss.v - + sps[[i]] <-special.preds[map_lgl(special.preds, ~ sum(.$yrs) > 0)] - - - + + + } - - + + tb <- tibble(iteration = 1:nrow(combos), pattern = NA, mspe = mspes, sPred = sps, groups = NA) - + i <- 1 for (i in 1:nrow(combos)){ tb$pattern[i] <- str_flatten(combos[i,], collapse = ", ") tb$groups[i] <- length(tb$sPred[[i]]) } - - + + return(tb) - + } testSynthIterations <- function(yrs = 1990:1998, @@ -419,68 +419,68 @@ testSynthIterations <- function(yrs = 1990:1998, tb <- foreach (i=1:nrow(combos), .packages = c("Synth", "dplyr", "purrr", "tidyr", "tcltk", "parallel"), .combine = bind_rows) %dopar% { - + if(!exists("pb")) pb <- tkProgressBar("Iterations", min = 1, max = nrow(combos)) setTkProgressBar(pb, i) - special.preds <- list( - a = list(pred, yrs = yrs[combos[i,] == 1], "mean"), - b = list(pred, yrs = yrs[combos[i,] == 2], "mean"), - c = list(pred, yrs = yrs[combos[i,] == 3], "mean"), - d = list(pred, yrs = yrs[combos[i,] == 4], "mean"), - e = list(pred, yrs = yrs[combos[i,] == 5], "mean") - ) - - dp <- dataprep( - foo = data.frame(data %>% filter(Year >= yrs[1])), - predictors = predictors, - special.predictors = special.preds[map_lgl(special.preds, ~ sum(.$yrs) > 0)], - time.predictors.prior = yrs[1]:1998, - dependent = dependent, - unit.variable = "Code", - unit.names.variable = "Country", - time.variable = "Year", - treatment.identifier = ccodes$Code[ccodes$Country =="England and Wales"], - controls.identifier = ccodes$Code[ccodes$Country !="England and Wales"], - time.optimize.ssr = time.optimise, - time.plot = yrs[1]:2013 - ) - - # md <- predvalues_synth(dp, synth_outputs = FALSE, ...) - - so <- synth(dp, ...) - st <- synth.tab(so, dp) - - synthC <- dp$Y0 %*% so$solution.w - - md <- tibble(Year = as.numeric(rownames(dp$Y1)), Treated = dp$Y1[,1], Synthetic = synthC[,1]) %>% - gather("Group", "Rate", -1) - - mspe <- so$loss.v[1,1] - - w <- st$tab.w - v <- tibble(Pred = row.names(st$tab.v), v_weight = as.numeric(st$tab.v)) - - sps <- special.preds[map_lgl(special.preds, ~ sum(.$yrs) > 0)] - - gaps <- md %>% - spread(Group, Rate) %>% - mutate(iteration = i, - groups = length(sps), - Gap = Treated - Synthetic) - - if(i>=(nrow(combos)-8)) {close(pb)} - - tibble(iteration = i, - pattern = NA, - mspe, - sPred = list(sps), - w_weights = list(w), - v_weights = list(v), - gaps = list(gaps), - groups = NA) - - } + special.preds <- list( + a = list(pred, yrs = yrs[combos[i,] == 1], "mean"), + b = list(pred, yrs = yrs[combos[i,] == 2], "mean"), + c = list(pred, yrs = yrs[combos[i,] == 3], "mean"), + d = list(pred, yrs = yrs[combos[i,] == 4], "mean"), + e = list(pred, yrs = yrs[combos[i,] == 5], "mean") + ) + + dp <- dataprep( + foo = data.frame(data %>% filter(Year >= yrs[1])), + predictors = predictors, + special.predictors = special.preds[map_lgl(special.preds, ~ sum(.$yrs) > 0)], + time.predictors.prior = yrs[1]:1998, + dependent = dependent, + unit.variable = "Code", + unit.names.variable = "Country", + time.variable = "Year", + treatment.identifier = ccodes$Code[ccodes$Country =="England and Wales"], + controls.identifier = ccodes$Code[ccodes$Country !="England and Wales"], + time.optimize.ssr = time.optimise, + time.plot = yrs[1]:2013 + ) + + # md <- predvalues_synth(dp, synth_outputs = FALSE, ...) + + so <- synth(dp, ...) + st <- synth.tab(so, dp) + + synthC <- dp$Y0 %*% so$solution.w + + md <- tibble(Year = as.numeric(rownames(dp$Y1)), Treated = dp$Y1[,1], Synthetic = synthC[,1]) %>% + gather("Group", "Rate", -1) + + mspe <- so$loss.v[1,1] + + w <- st$tab.w + v <- tibble(Pred = row.names(st$tab.v), v_weight = as.numeric(st$tab.v)) + + sps <- special.preds[map_lgl(special.preds, ~ sum(.$yrs) > 0)] + + gaps <- md %>% + spread(Group, Rate) %>% + mutate(iteration = i, + groups = length(sps), + Gap = Treated - Synthetic) + + if(i>=(nrow(combos)-8)) {close(pb)} + + tibble(iteration = i, + pattern = NA, + mspe, + sPred = list(sps), + w_weights = list(w), + v_weights = list(v), + gaps = list(gaps), + groups = NA) + + } # tb <- tibble(iteration = 1:nrow(combos), @@ -496,8 +496,8 @@ testSynthIterations <- function(yrs = 1990:1998, tb$pattern[i] <- str_flatten(combos[i,], collapse = ", ") tb$groups[i] <- length(tb$sPred[[i]]) } - - + + return(tb) registerDoSEQ() } @@ -515,14 +515,14 @@ synthPrep <- function(data, predictors = NULL, special.predictors = NULL, ...){ - + start <- min(data$Year) - -ccodes <- data %>% - select(Code, Country) %>% - arrange(Code) %>% - unique() + + ccodes <- data %>% + select(Code, Country) %>% + arrange(Code) %>% + unique() if(is.null(time.predictors.prior)){ time.predictors.prior <- start:1998 @@ -557,11 +557,11 @@ ccodes <- data %>% synthC <- dp$Y0 %*% so$solution.w mspe_lim <- so$loss.v[1] - -md <- tibble(Year = as.numeric(rownames(dp$Y1)), Treated = dp$Y1[,1], Synthetic = synthC[,1]) %>% - gather("Group", "Rate", -1) - + md <- tibble(Year = as.numeric(rownames(dp$Y1)), Treated = dp$Y1[,1], Synthetic = synthC[,1]) %>% + gather("Group", "Rate", -1) + + assign(paste0("dp_", grp), dp, envir = .GlobalEnv) assign(paste0("so_", grp), so, envir = .GlobalEnv) assign(paste0("st_", grp), st, envir = .GlobalEnv) @@ -690,8 +690,8 @@ plotIterations <- function(iteration = it_u18_rateSp, labels = FALSE) { if(labels){ gaps <- gaps + - geom_line(size = 1, alpha = 0.2) + - geom_line(data = label_pos, alpha = 1, size = 2) + + geom_line(size = 1, alpha = 0.2) + + geom_line(data = label_pos, alpha = 1, size = 2) + geom_text_repel(data = label_pos %>% filter(Year == 1998), aes(x = 1998, y = Gap, label = unit.names), hjust = 0, direction = "y", @@ -717,7 +717,7 @@ plotIterations <- function(iteration = it_u18_rateSp, labels = FALSE) { gg_pre_postMSPE <- function(md, pl){ -p <- md %>% + p <- md %>% spread(Group, Rate) %>% mutate(Gap = Treated - Synthetic, Country = "England and Wales") %>% @@ -733,8 +733,8 @@ p <- md %>% ggplot(aes(ratio)) + geom_histogram(fill = sphsu_cols("Cobalt"), col = "darkgrey", bins = 60) + theme_minimal() + - theme(panel.grid = element_blank()) - + theme(panel.grid = element_blank()) + ggp <- ggplot_build(p) ytop <- max(ggp[["data"]][[1]][["count"]]) @@ -750,70 +750,70 @@ p <- md %>% # iterating through removal of top-weighted countries -------------------------------------------------------- gg_iterateCountries <- function(itco, jitter = FALSE, n = 10, float_labs = FALSE) { -require(purrr) + require(purrr) require(ggrepel) require(ggplot2) require(dplyr) - -if(jitter){ - height <- itco %>% + + if(jitter){ + height <- itco %>% + reduce(bind_rows) %>% + top_n(n, -mspe) %>% + summarise(h = max(mspe)) %>% + pull()/50 + } else { + height <- 0 + } + + order_co <- + itco %>% map( ~ select(.x, label)) %>% reduce(bind_rows) %>% - top_n(n, -mspe) %>% - summarise(h = max(mspe)) %>% - pull()/50 -} else { - height <- 0 -} - -order_co <- - itco %>% map( ~ select(.x, label)) %>% - reduce(bind_rows) %>% - pull() - -df <- itco %>% - reduce(bind_rows) %>% - top_n(n, -mspe) %>% - # filter(mspe<5*min(mspe)) %>% - select(mspe, unit.names, label, gaps) %>% - unnest(gaps) %>% - mutate(label = factor(label, levels = order_co)) %>% - mutate(mgrp = as.numeric(factor(signif(mspe, 2)))) %>% - group_by(mgrp) %>% - mutate(mems = n()/24, - member = as.numeric(factor(unit.names)), - adjust = -(mems+1)/2+member) %>% - ungroup() %>% - mutate(nGap = Gap + 0.005*adjust*max(Gap)) - -plot <- df %>% - ggplot(aes(Year, nGap)) + - geom_line(size = 1, aes(col = label), position = position_jitter(h=height, w = 0)) + - theme_minimal() + - theme(panel.grid = element_blank()) + - geom_vline(xintercept = 1998.5, linetype = "dashed", col = "grey") + - geom_segment(x = min(itco[[1]]$gaps[[1]]$Year), xend = 2013, y = 0, yend = 0, col = "black") + - scale_colour_sphsu(name = "Top weighted country") + - ylab("Gap = Treated - Synthetic Control") - -if(float_labs){ - labs <- df %>% - filter(Year == 2013) %>% - select(Year, nGap, label) - plot <- - plot + - theme(legend.position = "none") + - coord_cartesian(clip = "off") + - geom_text_repel(data = labs, - aes(x = (Year), y = nGap, label = label), - hjust = 0, - direction = "y", - nudge_x = 0.8, - xlim = c(NA, 2030) - ) + - theme(plot.margin = unit(c(0,10,0,0), "cm")) + pull() + + df <- itco %>% + reduce(bind_rows) %>% + top_n(n, -mspe) %>% + # filter(mspe<5*min(mspe)) %>% + select(mspe, unit.names, label, gaps) %>% + unnest(gaps) %>% + mutate(label = factor(label, levels = order_co)) %>% + mutate(mgrp = as.numeric(factor(signif(mspe, 2)))) %>% + group_by(mgrp) %>% + mutate(mems = n()/24, + member = as.numeric(factor(unit.names)), + adjust = -(mems+1)/2+member) %>% + ungroup() %>% + mutate(nGap = Gap + 0.005*adjust*max(Gap)) + + plot <- df %>% + ggplot(aes(Year, nGap)) + + geom_line(size = 1, aes(col = label), position = position_jitter(h=height, w = 0)) + + theme_minimal() + + theme(panel.grid = element_blank()) + + geom_vline(xintercept = 1998.5, linetype = "dashed", col = "grey") + + geom_segment(x = min(itco[[1]]$gaps[[1]]$Year), xend = 2013, y = 0, yend = 0, col = "black") + + scale_colour_sphsu(name = "Top weighted country") + + ylab("Gap = Treated - Synthetic Control") + + if(float_labs){ + labs <- df %>% + filter(Year == 2013) %>% + select(Year, nGap, label) + plot <- + plot + + theme(legend.position = "none") + + coord_cartesian(clip = "off") + + geom_text_repel(data = labs, + aes(x = (Year), y = nGap, label = label), + hjust = 0, + direction = "y", + nudge_x = 0.8, + xlim = c(NA, 2030) + ) + + theme(plot.margin = unit(c(0,10,0,0), "cm")) -} - -return(plot) - + } + + return(plot) + } diff --git a/Synth-iterations-report.rmd b/Synth-iterations-report.rmd index dead381..13b2389 100644 --- a/Synth-iterations-report.rmd +++ b/Synth-iterations-report.rmd @@ -1,18 +1,18 @@ --- title: "Model-testing iterations" author: "Andrew Baxter" -date: "19/09/2019" +date: "`r Sys.Date()`" output: - html_document - # word_document: - # fig_height: 6 - # fig_width: 8 - # reference_docx: report_template.docx + # html_document + word_document: + fig_height: 6 + fig_width: 8 + reference_docx: C:\Users\2286432b\OneDrive - University of Glasgow\Teenage pregnancy - docs in development\ITS and Synth paper files\Appendices.docx always_allow_html: yes --- ```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, message = FALSE, error = FALSE, warning = FALSE, fig.width = 10, fig.height = 7) +knitr::opts_chunk$set(echo = FALSE, message = FALSE, error = FALSE, warning = FALSE, fig.width = 10, fig.height = 6, dpi = 300) library(dplyr) library(svglite) @@ -21,7 +21,7 @@ library(ggplot2) library(SPHSUgraphs) library(purrr) library(ggrepel) -load('Data/synth_data_b.rdata') # outputted from 'Synth_data.R' +load('Data/synth_data_c.rdata') # outputted from 'Synth_data.R' load("Data/filtered_itsp.rdata") # outputted from 'Synth_create_sps.R' load("Data/iterating_rm_countries_filt.rdata") source('R/Synth_functions.R') @@ -29,7 +29,7 @@ source('R/Synth_functions.R') ``` -# Under-18 pregnancy - no added predictors +## Under-18 pregnancy - no added predictors Plotting MSPEs by iteration and gaps for all iterations. @@ -38,7 +38,7 @@ gr_it_u18_filt <- plotIterations(it_u18_filt) %>% annotate_figure(top = text_gr gr_it_u18_filt ``` -## Top countries +### Top countries Differing iterations produced different country weightings. I exctracted the top country for each iteration and plotted the lowest MSPE for the top four countries: @@ -47,32 +47,101 @@ gr_tops_u18_filt <- plotIterations(it_u18_filt, labels = TRUE) %>% annotate_fig gr_tops_u18_filt ``` -## Removing top countries +### Removing top countries -A further iterative cycle constructed sequential synthetic controls, removing the previous top-weighted country to test for over-reliance on very few abnormal countries. Labels represent the top-weighted country in each analysis by weight and pre-itervention MSPE: +A further iterative cycle constructed sequential synthetic controls, removing the previous top-weighted country to test for over-reliance on very few abnormal countries. Labels represent the top-weighted country in each analysis by weight and pre-intervention MSPE: ```{r itco_u18_sp} gr_itco_u18_filt <- gg_iterateCountries(itco_u18_filt, float_labs = TRUE) gr_itco_u18_filt ``` -# Under-20 - no added predictors + + +```{r it_u18_sing_plot, include = FALSE} +df <- it_u18_all_sing %>% + # mutate(top_w = map_chr(weights, ~ + # arrange(.x, desc(w.weights)) %>% + # head(1) %>% + # mutate(unit.names = as.character(unit.names)) %>% + # pull(unit.names))) + unnest(gaps) %>% + mutate(mspe = signif(mspe, 2), + label = ifelse(Year == 2013, paste0(removed, " removed; MSPE = ", mspe), NA)) + +df %>% + ggplot(aes(Year, Gap)) + + geom_line(size = 1, aes(col = removed)) + + theme_minimal() + + theme(panel.grid = element_blank()) + + geom_vline(xintercept = 1998.5, linetype = "dashed", col = "grey") + + geom_segment(x = 1990, xend = 2013, y = 0, yend = 0, col = "black") + + scale_colour_sphsu(name = "Country removed") + + ylab("Gap = Treated - Synthetic Control") + + theme(legend.position = "none") + + coord_cartesian(clip = "off") + + geom_text_repel(aes(x = Year, y = Gap, label = label), + hjust = 0, + direction = "y", + nudge_x = 0.8, + xlim = c(NA, 2030) + ) + + theme(plot.margin = unit(c(0,10,0,0), "cm")) + +``` + + +## Under-20 - no added predictors ```{r it_u20_plots} gr_it_u20_filt <- plotIterations(it_u20_filt) %>% annotate_figure(top = text_grob("Under-20, no predictors - year iterations")) gr_it_u20_filt ``` -## Top Countries +### Top Countries ```{r it_u20_tops} gr_tops_u20_filt <- plotIterations(it_u20_filt, labels = TRUE) %>% annotate_figure(top = text_grob("Under-20, no predictors - year iterations")) gr_tops_u20_filt ``` -## Removing Top Countries +### Removing Top Countries ```{r itco_u20_filt} gr_itco_u20_filt <- gg_iterateCountries(itco_u20_filt, float_labs = TRUE) gr_itco_u20_filt +``` + + + +```{r it_u20_sing_plot, include = FALSE} +df <- it_u20_all_sing %>% + # mutate(top_w = map_chr(weights, ~ + # arrange(.x, desc(w.weights)) %>% + # head(1) %>% + # mutate(unit.names = as.character(unit.names)) %>% + # pull(unit.names))) + unnest(gaps) %>% + mutate(mspe = signif(mspe, 2), + label = ifelse(Year == 2013, paste0(removed, " removed; MSPE = ", mspe), NA)) + +df %>% + ggplot(aes(Year, Gap)) + + geom_line(size = 1, aes(col = removed)) + + theme_minimal() + + theme(panel.grid = element_blank()) + + geom_vline(xintercept = 1998.5, linetype = "dashed", col = "grey") + + geom_segment(x = 1990, xend = 2013, y = 0, yend = 0, col = "black") + + scale_colour_sphsu(name = "Country removed") + + ylab("Gap = Treated - Synthetic Control") + + theme(legend.position = "none") + + coord_cartesian(clip = "off") + + geom_text_repel(aes(x = Year, y = Gap, label = label), + hjust = 0, + direction = "y", + nudge_x = 0.8, + xlim = c(NA, 2030) + ) + + theme(plot.margin = unit(c(0,10,0,0), "cm")) + ``` \ No newline at end of file diff --git a/Synth-report-updated.Rmd b/Synth-report-updated.Rmd index 6ebe905..959f198 100644 --- a/Synth-report-updated.Rmd +++ b/Synth-report-updated.Rmd @@ -1,7 +1,7 @@ --- title: "Synth reporting" author: "Andrew Baxter" -date: "27/01/2019" +date: "`r Sys.Date()`" output: # pdf_document: # fig_height: 5 @@ -18,7 +18,7 @@ output: # word_document: # fig_height: 6 # fig_width: 8 - # reference_docx: report_template.docx + # reference_docx: C:\Users\2286432b\OneDrive - University of Glasgow\Teenage pregnancy - docs in development\ITS and Synth paper files\Appendices.docx always_allow_html: yes params: save_graphs: @@ -817,7 +817,7 @@ rank_u18 <- p18$plot$data %>% select(Country, ratio, rank) ``` -In placebo-country tests, England and Wales ranked `r rank_u20 %>% filter(Country == "England and Wales") %>% pull(rank)` out of `r nrow(rank_u20)` countries by pre/post-MSPE ratio. +In placebo-country tests, England and Wales ranked `r rank_u18 %>% filter(Country == "England and Wales") %>% pull(rank)` out of `r nrow(rank_u18)` countries by pre/post-MSPE ratio. ```{r pp_u18_all} @@ -839,7 +839,7 @@ synthPrep(sd_noScot %>% filter(Country != "New Zealand"), append(sp_u20_edu) %>% append(sp_u20_gdp) %>% append(sp_u20_urb), - time.optimise.ssr = 1996:1998, + time.optimise.ssr = opt_y1:1998, time.plot = 1990:2013, time.predictors.prior = 1990:1998 ) @@ -847,7 +847,7 @@ synthPrep(sd_noScot %>% filter(Country != "New Zealand"), ``` ```{r md_u20_all_graph} -gr_u20_all <- gg_synth(md = md_u20_all, post = FALSE) + xlim(1990, 2013) +gr_u20_all <- gg_synth(md = md_u20_all, post = TRUE) + xlim(1990, 2013) gr_u20_all ``` @@ -865,4 +865,45 @@ st_u20_all$tab.v %>% kable(caption = "Predictor weights") st_u20_all$tab.pred %>% kable(caption = "Predictor balance between synthetic and treated units") ``` +### Placebo testing by country + +```{r plot_u20_pb_all} +pb_plot_u20_all <- + gg_gaps(md_u20_all, pl_u20_all) #+ + # theme(text = element_text(size = 12), + # title = element_text(size = 12)) + + # labs(title = "England vs Synthetic Control, no added predictors - country placebo") + +if (params$plotly_graphs == "plotly"){ + ggplotly(pb_plot_u20_all) +} else { +pb_plot_u20_all +} + +if (params$plotly_graphs == "plotly" & params$publish_plotly) { +api_create(last_plot(), filename = "Gaps - U20 - Excl Scotland") +} + +``` + +```{r rank_u20_all} +pp_u20_all <- gg_pre_postMSPE(md_u20_all, pl_u20_all) + labs(title = "Post/pre-TPS mean squared prediction error") + +p20 <- ggplot_build(pp_u20_all) + +rank_u20 <- p20$plot$data %>% + arrange(desc(ratio)) %>% + ungroup() %>% + mutate(rank = rank(-ratio)) %>% + select(Country, ratio, rank) +``` + +In placebo-country tests, England and Wales ranked `r rank_u20 %>% filter(Country == "England and Wales") %>% pull(rank)` out of `r nrow(rank_u20)` countries by pre/post-MSPE ratio. + +```{r pp_u20_all} + +pp_u20_all + +``` +
\ No newline at end of file