diff --git a/epiAnalysis/scripts/analysis.R b/epiAnalysis/scripts/analysis.R index 8a575e0..0ef5f96 100644 --- a/epiAnalysis/scripts/analysis.R +++ b/epiAnalysis/scripts/analysis.R @@ -37,7 +37,7 @@ df_sensitivity <- "_sensitivity.RDS" )) - +name_of_current_run <- paste0("remove_unnecessary_args_", Sys.Date()) ## Set up values used throughout for comp_labels and covariates-------------------------------------------------------------------- # Comp labels comp_labels <- @@ -810,11 +810,8 @@ plot_maxi_matrix_transfers( yllimit = 0.8, yulimit = 1.3, plot_log = TRUE, - units = "hr/day", - granularity = 2000, - theme = NULL, - point_specification = ggplot2::geom_point(size = 1) -) + granularity = 2000, + ) dev.off() ## Matrix plots: model comparison----------------------------------------- @@ -901,10 +898,7 @@ for (pair in model_pair_list) { yllimit = 0.8, yulimit = 1.3, plot_log = TRUE, - units = "hr/day", - granularity = 1000, - point_specification = ggplot2::geom_point(size = 2) - ) + granularity = 1000) dev.off() @@ -928,10 +922,8 @@ compare_all_transfers_side_by_side_three( yllimit = 0.8, yulimit = 1.3, plot_log = TRUE, - units = "hr/day", granularity = 1000, - point_specification = ggplot2::geom_point(size = 2) -) + ) dev.off() ### Write these models into df of numbers @@ -981,12 +973,8 @@ compare_all_transfers_ism_side_by_side( yllimit = 0.8, yulimit = 1.3, plot_log = TRUE, - lower_quantile = 0.05, - upper_quantile = 0.95, - units = "hr/day", granularity = 1000, - point_specification = ggplot2::geom_point(size = 2) -) + ) dev.off() ### Write numbers_df -------------------------------------------------- diff --git a/epiAnalysis/useful_functions/arrange_plots_matrix.R b/epiAnalysis/useful_functions/arrange_plots_matrix.R index 60f03ad..2e64f53 100644 --- a/epiAnalysis/useful_functions/arrange_plots_matrix.R +++ b/epiAnalysis/useful_functions/arrange_plots_matrix.R @@ -1,18 +1,10 @@ plot_maxi_matrix_transfers <- function(comp_model, - fixed_values = NULL, - part_1 = NULL, comp_labels, yllimit = NULL, yulimit = NULL, plot_log = FALSE, - lower_quantile = 0.05, - upper_quantile = 0.95, - units = "unitless", - specified_units = NULL, - terms = TRUE, granularity = 10000, - point_specification = ggplot2::geom_point(size = 2), theme = NULL, plotstitle = NULL) { #------------------------------------------------------------------ @@ -22,13 +14,9 @@ plot_maxi_matrix_transfers <- type <- epicoda:::process_model_type(comp_model) # Labels - if (!is.null(part_1)) { - comp_labels <- alter_order_comp_labels(comp_labels, part_1) - } transf_labels <- transf_labels(comp_labels, - transformation_type = "ilr", - part_1 = part_1) + transformation_type = "ilr") # Datasets dataset1 <- get_dataset_from_model(model = comp_model, comp_labels = comp_labels, transf_labels = transf_labels, type = type) @@ -59,14 +47,12 @@ plot_maxi_matrix_transfers <- granularity = granularity, units = "hr/day", comp_labels = comp_labels, - terms = terms, plot_log = plot_log, yllimit = yllimit, yulimit = yulimit, xllimit = xllimit, xulimit = xulimit, y_label = y_label, - fixed_values = fixed_values, point_specification = ggplot2::geom_point(size = 1.5) ) assign(paste0("p_", comp_labels[i], "_", comp_labels[j], "_model1"), pfirst ) diff --git a/epiAnalysis/useful_functions/compare_plot2.R b/epiAnalysis/useful_functions/compare_plot2.R index fac25b4..282ad59 100644 --- a/epiAnalysis/useful_functions/compare_plot2.R +++ b/epiAnalysis/useful_functions/compare_plot2.R @@ -10,19 +10,11 @@ compare_all_transfers_side_by_side <- ebc2 = "peachpuff", pc1 = "blue", pc2 = "darkred", - fixed_values = NULL, - part_1 = NULL, comp_labels, yllimit = NULL, yulimit = NULL, plot_log = FALSE, - lower_quantile = 0.05, - upper_quantile = 0.95, - units = "unitless", - specified_units = NULL, - terms = TRUE, granularity = 10000, - point_specification = ggplot2::geom_point(size = 2), theme = NULL, plotstitle = NULL) { #------------------------------------------------------------------ @@ -36,13 +28,9 @@ compare_all_transfers_side_by_side <- } # Labels - if (!is.null(part_1)) { - comp_labels <- alter_order_comp_labels(comp_labels, part_1) - } transf_labels <- transf_labels(comp_labels, - transformation_type = "ilr", - part_1 = part_1) + transformation_type = "ilr") # Datasets dataset1 <- get_dataset_from_model(model = comp_model, comp_labels = comp_labels, transf_labels = transf_labels, type = type) dataset2 <- get_dataset_from_model(model = comp_model2, comp_labels = comp_labels, transf_labels = transf_labels, type = type) @@ -74,14 +62,12 @@ compare_all_transfers_side_by_side <- granularity = granularity, units = "hr/day", comp_labels = comp_labels, - terms = terms, plot_log = plot_log, yllimit = yllimit, yulimit = yulimit, xllimit = xllimit, xulimit = xulimit, y_label = y_label, - fixed_values = fixed_values, point_specification = ggplot2::geom_point(size = 1.5, colour = pc1), error_bar_colour = ebc1 ) @@ -94,13 +80,11 @@ compare_all_transfers_side_by_side <- granularity = granularity, units = "hr/day", comp_labels = comp_labels, - terms = terms, plot_log = plot_log, yllimit = yllimit, yulimit = yulimit, xllimit = xllimit, xulimit = xulimit, - fixed_values = fixed_values, y_label = " ", point_specification = ggplot2::geom_point(size = 1.5, colour = pc2), error_bar_colour = ebc2 diff --git a/epiAnalysis/useful_functions/compare_plot3.R b/epiAnalysis/useful_functions/compare_plot3.R index d3dae40..b520454 100644 --- a/epiAnalysis/useful_functions/compare_plot3.R +++ b/epiAnalysis/useful_functions/compare_plot3.R @@ -8,20 +8,11 @@ compare_all_transfers_side_by_side_three <- pc1 = "blue", pc2 = "darkred", pc3 = "darkgreen", - fixed_values = NULL, - part_1 = NULL, comp_labels, yllimit = NULL, yulimit = NULL, plot_log = FALSE, - lower_quantile = 0.05, - upper_quantile = 0.95, - units = "unitless", - specified_units = NULL, - terms = TRUE, granularity = 10000, - point_specification = ggplot2::geom_point(size = 2), - theme = NULL, plotstitle = NULL) { #------------------------------------------------------------------ # First we calculate the shared limits for all the plots @@ -35,13 +26,9 @@ compare_all_transfers_side_by_side_three <- } # Labels - if (!is.null(part_1)) { - comp_labels <- alter_order_comp_labels(comp_labels, part_1) - } - transf_labels <- + transf_labels <- transf_labels(comp_labels, - transformation_type = "ilr", - part_1 = part_1) + transformation_type = "ilr") # Datasets dataset1 <- get_dataset_from_model(model = comp_model, comp_labels = comp_labels, transf_labels = transf_labels, type = type) @@ -71,14 +58,12 @@ compare_all_transfers_side_by_side_three <- granularity = granularity, units = "hr/day", comp_labels = comp_labels, - terms = terms, plot_log = plot_log, yllimit = yllimit, yulimit = yulimit, xllimit = xllimit, xulimit = xulimit, y_label = y_label, - fixed_values = fixed_values, point_specification = ggplot2::geom_point(size = 1.5, colour = pc1), error_bar_colour = ebc1 ) @@ -91,13 +76,11 @@ compare_all_transfers_side_by_side_three <- granularity = granularity, units = "hr/day", comp_labels = comp_labels, - terms = terms, plot_log = plot_log, yllimit = yllimit, yulimit = yulimit, xllimit = xllimit, xulimit = xulimit, - fixed_values = fixed_values, y_label = " ", point_specification = ggplot2::geom_point(size = 1.5, colour = pc2), error_bar_colour = ebc2 @@ -111,14 +94,12 @@ compare_all_transfers_side_by_side_three <- granularity = granularity, units = "hr/day", comp_labels = comp_labels, - terms = terms, plot_log = plot_log, yllimit = yllimit, yulimit = yulimit, xllimit = xllimit, xulimit = xulimit, - fixed_values = fixed_values, - y_label = " ", + y_label = " ", point_specification = ggplot2::geom_point(size = 1.5, colour = pc3), error_bar_colour = ebc3 ) diff --git a/epiAnalysis/useful_functions/compare_plot_linear.R b/epiAnalysis/useful_functions/compare_plot_linear.R index 5ef52ee..7a4dfac 100644 --- a/epiAnalysis/useful_functions/compare_plot_linear.R +++ b/epiAnalysis/useful_functions/compare_plot_linear.R @@ -6,23 +6,11 @@ compare_all_transfers_ism_side_by_side <- ebc2 = "peachpuff", pc1 = "blue", pc2 = "darkred", - fixed_values = NULL, - transformation_type = "ilr", - comparison_part = NULL, - part_1 = NULL, comp_labels, yllimit = NULL, yulimit = NULL, plot_log = FALSE, - lower_quantile = 0.05, - upper_quantile = 0.95, - units = "unitless", - specified_units = NULL, - rounded_zeroes = TRUE, - det_limit = NULL, - terms = TRUE, granularity = 10000, - point_specification = ggplot2::geom_point(size = 2), theme = NULL, plotstitle = NULL) { @@ -34,13 +22,9 @@ compare_all_transfers_ism_side_by_side <- type <- epicoda:::process_model_type(comp_model) # Labels - if (!is.null(part_1)) { - comp_labels <- alter_order_comp_labels(comp_labels, part_1) - } transf_labels <- transf_labels(comp_labels, - transformation_type = "ilr", - part_1 = part_1) + transformation_type = "ilr") # Datasets dataset1 <- get_dataset_from_model(model = comp_model, comp_labels = comp_labels, transf_labels = transf_labels, type = type) @@ -71,14 +55,12 @@ compare_all_transfers_ism_side_by_side <- granularity = granularity, units = "hr/day", comp_labels = comp_labels, - terms = terms, plot_log = plot_log, yllimit = yllimit, yulimit = yulimit, xllimit = xllimit, xulimit = xulimit, y_label = y_label, - fixed_values = fixed_values, point_specification = ggplot2::geom_point(size = 1.5, colour = pc1), error_bar_colour = ebc1 ) @@ -93,13 +75,11 @@ compare_all_transfers_ism_side_by_side <- granularity = granularity, units = "hr/day", comp_labels = comp_labels, - terms = terms, plot_log = plot_log, yllimit = yllimit, yulimit = yulimit, xllimit = xllimit, xulimit = xulimit, - fixed_values = fixed_values, y_label = " ", point_specification = ggplot2::geom_point(size = 1.5, colour = pc2), error_bar_colour = ebc2 diff --git a/epiAnalysis/useful_functions/plot_transfers_ism.R b/epiAnalysis/useful_functions/plot_transfers_ism.R index 99e2d34..62dd484 100644 --- a/epiAnalysis/useful_functions/plot_transfers_ism.R +++ b/epiAnalysis/useful_functions/plot_transfers_ism.R @@ -99,8 +99,8 @@ plot_transfers_ism <- function(from_part, dataset = dataset_ready, units = "hr/day", comp_labels = comp_labels, - lower_quantile = 0.05, - upper_quantile = 0.95, + lower_quantile = lower_quantile, + upper_quantile = upper_quantile, granularity = granularity ) new_data <- epicoda:::normalise_comp(new_data, comp_labels) diff --git a/ukbAccProcessing/scripts/acc_plots_over_the_day_by_group.R b/ukbAccProcessing/scripts/acc_plots_over_the_day_by_group.R new file mode 100644 index 0000000..b051e1b --- /dev/null +++ b/ukbAccProcessing/scripts/acc_plots_over_the_day_by_group.R @@ -0,0 +1,310 @@ +# USE RUNNAME TO BE ABLE TO PULL OLD FILES ========================================================= +name_of_current_run <- "2022-01-20update_negative_control" + +# LOAD PACKAGES AND HELPER FUNCTIONS; NAME RUN====================================================== +library(ggplot2) +library(data.table) +source("ukbAccProcessing/useful_functions/average_day_plot_by_group.R") + +# LOAD DATA========================================================================================= +a <- + data.frame(fread("ukbAccProcessing/inputData/sep20-summary-all.csv")) # all acc data +participant <- fread( + "ukbDataPrep/inputData/participant_new_nc_20220114.csv", + stringsAsFactors = FALSE, + data.table = FALSE, + check.names = TRUE, + tz = "" +) + +extra_cols <- fread(# TO EXTRACT EXTRA COLUMNS NOT RECORDED IN MAIN DATA + "../Analyses/thesis_only_analyses/data/thesis-phenoData-20220124.csv", + stringsAsFactors = FALSE, + data.table = FALSE, + check.names = TRUE, + tz = "" +) + +df <- readRDS(# analytic dataset + paste0( + "epiAnalysis/inputData/", + name_of_current_run, + "_ready_to_use.RDS" + )) + + +# NAME OF CURRENT RUN UPDATE FOR THESIS============================================================== +name_of_current_run <- "2022-06-22" + +# RESTRICT TO PATTICIPANTS IN THE FINAL ANALYTIC SAMPLE============================================== +a$eid <- gsub("_90001_0_0.gz", "", a$eid) +a <- a[a$eid %in% df$eid, ] + +a <- merge(a, participant, by = "eid") +a <- merge(a, df[, c("age_entry", "eid")], by = "eid") + +a <- merge(a, extra_cols, by = "eid") + +# TIME OF YEAR PLOT=================================================================================== +a$month <- lubridate::month(a$file.startTime) +a$dst <- "UNCLEAR" +a$dst[a$month %in% c(4, 5, 6, 7, 8, 9)] <- "summer" +a$dst[a$month %in% c(11, 12, 1, 2)] <- "winter" +a$dst <- as.factor(a$dst) +plotAverageDayGroupWise( + a, + "dst", + "sleep.hourOfDay.", + ".avg", + yAxisLabel = "sleep", + title ="Checking DST variable correctly incorporated" +) + + +# PREPROCESSING OF BEHAVIOURAL VARIABLES ============================================================= +a[, "GetMorn"] <- + factor( + a[, "GetMorn"], + ordered = TRUE, + levels = c("Very easy", "Fairly easy", "Not very easy", "Not at all easy") + ) +a[, "JobInvolvNightShiftWork"] <- + plyr::revalue( + a[, "JobInvolvNightShiftWork"], + c( + "Prefer not to answer" = NA, + "Do not know" = NA, + "Always" = "Shift work - always nights", + "Usually" = "Shift work - usually nights", + "Sometimes" = "Shift work - sometimes nights", + "Never/rarely" = "Shift work - never/rarely nights" + ) + ) +a[, "JobInvolvNightShiftWork"] <- + plyr::mapvalues(a[, "JobInvolvNightShiftWork"], "", "No shift work") +a[, "JobInvolvNightShiftWork"] <- + factor( + a[, "JobInvolvNightShiftWork"], + ordered = TRUE, + levels = c( + "Shift work - always nights", + "Shift work - usually nights", + "Shift work - sometimes nights", + "Shift work - never/rarely nights", + "No shift work" + ) + ) + +a$JobInvolveHeavyManualPhysicalWork <- a$p816_i0 # NOTE PARTICIPANTS WITHOUT A JOB WILL BE NA FOR THESE VARIABLES +a$JobInvolveWalkingStanding <- a$p806_i0 + +# MAKE PLOTS ========================================================================================================= +# GROUPINGS FOR WHICH USE ALL DAYS ================================================================= +for (group in c( + "JobInvolvNightShiftWork", + "JobInvolvShiftWork", + "GetMorn", + "Morning.evenPerson.Chronotype.", + "NapDureDay" +)) { + + # FINAL ADMIN ==================================================================================== + a[, group] <- as.factor(a[, group]) # FACTOR COERCION + a[, group] <- + plyr::revalue(a[, group], c("Prefer not to answer" = NA, "Do not know" = NA)) # ON THE FLY NA PROCESSING + + + # TITLE ASSIGNMENT=============================================================================== + tit <- NULL # BEGIN WITH NULL TITLE + + if (group == "JobInvolvNightShiftWork") { + dat <- a[a$age_entry < 365.25 * 60,] + tit <- "Working night shifts (under 60s only)" + } + else { + dat <- a + } + + if (group == "GetMorn") { + tit <- "Ease of getting up in the morning" + } + + if (group == "NapDureDay") { + tit <- "Napping during the day" + } + + # PRINTING ====================================================================================== + print(group) + print(levels(dat[, group])) + print(nrow(dat)) + + # DO AVERAGE-DAY-BY-GROUP PLOTS================================================================== + for (label in list( + c("MVPA", "Probability in MVPA"), + c("light", "Probability in LIPA"), + c("sedentary", "Probability in SB"), + c("sleep", "Probability in sleep") + )) { + lab <- paste0("p_", label[1]) + assign( + lab, + plotAverageDayGroupWise( + dat, + group, + paste0(label[1], ".hourOfDay."), + ".avg", + yAxisLabel = label[2], + title = tit + ) + ) + } + + # WRITE OUT COMBINED FIGURE ====================================================================== + # svg( + # paste0( + # "ukbAccProcessing/plots/", + # name_of_current_run, + # "_face_validity_", + # group, + # ".svg", + # sep = "" + # ), + # width = 10, + # height = 4 + # ) + # + # gridExtra::grid.arrange( + # grobs = list(p_MVPA, p_light, p_sedentary, p_sleep), + # layout_matrix = cbind(c(4, 3), c(2, 1)) + # ) + # dev.off() + + # WRITE OUT SLEEP PLOT=========================================================================== + svg( + paste0( + "ukbAccProcessing/plots/", + name_of_current_run, + "_face_validity_sleep_", + group, + ".svg", + sep = "" + ), + width = 10, + height = 4 + ) + print(p_sleep) + dev.off() +} + +# GROUPINGS FOR WHICH USE UNDER 60s and WEEKDAYS ONLY============================================================================== +dat <- a[a$age_entry < 365.25 * 60,] # Restrict to under 60s +for (group in c("JobInvolveHeavyManualPhysicalWork", + "JobInvolveWalkingStanding")) { + dat[, group] <- plyr::revalue(dat[, group], c("Prefer not to answer" = NA, "Do not know" = NA)) + dat[, group] <- plyr::mapvalues(dat[, group], "", NA) + dat[, group] <- factor(dat[, group], ordered = TRUE, levels = c("Never/rarely", "Sometimes", "Usually", "Always")) + + tit <- NULL + if (group == "JobInvolveHeavyManualPhysicalWork"){ + tit <- "Job involves heavy manual or physical work (under 60s only, weekdays)" + } + + if (group == "JobInvolveWalkingStanding"){ + tit <- "Job involves walking or standing (under 60s only, weekdays)" + } + + # PRINTING + print(group) + print(levels(dat[, group])) + print(nrow(dat)) + + # PLOT GROUPS + for (label in list( + c("MVPA", "Probability in MVPA"), + c("light", "Probability in LIPA"), + c("sedentary", "Probability in SB"), + c("sleep", "Probability in sleep") + )) { + lab <- paste0("p_", label[1]) + assign( + lab, + plotAverageDayGroupWise( + dat, + group, + paste0(label[1], ".hourOfWeekday."), + ".avg", + yAxisLabel = label[2], + title = tit + ) + ) + } + + for (label in list( + c("MVPA", "Probability in MVPA") + )) { + lab <- paste0("p_", label[1], "_zoomed") + assign( + lab, + plotAverageDayGroupWise( + dat, + group, + paste0(label[1], ".hourOfWeekday."), + ".avg", + yAxisLabel = label[2], + title = tit, + ylim = 0.075 + ) + ) + } + + # # WRITE OUT COMBINED FIGURE============================== + # svg( + # paste0( + # "ukbAccProcessing/plots/", + # name_of_current_run, + # "_face_validity_weekday", + # group, + # ".svg", + # sep = "" + # ), + # width = 10, + # height = 4 + # ) + # + # gridExtra::grid.arrange( + # grobs = list(p_MVPA, p_light, p_sedentary, p_sleep), + # layout_matrix = cbind(c(4, 3), c(2, 1)) + # ) + # dev.off() + + # WRITE OUT FIGURES FOR EACH BEHAVIOURS================== + svg( + paste0( + "ukbAccProcessing/plots/", + name_of_current_run, + "_face_validity_weekday_light_", + group, + ".svg", + sep = "" + ), + width = 10, + height = 4 + ) + print(p_light) + dev.off() + svg( + paste0( + "ukbAccProcessing/plots/", + name_of_current_run, + "_face_validity_weekday_MVPA_", + group, + ".svg", + sep = "" + ), + width = 10, + height = 4 + ) + print(p_MVPA_zoomed) + dev.off() +} + diff --git a/ukbAccProcessing/useful_functions/average_day_plot_by_group.R b/ukbAccProcessing/useful_functions/average_day_plot_by_group.R new file mode 100644 index 0000000..c746542 --- /dev/null +++ b/ukbAccProcessing/useful_functions/average_day_plot_by_group.R @@ -0,0 +1,48 @@ +plotAverageDayGroupWise <- function(data, group, exposurePrefix, exposureSuffix, yAxisLabel = exposurePrefix, title = NULL, ylim = 1){ + # PREP DATA FRAME ================================================================ + groups_data <- data.frame("hrs" = c(), "mean_PAcols" = c(), "group" = c()) + l <- levels(data[, group]) + + # CYCLE THROUGH LEVELS OF GROUPS ================================================== + for (g in l){ + print(g) + dat_local <- data[data[, group] == g,] + + # SET UP RECORD OF MEAN VALUES FOR COL ========================================== + hrPACols <- c() + mean_PACols <- c() + hrs <- c() + gp <- c() + for (hr in 0:23){ + hrs <- c(hrs, as.numeric(hr)) + hrPACols <- c(hrPACols , paste(exposurePrefix, hr, exposureSuffix, sep = "")) + mean_PACols <- c(mean_PACols, as.numeric(mean(dat_local[,hrPACols[hr+1]], na.rm = TRUE))) + gp <- c(gp, g) + + # CHECK + if (hrPACols[hr+1] != hrPACols[length(hrPACols)]){ + stop("Not pulling correct label") + } + } + + # BIND ALL MEAN VALUES INTO OVERALL GROUPS DATA + groups_data <- rbind(groups_data, data.frame("hrs" = hrs, "mean_PAcols" = mean_PACols, "group" = gp)) + } + + # REFORMAT GROUPS DATA + groups_data$group <- factor(groups_data$group, levels = l, ordered = TRUE) + + # PRODUCE PLOT + plot <- ggplot(data = groups_data, aes(x = hrs, y = mean_PAcols, colour = group))+ + geom_line(size = 1)+ + ylim(0, ylim)+ + labs( + y = yAxisLabel, + x = "Hour of Day", + title = title)+ + theme(legend.title=element_blank()) # MAKE SURE LEGEND TITLE BLANK + + + return(plot) + +}