From e6a124a22b29f9d42b704a9ee19a87a9f30c9eb3 Mon Sep 17 00:00:00 2001 From: Lukasz Szafron Date: Thu, 11 Apr 2024 16:33:14 +0200 Subject: [PATCH] Some minor bugs have been fixed. --- methyl_arrays.r | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/methyl_arrays.r b/methyl_arrays.r index 418d5a8..7ff8533 100755 --- a/methyl_arrays.r +++ b/methyl_arrays.r @@ -4,14 +4,14 @@ options(max.print = 10000) cat("Program path:", unlist(strsplit(grep(commandArgs(), pattern = "file=", value = T), split = "="))[2], "\n") arguments <- commandArgs(trailingOnly = T) -# arguments <- c('/shares/Microarrays/Macice', '/shares/Microarrays/Macice/Macice.extended.csv', '/workspace/lukasz/Microarrays', "TRUE", "60", "Sample_Name", "Sample_Source", "Sample_Gender", "Sample_Label", "Sample_Group", "Sentrix_ID", "Sentrix_Position", "NA") +# arguments <- c("/shares/Microarrays/OvCa", "/shares/Microarrays/OvCa/OvCa.microarrays.new.full.csv", "/workspace/lukasz/Microarrays", "FALSE", "60", "Sample_Name", "Sample_Source", "Sample_Gender", "Sample_Description", "Sample_Group+Sample_Source", "Sentrix_ID", "Sentrix_Position", "/workspace/lukasz/Microarrays/OvCa.microarrays.new.full/Selected_CpGs.txt") if(length(arguments) != 13) {cat(paste("The number of provided arguments is incorrect:", length(arguments), "instead of 13. The arguments should be placed in the following order: 1 - a directory where microarray data are stored, 2 - a csv file with microarray data description, 3 - a directory where the analysis results should be saved, - 4 - a boolean value (TRUE/FALSE) determining if the data binarization step should be performed, + 4 - a Boolean value (TRUE/FALSE) determining if the data binarization step should be performed, 5 - a number of CPU threads to be used, 6 - a variable in the aforementioned csv file containing sample names, 7 - a variable in the aforementioned csv file containing sample sources, @@ -88,6 +88,7 @@ genome.ver <- "hg19" # Do not modify this value methyl.type <- "EPIC" dataTable <- fread(csvfile) +dataTable <- dataTable %>% dplyr::mutate(across(.cols = ind.factors, .fns = make.names)) both.sexes <- if(length(levels(factor(dataTable[[genders]]))) == 1) { FALSE @@ -127,6 +128,8 @@ print(with(dataTable, by(dataTable, INDICES = lapply(dataTable[,..ind.factors], sink() targets <- read.metharray.sheet(dataDirectory, pattern = basename(csvfile)) # Define targets +targets <- targets %>% dplyr::mutate(across(.cols = ind.factors, .fns = make.names)) + ah <- AnnotationHub(ask = F) setAnnotationHubOption("max_downloads", threads) epiFiles <- query(ah, "EpigenomeRoadMap") @@ -809,8 +812,9 @@ kwallis.mins <- kwallis.mins[kwallis.mins <0.05] all.res.df <- all.res.df[all.res.df$gene.name %in% names(kwallis.mins), , drop = F] final.order <- foreach(name = names(kwallis.mins), .combine = c) %dopar% {which(all.res.df$gene.name %in% name)} all.res.df <- all.res.df[final.order,] -all.res.df <- all.res.df %>% tibble::add_column(index = seq(1, nrow(all.res.df)), .before = 1) all.res.df <- subset(all.res.df, subset = !gene.region %in% c("(m)", "(p)")) +all.res.df <- all.res.df %>% tibble::add_column(index = seq(1, nrow(all.res.df)), .before = 1) + addWorksheet(wb = get(wb), sheetName = "CpGs.GeneRegions.analysis") writeData(x = all.res.df, wb = get(wb), sheet = "CpGs.GeneRegions.analysis", rowNames = F) @@ -847,7 +851,7 @@ invisible(foreach(gene.no = seq(length(genes.list))) %dopar% { stat_summary(geom = "point", fun = mean, fill = "yellow", shape = 24) + annotate(geom = "table", x = (length(level.names)+1)/2, y = max(Aver.Betas.df$Average.Betas) * 1.3, label = list(all.res.df.sub.sub), table.theme = ttheme_gtminimal, vjust = 1, hjust = 0.5) + scale_fill_manual(values = as.character(cols25())) + - labs(title = paste("Comparison of beta values distribution, gene:", gene, ", region:", region), + labs(title = paste0("Comparison of beta values distribution, gene: ", gene, ", region: ", region), x = ind.factors[1], y = paste("Average beta values")) + theme(plot.title = element_text(hjust = 0.5), legend.position = "none") print(box.plot) @@ -1417,7 +1421,7 @@ save.image(paste("Methylation analysis results", suffix, "RData", sep = ".")) Beta.vals <- Beta.values.bin} sel.CpGs.existing <- sel.CpGs[sel.CpGs %in% rownames(Beta.vals)] - Beta.vals.sel <- Beta.vals[sel.CpGs.existing,] + Beta.vals.sel <- Beta.vals[sel.CpGs.existing,, drop = F] if(!all(colnames(Beta.vals.sel) == targets$ID)) {stop("The colnames and target IDs do not match.")} df.sel <- as.data.frame(cbind(t(Beta.vals.sel), targets[ind.factors[1]])) df.sel.means <- with(df.sel, aggregate(x = df.sel %>% dplyr::select(-ind.factors[1]), by = list(get(ind.factors[1])), FUN = mean))