diff --git a/README.md b/README.md index 3150b4a..37a2f5e 100644 --- a/README.md +++ b/README.md @@ -120,7 +120,7 @@ The `sample sheet` has the following columns: Path to the **folder** containing both the hard-filtered and soft-filtered vcf outputs from [`wi-gatk`](https://github.com/AndersenLab/wi-gatk). VCF should contain **ALL** strains, the first step will be to subset isotype reference strains for further analysis. !!! Note - This should be the **path to the folder**, we want to isotype-subset both hard and soft filtered VCFs. For example: `--vcf_folder /projects/b1059/projects/Katie/wi-gatk/WI-20210121/variation/` + This should be the **path to the folder**, we want to isotype-subset both hard and soft filtered VCFs. For example: `--vcf_folder /projects/b1059/projects/Katie/wi-gatk/WI-20210121/variation/` or `--vcf_folder /projects/b1059/data/c_elegans/WI/variation/20210121/vcf/` ### --species (optional) diff --git a/bin/reoptimzied_divergent_region_characterization.Rmd b/bin/reoptimzied_divergent_region_characterization.Rmd index b2a5b0b..6bbbc6a 100644 --- a/bin/reoptimzied_divergent_region_characterization.Rmd +++ b/bin/reoptimzied_divergent_region_characterization.Rmd @@ -19,6 +19,7 @@ library(tidyr) library(readr) library(stringr) library(purrr) +library(ggplot2) #library(glue) #setwd(glue::glue("{dirname(rstudioapi::getActiveDocumentContext()$path)}/..")) @@ -430,8 +431,9 @@ readr::write_tsv(df_div_all_clustered, "All_divergent_regions_clustered.tsv") df_chr_length <- data.table::fread("df_chr_length.tsv") # order by size of total divergent regions per strain +colnames(div_bed) <- c("CHROM", "start", "stop", "strain") + div_bed <- div_bed %>% - dplyr::rename(CHROM = V1, start = V2, stop = V3, strain = V4) %>% dplyr::mutate(size = stop - start) %>% dplyr::group_by(strain) %>% dplyr::mutate(total = sum(size)) %>% @@ -440,11 +442,11 @@ div_bed$strain <- factor(div_bed$strain, levels = unique(div_bed$strain)) div_bed %>% na.omit() %>% - ggplot(.) + - geom_rect(data=df_chr_length, aes(xmin = start/1e6, xmax = stop/1e6), ymin = div_bed$strain[1], ymax=div_bed$strain[1], color='transparent', fill='transparent', size =0.1) + - geom_rect(aes(xmin = start/1e6, xmax = stop/1e6, ymin = strain , ymax = strain), fill = 'black',color='black', size = 0.5) + - theme_bw(12) + - theme(#axis.text.x = element_text(face = "bold"), + ggplot2::ggplot(.) + + ggplot2::geom_rect(data=df_chr_length, aes(xmin = start/1e6, xmax = stop/1e6), ymin = div_bed$strain[1], ymax=div_bed$strain[1], color='transparent', fill='transparent', size =0.1) + + ggplot2::geom_rect(aes(xmin = start/1e6, xmax = stop/1e6, ymin = strain , ymax = strain), fill = 'black',color='black', size = 0.5) + + ggplot2::theme_bw(12) + + ggplot2::theme(#axis.text.x = element_text(face = "bold"), axis.text.y = element_blank(), legend.position = 'none', axis.title=element_text(face = "bold"), @@ -453,10 +455,10 @@ div_bed %>% # panel.spacing = unit(0.1, "lines"), panel.grid = element_blank()) + # scale_y_continuous(expand = c(0.00, 0.00), limits=c(0.4,327.6)) + - scale_x_continuous(expand = c(0.02, 0.02), breaks = c(5, 10, 15)) + - facet_grid(~CHROM, scales="free",space = 'free') + - labs(x="Genomic position (Mb)",y= glue::glue("{length(unique(div_bed$strain))} wild isotypes")) -ggsave("divergent_regions.png", height = 5, width = 7.5) + ggplot2::scale_x_continuous(expand = c(0.02, 0.02), breaks = c(5, 10, 15)) + + ggplot2::facet_grid(~CHROM, scales="free",space = 'free') + + ggplot2::labs(x="Genomic position (Mb)",y= glue::glue("{length(unique(div_bed$strain))} wild isotypes")) +ggplot2::ggsave("divergent_regions.png", height = 5, width = 7.5) ``` diff --git a/main.nf b/main.nf index 814e892..44a8982 100644 --- a/main.nf +++ b/main.nf @@ -274,7 +274,7 @@ process subset_iso_ref_strains { fi # output list of strains for divergent - bcftools query -l ${vcf} > div_isotype_list.txt + bcftools query -l \${output} > div_isotype_list.txt """