Skip to content

Commit

Permalink
small changes divergent script
Browse files Browse the repository at this point in the history
  • Loading branch information
katiesevans committed Feb 7, 2022
1 parent b02dfba commit 28d5725
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 12 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ The `sample sheet` has the following columns:
Path to the **folder** containing both the hard-filtered and soft-filtered VCF outputs from [`wi-gatk`](https://github.com/AndersenLab/wi-gatk). The VCF should contain **ALL** strains; the first step will be to subset isotype reference strains for further analysis.

!!! Note
This should be the **path to the folder**, we want to isotype-subset both hard and soft filtered VCFs. For example: `--vcf_folder /projects/b1059/projects/Katie/wi-gatk/WI-20210121/variation/`
This should be the **path to the folder**, because we want to isotype-subset both the hard-filtered and soft-filtered VCFs. For example: `--vcf_folder /projects/b1059/projects/Katie/wi-gatk/WI-20210121/variation/` or `--vcf_folder /projects/b1059/data/c_elegans/WI/variation/20210121/vcf/`

### --species (optional)

Expand Down
22 changes: 12 additions & 10 deletions bin/reoptimzied_divergent_region_characterization.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ library(tidyr)
library(readr)
library(stringr)
library(purrr)
library(ggplot2)
#library(glue)
#setwd(glue::glue("{dirname(rstudioapi::getActiveDocumentContext()$path)}/.."))
Expand Down Expand Up @@ -430,8 +431,9 @@ readr::write_tsv(df_div_all_clustered, "All_divergent_regions_clustered.tsv")
df_chr_length <- data.table::fread("df_chr_length.tsv")
# order by size of total divergent regions per strain
colnames(div_bed) <- c("CHROM", "start", "stop", "strain")
div_bed <- div_bed %>%
dplyr::rename(CHROM = V1, start = V2, stop = V3, strain = V4) %>%
dplyr::mutate(size = stop - start) %>%
dplyr::group_by(strain) %>%
dplyr::mutate(total = sum(size)) %>%
Expand All @@ -440,11 +442,11 @@ div_bed$strain <- factor(div_bed$strain, levels = unique(div_bed$strain))
div_bed %>%
na.omit() %>%
ggplot(.) +
geom_rect(data=df_chr_length, aes(xmin = start/1e6, xmax = stop/1e6), ymin = div_bed$strain[1], ymax=div_bed$strain[1], color='transparent', fill='transparent', size =0.1) +
geom_rect(aes(xmin = start/1e6, xmax = stop/1e6, ymin = strain , ymax = strain), fill = 'black',color='black', size = 0.5) +
theme_bw(12) +
theme(#axis.text.x = element_text(face = "bold"),
ggplot2::ggplot(.) +
ggplot2::geom_rect(data=df_chr_length, aes(xmin = start/1e6, xmax = stop/1e6), ymin = div_bed$strain[1], ymax=div_bed$strain[1], color='transparent', fill='transparent', size =0.1) +
ggplot2::geom_rect(aes(xmin = start/1e6, xmax = stop/1e6, ymin = strain , ymax = strain), fill = 'black',color='black', size = 0.5) +
ggplot2::theme_bw(12) +
ggplot2::theme(#axis.text.x = element_text(face = "bold"),
axis.text.y = element_blank(),
legend.position = 'none',
axis.title=element_text(face = "bold"),
Expand All @@ -453,10 +455,10 @@ div_bed %>%
# panel.spacing = unit(0.1, "lines"),
panel.grid = element_blank()) +
# scale_y_continuous(expand = c(0.00, 0.00), limits=c(0.4,327.6)) +
scale_x_continuous(expand = c(0.02, 0.02), breaks = c(5, 10, 15)) +
facet_grid(~CHROM, scales="free",space = 'free') +
labs(x="Genomic position (Mb)",y= glue::glue("{length(unique(div_bed$strain))} wild isotypes"))
ggsave("divergent_regions.png", height = 5, width = 7.5)
ggplot2::scale_x_continuous(expand = c(0.02, 0.02), breaks = c(5, 10, 15)) +
ggplot2::facet_grid(~CHROM, scales="free",space = 'free') +
ggplot2::labs(x="Genomic position (Mb)",y= glue::glue("{length(unique(div_bed$strain))} wild isotypes"))
ggplot2::ggsave("divergent_regions.png", height = 5, width = 7.5)
```
2 changes: 1 addition & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ process subset_iso_ref_strains {
fi
# output list of strains for divergent
bcftools query -l ${vcf} > div_isotype_list.txt
bcftools query -l \${output} > div_isotype_list.txt
"""

Expand Down

0 comments on commit 28d5725

Please sign in to comment.