Skip to content

Commit

Permalink
updates plots
Browse files Browse the repository at this point in the history
  • Loading branch information
roddypr committed Dec 6, 2021
1 parent 3cc0293 commit 379d07b
Showing 1 changed file with 83 additions and 81 deletions.
164 changes: 83 additions & 81 deletions Topology weighting/twisst_interpretation.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,28 @@ weight_by_window$Topology_type[!weight_by_window$Topology_type %in% c("Introgres
"Trans-species\npolymorphism",
"Species tree")] <- "Other"
```

## Remove genes from scaffold NW_011795620.1

Using blast, we have double-checked the location of supergene in the assembly by Yan et al (2020). We found that the four genes in scaffold NW_011795620.1 map ar atound 8.2Mb in chromosome 16, whereas the supergene region maps at position 12.6Mb. We have removed these four genes from the analysus

```{r}
weight_by_window %>%
filter((chrom == "chr16") & (start >= 11680438) & (end <= 11719720)) -> to_filter_out
stopifnot(length(table(to_filter_out$ID)) == 1)
weight_by_window %>%
filter(!((chrom == "chr16") & (start >= 11680438) & (end <= 11719720))) -> weight_by_window
```

## Number of trees supporting a given tree

```{r}
# Subset dataset to best
weight_by_window %>%
group_by(ID, chrom, start, end, mid, region) %>%
Expand Down Expand Up @@ -349,7 +371,7 @@ topology_order <- c("Introgression (S. invicta to S. richteri)",
my_colours <- c("Introgression (S. invicta to S. richteri)" = "#0072B2",
"Introgression (S. richteri to S. invicta)" = "#56B4E9",
"Trans-species polymorphism" = "#F0E442",
"Species tree" = "#009E73",
"Species tree" = "#009E73",
"topo113" = "#E69F00",
"topo641" = "#D55E00",
"topo643" = "#CC79A7",
Expand All @@ -369,7 +391,7 @@ best_topologies_per_window_filtered %>%
group_by(region) %>%
mutate(perc = n / sum(n)) %>%
ggplot(aes(x = Best_topology_type, fill = Best_topology, y = perc)) +
geom_bar(stat = "identity") +
geom_bar(stat = "identity") +
scale_y_continuous(labels=scales::percent, name = "Windows supporting topology") +
facet_grid(rows=vars(region)) +
theme_bw() +
Expand All @@ -378,7 +400,7 @@ best_topologies_per_window_filtered %>%
theme(legend.position = c(0.25, 0.7),
legend.background = element_rect(fill="white",
size=0.5,
linetype="solid",
linetype="solid",
colour ="darkgrey")) -> p
ggsave(p, file = "results/twisst_figures/best_topology_per_window.pdf",
Expand All @@ -395,7 +417,7 @@ best_topologies_per_window_filtered %>%
group_by(region) %>%
mutate(perc = n / sum(n)) %>%
ggplot(aes(x = Best_topology_type, fill = Best_topology, y = perc)) +
geom_bar(stat = "identity") +
geom_bar(stat = "identity") +
facet_grid(cols=vars(region)) +
scale_y_continuous(labels=scales::percent, name = "Windows supporting topology") +
theme_bw() +
Expand All @@ -405,14 +427,13 @@ best_topologies_per_window_filtered %>%
theme(legend.position = c(0.25, 0.7),
legend.background = element_rect(fill="white",
size=0.5,
linetype="solid",
linetype="solid",
colour ="darkgrey")) -> p
ggsave(p, file = "results/twisst_figures/best_topology_per_window_facet.pdf",
width = 7, height = 4)
p
```

We need to make a table with the counts:
Expand All @@ -434,13 +455,34 @@ best_topologies_per_window_filtered %>%
group_by(region, Best_topology_type) %>%
count %>%
group_by(region) %>%
mutate(perc = round(100* n / sum(n)))
mutate(perc = round(100* n / sum(n))) %>%
as.data.frame()
best_topologies_per_window_filtered %>%
group_by(region, Best_topology_type, Best_topology) %>%
count %>%
group_by(region) %>%
mutate(perc = round(100* n / sum(n)))
mutate(perc = round(100* n / sum(n))) %>%
as.data.frame()
```

After collapsing the trees with diverging early branches:

```{r}
best_topologies_per_window_filtered_coll <- best_topologies_per_window_filtered
best_topologies_per_window_filtered_coll$Best_topology[best_topologies_per_window_filtered_coll$Best_topology == "topo641"] <- "Introgression (S. invicta to S. richteri)"
best_topologies_per_window_filtered_coll$Best_topology[best_topologies_per_window_filtered_coll$Best_topology == "topo643"] <- "Species tree"
best_topologies_per_window_filtered_coll$Best_topology[best_topologies_per_window_filtered_coll$Best_topology == "topo13"] <- "Species tree"
best_topologies_per_window_filtered_coll %>%
group_by(region, Best_topology_type, Best_topology) %>%
count %>%
group_by(region) %>%
mutate(perc = round(100* n / sum(n))) %>%
as.data.frame()
```

Expand All @@ -460,7 +502,7 @@ topology_order_for_trees <- c("topo116",
my_colours <- c("Introgression (S. invicta to S. richteri)" = "#0072B2",
"Introgression (S. richteri to S. invicta)" = "#56B4E9",
"Trans-species polymorphism" = "#F0E442",
"Species tree" = "#009E73",
"Species tree" = "#009E73",
"topo113" = "#E69F00",
"topo641" = "#D55E00",
"topo643" = "#CC79A7",
Expand Down Expand Up @@ -517,15 +559,15 @@ topology_order <- c("Introgression (S. invicta to S. richteri)",
"Species tree",
"Other")
# best_topologies_per_window_plot <- best_topologies_per_window
#
#
# best_topologies_per_window_plot$Best_topology[!best_topologies_per_window_plot$Best_topology %in%
# topology_order] <- "Other"
#
#
my_colours <- c("Introgression (S. invicta to S. richteri)" = "#0072B2",
"Introgression (S. richteri to S. invicta)" = "#56B4E9",
"Trans-species polymorphism" = "#F0E442",
"Species tree" = "#009E73",
"Species tree" = "#009E73",
"Other" = "#E69F00")
scaffold_fill <- c(
Expand Down Expand Up @@ -612,7 +654,7 @@ topology_colour <- colorblind_pal()(length(top_topologies_names) + 1)[-1]
topology_colour <- c(topology_colour[1], topology_colour[6], topology_colour[2],
topology_colour[3], topology_colour[4], topology_colour[5])
topology_colour <- rev(topology_colour)
# Sum the relative weights of all "Other" topologies
weight_by_window %>%
select(-Topology_type, -Topology) %>%
Expand All @@ -637,6 +679,7 @@ weight_by_window_with_other %>%


```{r fig.width = 14, fig.height = 4}
weight_by_window_with_other_rect %>%
ggplot() +
geom_rect(aes(xmin = start,
Expand All @@ -655,58 +698,37 @@ weight_by_window_with_other_rect %>%
suffix = " Mb")) -> p
p
```

```{r fig.width = 14, fig.height = 4}
weight_by_window_with_other_rect %>%
ggplot() +
geom_line(aes(x = mid,
y = Relative_weight,
group = Topology_id,
colour = Topology_id)) +
theme_bw() +
scale_colour_manual(breaks = rev(names(top_topologies_names)),
labels = rev(top_topologies_names),
values = rev(topology_colour)) +
facet_grid(rows=vars(chrom)) +
ylab("Relative weight of topology") +
xlab("Window middle coordinate in chromosome 16")
## Windows per inversion

```
```{r}
```{r fig.width = 14, fig.height = 4}
inversion_df <- best_topologies_per_window
# p <- ggplot(data=weight_by_window_with_other_rect,
# aes(x=mid, group=Topology_id, fill=Topology_id)) +
# geom_density(adjust=1.5, position="fill") +
# theme_bw() +
# facet_grid(rows=vars(chrom)) +
# scale_fill_manual(breaks = rev(names(top_topologies_names)),
# labels = rev(top_topologies_names),
# values = rev(topology_colour)) +
# ylab("Relative weight of topology") +
# xlab("Window middle coordinate in chromosome 16")
# p
#
inversion_df$Inversion <- "No known inversion"
inversion_df$Inversion[inversion_df$chrom == "chr16" &
inversion_df$mid > 11732910 &
inversion_df$mid < 16339881 ] <- "In(16)3"
weight_by_window_with_other_rect %>%
mutate(Topology_id = fct_relevel(Topology_id, rev(levels(weight_by_window_with_other_rect$Topology_id)))) %>%
ggplot(aes(x = mid,
y = Relative_weight,
fill = Topology_id)) +
geom_area() +
theme_bw() +
scale_fill_manual(breaks = rev(names(top_topologies_names)),
labels = rev(top_topologies_names),
values = rev(topology_colour)) +
facet_grid(rows=vars(chrom)) +
ylab("Relative weight of topology") +
xlab("Window middle coordinate in chromosome 16")
inversion_df$Inversion[inversion_df$chrom == "chr16" &
inversion_df$mid > 16509882 &
inversion_df$mid < 17962688 ] <- "In(16)2"
inversion_df$Inversion[inversion_df$chrom == "chr16" &
inversion_df$mid > 17972920 &
inversion_df$mid < 28149888 ] <- "In(16)1"
# Rename topologies with discordant early branches
inversion_df$Best_topology[inversion_df$Best_topology == "topo641"] <- "Introgression (S. invicta to S. richteri)"
inversion_df$Best_topology[inversion_df$Best_topology == "topo643"] <- "Species tree"
inversion_df$Best_topology[inversion_df$Best_topology == "topo13"] <- "Species tree"
inversion_df$Best_topology[inversion_df$Best_topology == "topo113"] <- "Other"
```

## Windows per inversion

```{r, echo=TRUE}
Expand All @@ -716,8 +738,7 @@ lm %>%
as.data.frame(inv_3_reg)
best_topologies_per_window %>%
filter(chrom == "chr16", start > 11732910, end < 16339881) -> inv_3_windows
filter(inversion_df, Inversion == "In(16)3") -> inv_3_windows
stopifnot(nrow(inv_3_windows) == 0)
Expand All @@ -734,8 +755,7 @@ lm %>%
as.data.frame(inv_2_reg)
best_topologies_per_window %>%
filter(chrom == "chr16", mid > 16509882, mid < 17962688) -> inv_2_windows
filter(inversion_df, Inversion == "In(16)2") -> inv_2_windows
inv_2_windows_high <- inv_2_windows[inv_2_windows$Relative_weight > 0.9,]
Expand All @@ -747,6 +767,7 @@ inv_2_size <- sum(inv_2_reg$chr_end - inv_2_reg$chr_start)

The inversion In(16)2 region of the supergene is `r round(inv_2_size/1e6, 2)` Mbp long. There were `r nrow(inv_2_windows)` windows in In(16)2 region of the supergene. Out of these, `r nrow(inv_2_windows_high)` had a single topology with a weight greater than 0.9, both supporting the _S. invicta_ to _S. richteri_ introgression scenario.


```{r}
inv_2_windows %>%
Expand All @@ -764,8 +785,7 @@ inv_1_reg %>%
slice(c(1,n())) %>%
as.data.frame()
best_topologies_per_window %>%
filter(chrom == "chr16", mid > 17972920, mid < 28149888) -> inv_1_windows
filter(inversion_df, Inversion == "In(16)1") -> inv_1_windows
inv_1_windows_high <- inv_1_windows[inv_1_windows$Relative_weight > 0.9,]
Expand All @@ -778,29 +798,11 @@ inv_1_size <- sum(inv_1_reg$chr_end - inv_1_reg$chr_start)

The inversion In(16)1 region of the supergene is `r round(inv_1_size/1e6, 2)` Mbp long. There were `r nrow(inv_1_windows)` windows in In(16)1 region of the supergene. Out of these, `r nrow(inv_1_windows_high)` (`r round(100 * nrow(inv_1_windows_high)/nrow(inv_1_windows))`%) had a single topology with a relative weight greater than 0.9. Of these, `r inv_1_windows_high_int` (`r round(100*inv_1_windows_high_int/nrow(inv_1_windows_high))`%) suported the _S. invicta_ to _S. richteri_ introgression scenario, while `r inv_1_windows_high_anc` (`r round(100*inv_1_windows_high_anc/nrow(inv_1_windows_high))`%) supported the next best topology, consistent with the Trans-species ancestral polymorphism.

```{r}
best_topologies_per_window$Inversion <- "No known inversion"
best_topologies_per_window$Inversion[best_topologies_per_window$chrom == "chr16" &
best_topologies_per_window$mid > 11732910 &
best_topologies_per_window$mid < 16339881 ] <- "In(16)3"
best_topologies_per_window$Inversion[best_topologies_per_window$chrom == "chr16" &
best_topologies_per_window$mid > 16509882 &
best_topologies_per_window$mid < 17962688 ] <- "In(16)2"
best_topologies_per_window$Inversion[best_topologies_per_window$chrom == "chr16" &
best_topologies_per_window$mid > 17972920 &
best_topologies_per_window$mid < 28149888 ] <- "In(16)1"
```

The number of windows with a single topology with a relative wight greater than 0.9 is:

```{r}
best_topologies_per_window %>%
inversion_df %>%
group_by(Inversion) %>%
summarise(window_number = n(),
high_weight_windows = sum(Relative_weight > 0.9),
Expand All @@ -813,7 +815,7 @@ The number of windows with a single topology with a relative wight greater than

```{r}
best_topologies_per_window %>%
inversion_df %>%
filter(Relative_weight > 0.9) %>%
group_by(Inversion, Best_topology) %>%
summarise(window_number = n()) %>%
Expand Down

0 comments on commit 379d07b

Please sign in to comment.