Skip to content

Commit

Permalink
Move vignettes
Browse files Browse the repository at this point in the history
  • Loading branch information
saketkc committed Jan 12, 2024
1 parent ebee898 commit 8f2912e
Show file tree
Hide file tree
Showing 6 changed files with 592 additions and 0 deletions.
105 changes: 105 additions & 0 deletions vignettes/_MultinomialModeling-Germany.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
---
title: "Multinomial modeling - Germany"
output:
html_document:
df_print: paged
---


```{r}
suppressPackageStartupMessages({
library(covmuller)
library(tidyverse)
})
theme_set(CovmullerTheme())
```

```{r, warning=FALSE, message=FALSE}
# Read the GISAID metadata snapshot and keep human-host records from Germany.
gisaid_metadata <- qs::qread(file = "~/data/epicov/metadata_tsv_2024_01_11.qs")
gisaid_germany <- filter(gisaid_metadata, Country == "Germany", Host == "Human")

# Run covmuller's FormatGISAIDMetadata(), then drop rows whose State is empty
# or whose Pango lineage is "Unassigned".
gisaid_germany <- gisaid_germany %>%
  FormatGISAIDMetadata() %>%
  filter(State != "", pangolin_lineage != "Unassigned")
```

## Plot total sequenced cases


```{r, fig.width=8, fig.height=5, warning=FALSE}
# Country-level monthly totals, drawn as a bar chart.
country_seq_stats <- gisaid_germany %>%
  TotalSequencesPerMonthCountrywise(rename_country_as_state = TRUE)

p0 <- BarPlot(
  country_seq_stats,
  ylabel = "Sequenced per month",
  color = "slateblue1",
  label_si = TRUE,
  xangle = 90,
  title = "Germany"
)
p0
```

# Plot statewise sequenced cases

```{r, fig.width=8, fig.height=5, warning=FALSE}
# Per-state monthly counts from covmuller.
state_seq_stats <- TotalSequencesPerMonthStatewise(gisaid_germany)

# Collapse the monthly counts into one total per state.
state_seq_stats_summary <- state_seq_stats %>%
  group_by(State) %>%
  summarise(value = sum(value))

# The bars show totals summed over all months, so the y axis is labelled as a
# total rather than "per month".
p1 <- BarPlot(
  state_seq_stats_summary,
  xaxis = "State",
  ylabel = "Total sequences deposited",
  color = "slateblue1",
  label_si = TRUE,
  xangle = 90,
  title = "Germany"
)
p1
```

## Get VOCs

```{r}
# Start from covmuller's VOC definitions; set the "omicron" entry aside and
# remove it from `vocs`, since Omicron sublineages are mapped by hand below.
vocs <- GetVOCs()
omicron <- vocs[["omicron"]]
vocs[["omicron"]] <- NULL

# Pango lineage (name) -> collapsed label (value). Entry order is preserved.
# NOTE(review): names such as "XBB.*" look like patterns; how they are matched
# is decided inside CollapseLineageToVOCs() -- confirm there.
custom_voc_mapping <- list(
  "BA.1.1" = "BA.1",
  "BA.1" = "BA.1",
  "BA.2" = "BA.2",
  "BA.2.10" = "BA.2.X",
  "BA.2.10.1" = "BA.2.X",
  "BA.2.12" = "BA.2.X",
  "BA.2.12.1" = "BA.2.X",
  "BA.3" = "BA.3",
  "BA.4" = "BA.4",
  "BA.5" = "BA.5",
  "BA.2.74" = "BA.2.X",
  "BA.2.75" = "BA.2.75",
  "BA.2.76" = "BA.2.X",
  "XBB.*" = "XBB",
  "BQ.1" = "BQ.1+",
  "BQ.1.*" = "BQ.1+"
)
```

```{r}
# Attach a collapsed lineage label to every record (no summarising yet).
gisaid_germany_collapsed <- CollapseLineageToVOCs(
  variant_df = gisaid_germany,
  vocs = vocs,
  custom_voc_mapping = custom_voc_mapping,
  summarize = FALSE
)

# Restrict to records from "Oct 2022" onwards with an assigned lineage and a
# known state.
# NOTE(review): the `>= "Oct 2022"` comparison depends on the class of
# MonthYearCollected, which is set outside this chunk -- confirm it is not a
# plain character column (that would compare lexicographically).
gisaid_germany_collapsed_sel <- filter(
  gisaid_germany_collapsed,
  MonthYearCollected >= "Oct 2022",
  lineage_collapsed != "Unassigned",
  State != "Unknown"
)

# Keep only collapsed lineages with more than 50 rows in the selection.
lineage_counts <- table(gisaid_germany_collapsed_sel$lineage_collapsed)
vocs_to_keep <- lineage_counts[lineage_counts > 50]
gisaid_germany_collapsed_sel <- filter(
  gisaid_germany_collapsed_sel,
  lineage_collapsed %in% names(vocs_to_keep)
)

gisaid_germany_shared_dateweek <- gisaid_germany_collapsed_sel %>%
  SummarizeVariantsDatewise(by_state = TRUE)
head(gisaid_germany_shared_dateweek)
```

```{r}
# Fit covmuller's statewise daily multinomial model on the per-state summary
# and preview the first rows of what it returns.
fit_germany_multi_predsbystate <- FitMultinomStatewiseDaily(gisaid_germany_shared_dateweek)
head(fit_germany_multi_predsbystate)
```

# Plot Smooth Muller Plots

```{r, fig.width=13, fig.height=13}
# Build the Muller plot from the fitted output and print it.
# NOTE(review): `ncol = 3` presumably sets the number of panel columns -- confirm
# against PlotMullerDailyPrevalence().
muller_germanybystate_mfit <- PlotMullerDailyPrevalence(fit_germany_multi_predsbystate, ncol = 3)
muller_germanybystate_mfit
```
97 changes: 97 additions & 0 deletions vignettes/_MultinomialModeling_Australia.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
---
title: "Multinomial modeling - Australia"
output:
html_document:
df_print: paged
---


```{r}
suppressPackageStartupMessages({
library(covmuller)
library(tidyverse)
})
theme_set(CovmullerTheme())
```

```{r, warning=FALSE, message=FALSE}
# Read the GISAID metadata snapshot and keep human-host records from Australia.
gisaid_metadata <- qs::qread(file = "~/data/epicov/metadata_tsv_2024_01_11.qs")
gisaid_australia <- gisaid_metadata %>%
  filter(Country == "Australia") %>%
  filter(Host == "Human")
gisaid_australia <- FormatGISAIDMetadata(gisaid_australia)

# Strip literal "?" characters from state names. `fixed = TRUE` is required:
# as a regular expression a bare "?" is a quantifier, not a literal character.
gisaid_australia$State <- gsub(
  pattern = "?",
  replacement = "",
  x = gisaid_australia$State,
  fixed = TRUE
)

# Drop rows with an empty state, then order by state and collection month.
gisaid_australia <- gisaid_australia %>%
  filter(State != "") %>%
  arrange(State, MonthYearCollected)
gisaid_australia <- gisaid_australia %>% filter(State != "Unknown")
```

## Plot total sequenced cases


```{r, fig.width=8, fig.height=5, warning=FALSE}
# Country-level monthly totals, drawn as a bar chart.
country_seq_stats <- gisaid_australia %>%
  TotalSequencesPerMonthCountrywise(rename_country_as_state = TRUE)

p0 <- BarPlot(
  country_seq_stats,
  ylabel = "Sequenced per month",
  color = "slateblue1",
  label_si = TRUE,
  xangle = 90,
  title = "Australia"
)
p0
```

## Get VOCs

```{r}
# Start from covmuller's VOC definitions; set the "omicron" entry aside and
# remove it from `vocs`, since Omicron sublineages are mapped by hand below.
vocs <- GetVOCs()
omicron <- vocs[["omicron"]]
vocs[["omicron"]] <- NULL

# Pango lineage (name) -> collapsed label (value). Entry order is preserved.
# NOTE(review): names such as "XBB.*" look like patterns; how they are matched
# is decided inside CollapseLineageToVOCs() -- confirm there.
custom_voc_mapping <- list(
  "BA.1.1" = "BA.1",
  "BA.1" = "BA.1",
  "BA.2" = "BA.2",
  "BA.2.10" = "BA.2.X",
  "BA.2.10.1" = "BA.2.X",
  "BA.2.12" = "BA.2.X",
  "BA.2.12.1" = "BA.2.X",
  "BA.3" = "BA.3",
  "BA.4" = "BA.4",
  "BA.5" = "BA.5",
  "BA.2.74" = "BA.2.X",
  "BA.2.75" = "BA.2.75",
  "BA.2.76" = "BA.2.X",
  "XBB.*" = "XBB",
  "BQ.1" = "BQ.1+",
  "BQ.1.*" = "BQ.1+"
)
```


```{r}
# Attach a collapsed lineage label to every record (no summarising yet).
gisaid_australia_collapsed <- CollapseLineageToVOCs(
  variant_df = gisaid_australia,
  vocs = vocs,
  custom_voc_mapping = custom_voc_mapping,
  summarize = FALSE
)

# Restrict to records from "Oct 2022" onwards that have an assigned lineage.
# NOTE(review): the `>= "Oct 2022"` comparison depends on the class of
# MonthYearCollected, which is set outside this chunk -- confirm it is not a
# plain character column (that would compare lexicographically).
gisaid_australia_collapsed_sel <- filter(
  gisaid_australia_collapsed,
  MonthYearCollected >= "Oct 2022",
  lineage_collapsed != "Unassigned"
)

# Keep only collapsed lineages with more than 100 rows in the selection.
lineage_counts <- table(gisaid_australia_collapsed_sel$lineage_collapsed)
vocs_to_keep <- lineage_counts[lineage_counts > 100]
gisaid_australia_collapsed_sel <- filter(
  gisaid_australia_collapsed_sel,
  lineage_collapsed %in% names(vocs_to_keep)
)

gisaid_australia_shared_dateweek <- gisaid_australia_collapsed_sel %>%
  SummarizeVariantsDatewise(by_state = TRUE)
head(gisaid_australia_shared_dateweek)
```

```{r}
# Fit covmuller's statewise daily multinomial model on the per-state summary
# and preview the first rows of what it returns.
fit_australia_multi_predsbystate <- FitMultinomStatewiseDaily(gisaid_australia_shared_dateweek)
head(fit_australia_multi_predsbystate)
```

# Plot Smooth Muller Plots

```{r, fig.width=12, fig.height=9}
# Build the Muller plot from the fitted output and print it.
# NOTE(review): `ncol = 3` presumably sets the number of panel columns -- confirm
# against PlotMullerDailyPrevalence().
muller_australiabystate_mfit <- PlotMullerDailyPrevalence(fit_australia_multi_predsbystate, ncol = 3)
muller_australiabystate_mfit
```
92 changes: 92 additions & 0 deletions vignettes/_MultinomialModeling_Canada.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
---
title: "Multinomial modeling - Canada"
output:
html_document:
df_print: paged
---


```{r}
suppressPackageStartupMessages({
library(covmuller)
library(tidyverse)
})
theme_set(CovmullerTheme())
```

```{r, warning=FALSE, message=FALSE}
# Read the GISAID metadata snapshot, keep human-host records from Canada, run
# covmuller's FormatGISAIDMetadata(), and order by state and collection month.
gisaid_metadata <- qs::qread(file = "~/data/epicov/metadata_tsv_2024_01_11.qs")
gisaid_canada <- gisaid_metadata %>%
  filter(Country == "Canada", Host == "Human") %>%
  FormatGISAIDMetadata() %>%
  arrange(State, MonthYearCollected)

# Normalise the state names with covmuller's helper, then drop rows whose
# state is "Unknown".
gisaid_canada$State <- CleanCanadaStates(gisaid_canada$State)
gisaid_canada <- filter(gisaid_canada, State != "Unknown")
```
## Plot total sequenced cases


```{r, fig.width=8, fig.height=5, warning=FALSE}
# Country-level monthly totals, drawn as a bar chart.
country_seq_stats <- gisaid_canada %>%
  TotalSequencesPerMonthCountrywise(rename_country_as_state = TRUE)

p0 <- BarPlot(
  country_seq_stats,
  ylabel = "Sequenced per month",
  color = "slateblue1",
  label_si = TRUE,
  xangle = 90,
  title = "Canada"
)
p0
```

## Get VOCs

```{r}
# Start from covmuller's VOC definitions; set the "omicron" entry aside and
# remove it from `vocs`, since Omicron sublineages are mapped by hand below.
vocs <- GetVOCs()
omicron <- vocs[["omicron"]]
vocs[["omicron"]] <- NULL

# Pango lineage (name) -> collapsed label (value). Entry order is preserved.
# NOTE(review): names such as "XBB.*" look like patterns; how they are matched
# is decided inside CollapseLineageToVOCs() -- confirm there.
custom_voc_mapping <- list(
  "BA.1.1" = "BA.1",
  "BA.1" = "BA.1",
  "BA.2" = "BA.2",
  "BA.2.10" = "BA.2.X",
  "BA.2.10.1" = "BA.2.X",
  "BA.2.12" = "BA.2.X",
  "BA.2.12.1" = "BA.2.X",
  "BA.3" = "BA.3",
  "BA.4" = "BA.4",
  "BA.5" = "BA.5",
  "BA.2.74" = "BA.2.X",
  "BA.2.75" = "BA.2.75",
  "BA.2.76" = "BA.2.X",
  "XBB.*" = "XBB",
  "BQ.1" = "BQ.1+",
  "BQ.1.*" = "BQ.1+"
)
```


```{r}
# Attach a collapsed lineage label to every record (no summarising yet).
gisaid_canada_collapsed <- CollapseLineageToVOCs(
  variant_df = gisaid_canada,
  vocs = vocs,
  custom_voc_mapping = custom_voc_mapping,
  summarize = FALSE
)

# Restrict to records from "Oct 2022" onwards.
# NOTE(review): rows with lineage_collapsed == "Unassigned" are not excluded
# here -- confirm that is intended.
# NOTE(review): the `>= "Oct 2022"` comparison depends on the class of
# MonthYearCollected, which is set outside this chunk -- confirm it is not a
# plain character column (that would compare lexicographically).
gisaid_canada_collapsed_sel <- filter(
  gisaid_canada_collapsed,
  MonthYearCollected >= "Oct 2022"
)

# Keep only collapsed lineages with more than 100 rows in the selection.
lineage_counts <- table(gisaid_canada_collapsed_sel$lineage_collapsed)
vocs_to_keep <- lineage_counts[lineage_counts > 100]
gisaid_canada_collapsed_sel <- filter(
  gisaid_canada_collapsed_sel,
  lineage_collapsed %in% names(vocs_to_keep)
)

gisaid_canada_shared_dateweek <- gisaid_canada_collapsed_sel %>%
  SummarizeVariantsDatewise(by_state = TRUE)
head(gisaid_canada_shared_dateweek)
```

```{r}
# Fit covmuller's statewise daily multinomial model on the per-state summary
# and preview the first rows of what it returns.
fit_canada_multi_predsbystate <- FitMultinomStatewiseDaily(gisaid_canada_shared_dateweek)
head(fit_canada_multi_predsbystate)
```

# Plot Smooth Muller Plots

```{r, fig.width=12, fig.height=9}
# Build the Muller plot from the fitted output and print it.
# NOTE(review): `ncol = 3` presumably sets the number of panel columns -- confirm
# against PlotMullerDailyPrevalence().
muller_canadabystate_mfit <- PlotMullerDailyPrevalence(fit_canada_multi_predsbystate, ncol = 3)
muller_canadabystate_mfit
```
99 changes: 99 additions & 0 deletions vignettes/_MultinomialModeling_India.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
---
title: "Multinomial modeling - India"
output:
html_document:
df_print: paged
---


```{r}
suppressPackageStartupMessages({
library(covmuller)
library(tidyverse)
})
theme_set(CovmullerTheme())
```

```{r, warning=FALSE, message=FALSE}
# Read the GISAID metadata snapshot and hand the full table to covmuller's
# India-specific filter helper.
gisaid_metadata <- qs::qread(file = "~/data/epicov/metadata_tsv_2024_01_11.qs")
gisaid_india <- FilterGISAIDIndia(gisaid_metadata_all = gisaid_metadata)
```

## Plot total sequenced cases


```{r, fig.width=8, fig.height=5, warning=FALSE}
# Country-level monthly totals, drawn as a bar chart.
country_seq_stats <- gisaid_india %>%
  TotalSequencesPerMonthCountrywise(rename_country_as_state = TRUE)

p0 <- BarPlot(
  country_seq_stats,
  ylabel = "Sequenced per month",
  color = "slateblue1",
  label_si = TRUE,
  xangle = 90,
  title = "India"
)
p0
```

# Plot statewise sequenced cases

```{r, fig.width=8, fig.height=5, warning=FALSE}
# Per-state monthly counts from covmuller.
state_seq_stats <- TotalSequencesPerMonthStatewise(gisaid_india)

# Collapse the monthly counts into one total per state.
state_seq_stats_summary <- state_seq_stats %>%
  group_by(State) %>%
  summarise(value = sum(value))

# Replace the long union-territory name with a shorter label.
is_dadra_daman <- state_seq_stats_summary$State == "Dadra and Nagar Haveli and Daman and Diu"
state_seq_stats_summary$State[is_dadra_daman] <- "Dadra/N Haveli/Daman/Diu"

p1 <- BarPlot(
  state_seq_stats_summary,
  xaxis = "State",
  ylabel = "Total sequences deposited",
  color = "slateblue1",
  label_si = TRUE,
  xangle = 90,
  title = "India"
)
p1
```

## Get VOCs

```{r}
# Start from covmuller's VOC definitions; set the "omicron" entry aside and
# remove it from `vocs`, since Omicron sublineages are mapped by hand below.
vocs <- GetVOCs()
omicron <- vocs[["omicron"]]
vocs[["omicron"]] <- NULL

# Pango lineage (name) -> collapsed label (value). Entry order is preserved.
# NOTE(review): names such as "XBB.*" look like patterns; how they are matched
# is decided inside CollapseLineageToVOCs() -- confirm there.
custom_voc_mapping <- list(
  "BA.1.1" = "BA.1",
  "BA.1" = "BA.1",
  "BA.2" = "BA.2",
  "BA.2.10" = "BA.2.X",
  "BA.2.10.1" = "BA.2.X",
  "BA.2.12" = "BA.2.X",
  "BA.2.12.1" = "BA.2.X",
  "BA.3" = "BA.3",
  "BA.4" = "BA.4",
  "BA.5" = "BA.5",
  "BA.2.74" = "BA.2.X",
  "BA.2.75" = "BA.2.75",
  "BA.2.76" = "BA.2.X",
  "XBB.*" = "XBB",
  "BQ.1" = "BQ.1+",
  "BQ.1.*" = "BQ.1+"
)
```

```{r}
# Attach a collapsed lineage label to every record (no summarising yet).
gisaid_india_collapsed <- CollapseLineageToVOCs(
  variant_df = gisaid_india,
  vocs = vocs,
  custom_voc_mapping = custom_voc_mapping,
  summarize = FALSE
)

# Restrict to records from "Oct 2022" onwards that have an assigned lineage.
# NOTE(review): the `>= "Oct 2022"` comparison depends on the class of
# MonthYearCollected, which is set outside this chunk -- confirm it is not a
# plain character column (that would compare lexicographically).
gisaid_india_collapsed_sel <- filter(
  gisaid_india_collapsed,
  MonthYearCollected >= "Oct 2022",
  lineage_collapsed != "Unassigned"
)

# Keep only collapsed lineages with more than 100 rows in the selection.
lineage_counts <- table(gisaid_india_collapsed_sel$lineage_collapsed)
vocs_to_keep <- lineage_counts[lineage_counts > 100]
gisaid_india_collapsed_sel <- filter(
  gisaid_india_collapsed_sel,
  lineage_collapsed %in% names(vocs_to_keep)
)

gisaid_india_shared_dateweek <- gisaid_india_collapsed_sel %>%
  SummarizeVariantsDatewise(by_state = TRUE)
head(gisaid_india_shared_dateweek)
```

```{r}
# Fit covmuller's statewise daily multinomial model on the per-state summary
# and preview the first rows of what it returns.
fit_india_multi_predsbystate <- FitMultinomStatewiseDaily(gisaid_india_shared_dateweek)
head(fit_india_multi_predsbystate)
```

# Plot Smooth Muller Plots

```{r, fig.width=13, fig.height=13}
# Build the Muller plot from the fitted output (default layout arguments) and
# print it.
muller_indiabystate_mfit <- PlotMullerDailyPrevalence(fit_india_multi_predsbystate)
muller_indiabystate_mfit
```
Loading

0 comments on commit 8f2912e

Please sign in to comment.