Skip to content

Commit

Permalink
Included a plotdata attribute to the output of get_deg_list() wit…
Browse files Browse the repository at this point in the history
…h log2 fold changes for all genes
  • Loading branch information
almeidasilvaf committed Nov 28, 2023
1 parent a46b1c5 commit 5246079
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 19 deletions.
37 changes: 34 additions & 3 deletions R/02_de_analyses.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@
#' \item{pvalue}{Numeric, p-value.}
#' \item{padj}{Numeric, P-value adjusted for multiple testing.}
#' }
#'
#' The list contains two additional attributes named \strong{ngenes} (numeric,
#' total number of genes), and \strong{plotdata}, which is a 3-column data
#' frame with variables "gene" (character, gene ID), "lFC_F1_vs_P1" (numeric,
#' log2 fold change between F1 and P1), and "lFC_F1_vs_P2" (numeric,
#' log2 fold change between F1 and P2).
#'
#' @export
#' @rdname get_deg_list
Expand Down Expand Up @@ -99,14 +105,39 @@ get_deg_list <- function(
res_df <- as.data.frame(DESeq2::results(
res, contrast = c(coldata_column, x[1], x[2]), ...
))
res_df <- res_df[!is.na(res_df$padj), ]
res_df <- res_df[res_df$padj < alpha &
abs(res_df$log2FoldChange) > lfcThreshold, ]

return(res_df)
})

# Extract log2foldChange for contrast F1_P1 and F1_P2
cont <- c("F1_vs_P1", "F1_vs_P2")
dge_f1 <- dge_list[cont]
log2fc_df <- lapply(seq_along(dge_f1), function(x) {

stats_df <- data.frame(
Gene = rownames(dge_f1[[x]]),
log2FoldChange = dge_f1[[x]]$log2FoldChange
)
names(stats_df) <- c("Gene", paste0("lFC_", cont[x]))

return(stats_df)
})
log2fc_df <- Reduce(
function(x, y) merge(x, y, by = "Gene", all.x = TRUE),
log2fc_df
)
log2fc_df[is.na(log2fc_df)] <- 0

# Filter DGE list based on user-defined thresholds
dge_list <- lapply(dge_list, function(x) {

res_df <- x[!is.na(x$padj), ]
res_df <- res_df[res_df$padj < alpha &
abs(res_df$log2FoldChange) > lfcThreshold, ]
return(res_df)
})
attributes(dge_list)$ngenes <- ngenes
attributes(dge_list)$plotdata <- log2fc_df

return(dge_list)
}
Expand Down
13 changes: 1 addition & 12 deletions R/03_expression_partitioning.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,18 +62,7 @@ expression_partitioning <- function(deg_list) {
)

# Get a table with log2foldChange for contrasts `F1_vs_P1` and `F1_vs_P2`
log2fc_df <- merge(
data.frame(
Gene = rownames(deg_list$F1_vs_P1),
lFC_F1_vs_P1 = deg_list$F1_vs_P1$log2FoldChange
),
data.frame(
Gene = rownames(deg_list$F1_vs_P2),
lFC_F1_vs_P2 = deg_list$F1_vs_P2$log2FoldChange
),
all = TRUE
)
log2fc_df[is.na(log2fc_df)] <- 0
log2fc_df <- attributes(deg_list)$plotdata

# Combine classification data frame with log2foldchange
class_df <- merge(
Expand Down
Binary file modified data/deg_list.rda
Binary file not shown.
39 changes: 36 additions & 3 deletions inst/script/DATA_ACQUISITION.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@ this package.
``` r
library(tidyverse)

coldata_path <- "~/meta_data.csv"
counts_path <- "~/TS_count.txt"

# Create colData
coldata <- read_csv("~/meta_data.csv", show_col_types = FALSE) |>
coldata <- read_csv(coldata_path, show_col_types = FALSE) |>
tibble::column_to_rownames("sample") |>
mutate(ploidy_gen = str_c(ploidy, generation, sep = "_")) |>
filter(ploidy_gen %in% c("haploid_Anc", "diploid_Anc", "triploid_Anc")) |>
Expand All @@ -25,7 +28,7 @@ coldata <- read_csv("~/meta_data.csv", show_col_types = FALSE) |>
mutate(Generation = factor(Generation, levels = c("F1", "P1", "P2")))

# Create assay
counts <- read_tsv("~/TS_count.txt", show_col_types = FALSE) |>
counts <- read_tsv(counts_path, show_col_types = FALSE) |>
select(-transcript_id) |>
filter(
!gene_id %in% c(
Expand Down Expand Up @@ -75,7 +78,7 @@ se <- add_midparent_expression(se_chlamy)
deg_list <- get_deg_list(se, spikein_norm = TRUE)

# Save object
usethis::use_data(deg_list, compress = "xz")
usethis::use_data(deg_list, compress = "xz", overwrite = TRUE)
```

## `data/deg_counts.rda`
Expand All @@ -90,3 +93,33 @@ deg_counts <- get_deg_counts(deg_list)
# Save data
usethis::use_data(deg_counts, compress = "xz", overwrite = TRUE)
```

## `data/`

``` r
# Read table with functional annotation (from Phytozome)
annotation <- readr::read_tsv("~/functional_annotation.txt.gz")

# Get a table with genes and GO IDs
go_table <- annotation |>
select(gene = locusName, GO) |>
mutate(gene = str_replace_all(gene, "_4532", "")) |>
separate_longer_delim(GO, delim = " ") |>
dplyr::distinct()

# Replace GO IDs with names
go_plaza <- readr::read_tsv(
"https://ftp.psb.ugent.be/pub/plaza/plaza_public_dicots_05/GO/go.cre.csv.gz",
skip = 8, show_col_types = FALSE
) |>
select(GO = go, description) |>
distinct()

go_chlamy <- go_table |>
left_join(go_plaza) |>
select(gene, GO = description) |>
as.data.frame()

# Save data set
usethis::use_data(go_chlamy, compress = "xz")
```
6 changes: 6 additions & 0 deletions man/get_deg_list.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion vignettes/HybridExpress.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ in the count matrix. The total number of genes in the count matrix
is stored in the `ngenes` attribute of the list returned by `get_deg_list()`:

```{r}
attributes(deg_list)
attributes(deg_list)$ngenes
```

However, since the count matrix usually does not include all genes in the
Expand Down

0 comments on commit 5246079

Please sign in to comment.