Skip to content

Commit

Permalink
updated notebooks paths
Browse files Browse the repository at this point in the history
  • Loading branch information
joseale2310 committed Apr 14, 2023
1 parent f540c55 commit 4f94411
Show file tree
Hide file tree
Showing 10 changed files with 39 additions and 41 deletions.
8 changes: 4 additions & 4 deletions Notebooks/05b_count_matrix.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ For the sake of reproducibility, we will be using the backup results from our pr

```{r}
# Tab-separated files can be opened using the read_table() function.
read_table("/work/sequencing_data/Preprocessing_backup/results_salmon/salmon/Control_1/quant.sf", ) %>% head()
read_table("/work/Intro_to_bulkRNAseq/Data/salmon/Control_1/quant.sf", ) %>% head()
```

For each transcript that was assayed in the reference, we have:
Expand All @@ -83,7 +83,7 @@ We will use the `samplesheet.csv` file that we use to process our raw reads, sin

```{r}
# Load metadata
meta <- read_csv("../Data/samplesheet.csv")
meta <- read_csv("/work/Intro_to_bulkRNAseq/Data/samplesheet.csv")
# View metadata
meta
Expand All @@ -93,7 +93,7 @@ Using the samples column, we can create all the paths needed:

```{r}
# Directory where salmon files are. You can change this path to the results of your own analysis
dir <- "/work/sequencing_data/Preprocessing_backup/results_salmon"
dir <- "/work/Intro_to_bulkRNAseq/Data"
# List all directories containing quant.sf files using the samplename column of metadata
files <- file.path(dir,"salmon", meta$sample, "quant.sf")
Expand All @@ -106,7 +106,7 @@ files
Our Salmon files were generated with transcript sequences listed by Ensembl IDs, but `tximport` needs to know **which genes these transcripts came from**. We will use the annotation table that was created in our workflow, called `salmon_tx2gene.tsv`.

```{r}
tx2gene <- read_table("/work/sequencing_data/Preprocessing_backup/results_salmon/salmon/salmon_tx2gene.tsv", col_names = c("transcript_ID","gene_ID","gene_symbol"))
tx2gene <- read_table("/work/Intro_to_bulkRNAseq/Data/salmon_tx2gene.tsv", col_names = c("transcript_ID","gene_ID","gene_symbol"))
tx2gene %>% head()
```

Expand Down
10 changes: 5 additions & 5 deletions Notebooks/05c_count_normalization.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ library(DESeq2)
library(tximport)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
meta <- read_csv("../Data/samplesheet.csv")
dir <- "/work/sequencing_data/Preprocessing_backup/results_salmon/salmon"
tx2gene <- read_table(file.path(dir,"salmon_tx2gene.tsv"), col_names = c("transcript_ID","gene_ID","gene_symbol"))
meta <- read_csv("/work/Intro_to_bulkRNAseq/Data/samplesheet.csv")
dir <- "/work/Intro_to_bulkRNAseq/Data/salmon"
tx2gene <- read_table("/work/Intro_to_bulkRNAseq/Data/salmon_tx2gene.tsv", col_names = c("transcript_ID","gene_ID","gene_symbol"))
files <- file.path(dir, meta$sample, "quant.sf")
names(files) <- meta$sample
txi <- tximport(files, type="salmon", tx2gene=tx2gene, countsFromAbundance = "lengthScaledTPM", ignoreTxVersion = TRUE)
Expand Down Expand Up @@ -156,7 +156,7 @@ dds <- DESeqDataSetFromTximport(txi,
```{r, eval=FALSE}
## DO NOT RUN!
## Create DESeq2Dataset object from traditional count matrix
dds <- DESeqDataSetFromMatrix(countData = "../Data/Mov10_full_counts.txt",
dds <- DESeqDataSetFromMatrix(countData = "/work/Intro_to_bulkRNAseq/Data/Mov10_counts_traditional.txt",
colData = meta %>% column_to_rownames("sample"),
design = ~ sampletype)
```
Expand Down Expand Up @@ -209,5 +209,5 @@ head(normalized_counts)
We can save this normalized data matrix to file for later use:

```{r}
write.table(normalized_counts, file="../Results/normalized_counts.txt", sep="\t", quote=F)
write.table(normalized_counts, file="/work/Intro_to_bulkRNAseq/Results/normalized_counts.txt", sep="\t", quote=F)
```
6 changes: 3 additions & 3 deletions Notebooks/06_exploratory_analysis.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ library(DESeq2)
library(tximport)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
meta <- read_csv("../Data/samplesheet.csv")
dir <- "/work/sequencing_data/Preprocessing_backup/results_salmon/salmon"
tx2gene <- read_table(file.path(dir,"salmon_tx2gene.tsv"), col_names = c("transcript_ID","gene_ID","gene_symbol"))
meta <- read_csv("/work/Intro_to_bulkRNAseq/Data/samplesheet.csv")
dir <- "/work/Intro_to_bulkRNAseq/Data/salmon"
tx2gene <- read_table("/work/Intro_to_bulkRNAseq/Data/salmon_tx2gene.tsv", col_names = c("transcript_ID","gene_ID","gene_symbol"))
files <- file.path(dir, meta$sample, "quant.sf")
names(files) <- meta$sample
txi <- tximport(files, type="salmon", tx2gene=tx2gene, countsFromAbundance = "lengthScaledTPM", ignoreTxVersion = TRUE)
Expand Down
6 changes: 3 additions & 3 deletions Notebooks/07a_DEA.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ library(DESeq2)
library(tximport)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
meta <- read_csv("../Data/samplesheet.csv")
dir <- "/work/sequencing_data/Preprocessing_backup/results_salmon/salmon"
tx2gene <- read_table(file.path(dir,"salmon_tx2gene.tsv"), col_names = c("transcript_ID","gene_ID","gene_symbol"))
meta <- read_csv("/work/Intro_to_bulkRNAseq/Data/samplesheet.csv")
dir <- "/work/Intro_to_bulkRNAseq/Data/salmon"
tx2gene <- read_table("/work/Intro_to_bulkRNAseq/Data/salmon_tx2gene.tsv", col_names = c("transcript_ID","gene_ID","gene_symbol"))
files <- file.path(dir, meta$sample, "quant.sf")
names(files) <- meta$sample
txi <- tximport(files, type="salmon", tx2gene=tx2gene, countsFromAbundance = "lengthScaledTPM", ignoreTxVersion = TRUE)
Expand Down
6 changes: 3 additions & 3 deletions Notebooks/07b_hypothesis_testing.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ library(DESeq2)
library(tximport)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
meta <- read_csv("../Data/samplesheet.csv")
dir <- "/work/sequencing_data/Preprocessing_backup/results_salmon/salmon"
tx2gene <- read_table(file.path(dir,"salmon_tx2gene.tsv"), col_names = c("transcript_ID","gene_ID","gene_symbol"))
meta <- read_csv("/work/Intro_to_bulkRNAseq/Data/samplesheet.csv")
dir <- "/work/Intro_to_bulkRNAseq/Data/salmon"
tx2gene <- read_table("/work/Intro_to_bulkRNAseq/Data/salmon_tx2gene.tsv", col_names = c("transcript_ID","gene_ID","gene_symbol"))
files <- file.path(dir, meta$sample, "quant.sf")
names(files) <- meta$sample
txi <- tximport(files, type="salmon", tx2gene=tx2gene, countsFromAbundance = "lengthScaledTPM", ignoreTxVersion = TRUE)
Expand Down
8 changes: 4 additions & 4 deletions Notebooks/07c_DEA_visualization.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ library(ggrepel)
library(tximport)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
meta <- read_csv("../Data/samplesheet.csv")
dir <- "/work/sequencing_data/Preprocessing_backup/results_salmon/salmon"
tx2gene <- read_table(file.path(dir,"salmon_tx2gene.tsv"), col_names = c("transcript_ID","gene_ID","gene_symbol"))
meta <- read_csv("/work/Intro_to_bulkRNAseq/Data/samplesheet.csv")
dir <- "/work/Intro_to_bulkRNAseq/Data/salmon"
tx2gene <- read_table("/work/Intro_to_bulkRNAseq/Data/salmon_tx2gene.tsv", col_names = c("transcript_ID","gene_ID","gene_symbol"))
files <- file.path(dir, meta$sample, "quant.sf")
names(files) <- meta$sample
txi <- tximport(files, type="salmon", tx2gene=tx2gene, countsFromAbundance = "lengthScaledTPM", ignoreTxVersion = TRUE)
Expand Down Expand Up @@ -218,7 +218,7 @@ In addition to plotting subsets, we could also extract the normalized values of
```{r}
### Extract normalized expression for significant genes from the OE and control samples
### also get gene name
norm_OEsig <- normalized_counts %>% select(gene, starts_with("Control"), starts_with("Mov10_oe"))
norm_OEsig <- normalized_counts %>% select(gene, starts_with("Control"), starts_with("Mov10_oe")) %>%
dplyr::filter(gene %in% sigOE$gene)
```

Expand Down
9 changes: 4 additions & 5 deletions Notebooks/08a_FA_genomic_annotation.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,16 @@ library(DESeq2)
library(tximport)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
meta <- read_csv("../Data/samplesheet.csv")
dir <- "/work/sequencing_data/Preprocessing_backup/results_salmon/salmon"
tx2gene <- read_table(file.path(dir,"salmon_tx2gene.tsv"), col_names = c("transcript_ID","gene_ID","gene_symbol"))
meta <- read_csv("/work/Intro_to_bulkRNAseq/Data/samplesheet.csv")
dir <- "/work/Intro_to_bulkRNAseq/Data/salmon"
tx2gene <- read_table("/work/Intro_to_bulkRNAseq/Data/salmon_tx2gene.tsv", col_names = c("transcript_ID","gene_ID","gene_symbol"))
files <- file.path(dir, meta$sample, "quant.sf")
names(files) <- meta$sample
txi <- tximport(files, type="salmon", tx2gene=tx2gene, countsFromAbundance = "lengthScaledTPM", ignoreTxVersion = TRUE)
dds <- DESeqDataSetFromTximport(txi,
colData = meta %>% column_to_rownames("sample"),
design = ~ condition)
keep <- rowSums(counts(dds)) >= 10
keep <- rowSums(counts(dds)) > 0
dds <- dds[keep,]
dds <- DESeq(dds)
Expand Down
12 changes: 6 additions & 6 deletions Notebooks/08b_FA_overrepresentation.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,19 @@ library(annotables)
library(tximport)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
meta <- read_csv("../Data/samplesheet.csv")
dir <- "/work/sequencing_data/Preprocessing_backup/results_salmon/salmon"
tx2gene <- read_table(file.path(dir,"salmon_tx2gene.tsv"), col_names = c("transcript_ID","gene_ID","gene_symbol"))
meta <- read_csv("/work/Intro_to_bulkRNAseq/Data/samplesheet.csv")
dir <- "/work/Intro_to_bulkRNAseq/Data/salmon"
tx2gene <- read_table("/work/Intro_to_bulkRNAseq/Data/salmon_tx2gene.tsv", col_names = c("transcript_ID","gene_ID","gene_symbol"))
files <- file.path(dir, meta$sample, "quant.sf")
names(files) <- meta$sample
txi <- tximport(files, type="salmon", tx2gene=tx2gene, countsFromAbundance = "lengthScaledTPM", ignoreTxVersion = TRUE)
dds <- DESeqDataSetFromTximport(txi,
colData = meta %>% column_to_rownames("sample"),
design = ~ condition)
keep <- rowSums(counts(dds)) >= 10
keep <- rowSums(counts(dds)) > 0
dds <- dds[keep,]
dds <- DESeq(dds)
res_tableOE <- lfcShrink(dds, coef = "condition_MOV10_overexpression_vs_control")
Expand Down Expand Up @@ -129,7 +129,7 @@ Let's check the results:
cluster_summary <- data.frame(ego)
cluster_summary
write.csv(cluster_summary, "../Results/clusterProfiler_Mov10oe.csv")
write.csv(cluster_summary, "/work/Intro_to_bulkRNAseq/Results/clusterProfiler_Mov10oe.csv")
```

***
Expand Down
11 changes: 5 additions & 6 deletions Notebooks/08c_FA_GSEA.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,16 @@ library(annotables)
library(tximport)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
meta <- read_csv("../Data/samplesheet.csv")
dir <- "/work/sequencing_data/Preprocessing_backup/results_salmon/salmon"
tx2gene <- read_table(file.path(dir,"salmon_tx2gene.tsv"), col_names = c("transcript_ID","gene_ID","gene_symbol"))
meta <- read_csv("/work/Intro_to_bulkRNAseq/Data/samplesheet.csv")
dir <- "/work/Intro_to_bulkRNAseq/Data/salmon"
tx2gene <- read_table("/work/Intro_to_bulkRNAseq/Data/salmon_tx2gene.tsv", col_names = c("transcript_ID","gene_ID","gene_symbol"))
files <- file.path(dir, meta$sample, "quant.sf")
names(files) <- meta$sample
txi <- tximport(files, type="salmon", tx2gene=tx2gene, countsFromAbundance = "lengthScaledTPM", ignoreTxVersion = TRUE)
dds <- DESeqDataSetFromTximport(txi,
colData = meta %>% column_to_rownames("sample"),
design = ~ condition)
keep <- rowSums(counts(dds)) >= 10
keep <- rowSums(counts(dds)) > 0
dds <- dds[keep,]
dds <- DESeq(dds)
Expand Down Expand Up @@ -134,7 +133,7 @@ head(gseaKEGG_results)

```{r}
## Write GSEA results to file
write.csv(gseaKEGG_results, "../Results/gseaOE_kegg.csv", quote=F)
write.csv(gseaKEGG_results, "/work/Intro_to_bulkRNAseq/Results/gseaOE_kegg.csv", quote=F)
```

> ***NOTE:** Everyone will get different results for the GSEA because the permutations performed use random reordering. If we want to use the same permutations every time we run a function (i.e. we want the same results every time we run the function), we can call `set.seed(123456)` immediately before running it. The input to `set.seed()` can be any number, but to get the same results you need to use the same number as input every time.*
Expand Down
4 changes: 2 additions & 2 deletions Notebooks/09_summarized_workflow.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,12 @@ dds <- DESeqDataSetFromMatrix(countData = data %>% column_to_rownames("GeneSymbo
Load samplesheet with all our metadata from our pipeline
```{r}
# Load data, metadata and tx2gene and create a txi object
meta <- read_csv("../Data/samplesheet.csv")
meta <- read_csv("/work/Intro_to_bulkRNAseq/Data/samplesheet.csv")
```

Create a list of salmon results
```{r}
dir <- "/work/sequencing_data/Preprocessing_backup/results_salmon/salmon"
dir <- "/work/Intro_to_bulkRNAseq/Data/salmon"
tx2gene <- read_table(file.path(dir,"salmon_tx2gene.tsv"), col_names = c("transcript_ID","gene_ID","gene_symbol"))
# Get all salmon results files
Expand Down

0 comments on commit 4f94411

Please sign in to comment.