From beb8805a29a6357a375c0e714a6a7bd39e6c954d Mon Sep 17 00:00:00 2001 From: Candace Savonen Date: Mon, 7 Dec 2020 13:58:06 -0500 Subject: [PATCH] RNA-seq DGE dataset switch (#416) * Introduce SRP123625 * Updating wording and some other items * Re-render it * Spell error fixes * jashapiro review suggestions * one more change and re-render --- .../differential-expression_rnaseq_01.Rmd | 157 ++++---- .../differential-expression_rnaseq_01.html | 339 +++++++++--------- 03-rnaseq/pathway-analysis_rnaseq_03_gsva.Rmd | 2 +- components/dictionary.txt | 4 + components/references.bib | 13 + 5 files changed, 259 insertions(+), 256 deletions(-) diff --git a/03-rnaseq/differential-expression_rnaseq_01.Rmd b/03-rnaseq/differential-expression_rnaseq_01.Rmd index c59ecc58..e54ca8c0 100644 --- a/03-rnaseq/differential-expression_rnaseq_01.Rmd +++ b/03-rnaseq/differential-expression_rnaseq_01.Rmd @@ -1,7 +1,7 @@ --- title: "Differential Expression - RNA-seq" author: "CCDL for ALSF" -date: "October 2020" +date: "December 2020" output: html_notebook: toc: true @@ -11,7 +11,13 @@ output: # Purpose of this analysis -This notebook takes RNA-seq data and metadata from refine.bio and identifies differentially expressed genes between experimental groups. +This notebook takes RNA-seq expression data and metadata from refine.bio and identifies differentially expressed genes between two experimental groups. + +Differential expression analysis identifies genes with significantly varying expression among experimental groups by comparing the variation among samples within a group to the variation between groups. +The simplest version of this analysis is comparing two groups where one of those groups is a control group. + +Our refine.bio RNA-seq examples use DESeq2 for these analyses because it handles RNA-seq data well and has great documentation. +Read more about DESeq2 and why we like it on our [Getting Started page](https://alexslemonade.github.io/refinebio-examples/03-rnaseq/00-intro-to-rnaseq.html#02_About_DESeq2). ⬇️ [**Jump to the analysis code**](#analysis) ⬇️ @@ -66,7 +72,7 @@ In the same place you put this `.Rmd` file, you should now have three new empty For general information about downloading data for these examples, see our ['Getting Started' section](https://alexslemonade.github.io/refinebio-examples/01-getting-started/getting-started.html#how-to-get-the-data). -Go to this [dataset's page on refine.bio](https://www.refine.bio/experiments/SRP078441/rna-seq-of-primary-patient-aml-samples). +Go to this [dataset's page on refine.bio](https://www.refine.bio/experiments/SRP123625). Click the "Download Now" button on the right side of this screen. @@ -87,9 +93,9 @@ You will get an email when it is ready. ## About the dataset we are using for this example -For this example analysis, we will use this [acute myeloid leukemia (AML) dataset](https://www.refine.bio/experiments/SRP078441/rna-seq-of-primary-patient-aml-samples) [@Micol2017] - -@Micol2017 performed RNA-seq on primary peripheral blood and bone marrow samples from AML patients with and without _ASXL1/2_ mutations. +For this example analysis, we are using RNA-seq data from an [acute lymphoblastic leukemia (ALL) mouse lymphoid cell model](https://www.refine.bio/experiments/SRP123625) [@Kampen2019]. +All of the lymphoid mouse cell samples in this experiment have a human RPL10 gene; three with a reference (wild-type) RPL10 gene and three with the R98S mutation. +We will perform our differential expression using these knock-in and wild-type mice designations. ## Place the dataset in your new `data/` folder @@ -104,7 +110,7 @@ For more details on the contents of this folder see [these docs on refine.bio](h The `` folder has the data and metadata TSV files you will need for this example analysis. Experiment accession ids usually look something like `GSE1235` or `SRP12345`. -Copy and paste the `SRP078441` folder into your newly created `data/` folder. +Copy and paste the `SRP123625` folder into your newly created `data/` folder. ## Check out our file structure! @@ -112,7 +118,7 @@ Your new analysis folder should contain: - The example analysis `.Rmd` you downloaded - A folder called "data" which contains: - - The `SRP078441` folder which contains: + - The `SRP123625` folder which contains: - The gene expression - The metadata TSV - A folder for `plots` (currently empty) @@ -131,17 +137,17 @@ This is handy to do because if we want to switch the dataset (see next section f ```{r} # Define the file path to the data directory # Replace with the path of the folder the files will be in -data_dir <- file.path("data", "SRP078441") +data_dir <- file.path("data", "SRP123625") # Declare the file path to the gene expression matrix file # inside directory saved as `data_dir` # Replace with the path to your dataset file -data_file <- file.path(data_dir, "SRP078441.tsv") +data_file <- file.path(data_dir, "SRP123625.tsv") # Declare the file path to the metadata file # inside the directory saved as `data_dir` # Replace with the path to your metadata file -metadata_file <- file.path(data_dir, "metadata_SRP078441.tsv") +metadata_file <- file.path(data_dir, "metadata_SRP123625.tsv") ``` Now that our file paths are declared, we can use the `file.exists()` function to check that the files are where we specified above. @@ -226,7 +232,7 @@ We stored our file paths as objects named `metadata_file` and `data_file` in [th metadata <- readr::read_tsv(metadata_file) # Read in data TSV file -df <- readr::read_tsv(data_file) %>% +expression_df <- readr::read_tsv(data_file) %>% tibble::column_to_rownames("Gene") ``` @@ -234,11 +240,11 @@ Let's ensure that the metadata and data are in the same sample order. ```{r} # Make the data in the order of the metadata -df <- df %>% +expression_df <- expression_df %>% dplyr::select(metadata$refinebio_accession_code) # Check if this is in the same order -all.equal(colnames(df), metadata$refinebio_accession_code) +all.equal(colnames(expression_df), metadata$refinebio_accession_code) ``` The information we need to make the comparison is in the `refinebio_title` column of the metadata data.frame. @@ -249,31 +255,25 @@ head(metadata$refinebio_title) ## Set up metadata -This dataset includes data from patients with and without ASXL gene mutations. -The authors of this data have ASXL mutation status along with other information is stored all in one string (this is not very convenient for us). +This dataset includes data from mouse lymphoid cells with human RPL10, with and without a `R98S` mutation. +The mutation status is stored along with other information in a single string (this is not very convenient for us). We need to extract the mutation status information into its own column to make it easier to use. ```{r} metadata <- metadata %>% - # The last bit of the title, separated by "-" contains the mutation - # information that we want to extract - dplyr::mutate(asxl_mutation_status = stringr::word(refinebio_title, - -1, - sep = "-" - )) %>% - # Now let's summarized the ASXL1 mutation status from this variable - dplyr::mutate(asxl_mutation_status = dplyr::case_when( - grepl("ASXL1|ASXL2", asxl_mutation_status) ~ "asxl_mutation", - grepl("ASXLwt", asxl_mutation_status) ~ "no_mutation" + # Let's get the RPL10 mutation status from this variable + dplyr::mutate(mutation_status = dplyr::case_when( + stringr::str_detect(refinebio_title, "R98S") ~ "R98S", + stringr::str_detect(refinebio_title, "WT") ~ "reference" )) ``` -Let's take a look at `metadata_df` to see if this worked. +Let's take a look at `metadata` to see if this worked by looking at the `refinebio_title` and `mutation_status` columns. ```{r} -# looking at the first 6 rows of the metadata_df and only at the columns that -# contain the title and the mutation status we extracted from the title -head(dplyr::select(metadata, refinebio_title, asxl_mutation_status)) +# Let's take a look at the original metadata column's info +# and our new `mutation_status` column +dplyr::select(metadata, refinebio_title, mutation_status) ``` Before we set up our model in the next step, we want to check if our modeling variable is set correctly. @@ -281,29 +281,49 @@ We want our "control" to to be set as the first level in the variable we provide Here we will use the `str()` function to print out a preview of the **str**ucture of our variable ```{r} -# Print out a preview of `asxl_mutation_status` -str(metadata$asxl_mutation_status) +# Print out a preview of `mutation_status` +str(metadata$mutation_status) ``` -Currently, `asxl_mutation_status` is a character. -To make sure it is set how we want for the `DESeq` object and subsequent testing, let's mutate it to a factor so we can explicitly set the levels. +Currently, `mutation_status` is a character, which is not necessarily what we want. +To make sure it is set how we want for the `DESeq` object and subsequent testing, let's change it to a factor so we can explicitly set the levels. + +In the `levels` argument, we will list `reference` first since that is our control group. ```{r} -# Make asxl_mutation_status a factor and set the levels appropriately +# Make mutation_status a factor and set the levels appropriately metadata <- metadata %>% dplyr::mutate( # Here we will set up the factor aspect of our new variable. - asxl_mutation_status = factor(asxl_mutation_status, levels = c("no_mutation", "asxl_mutation")) + mutation_status = factor(mutation_status, levels = c("reference", "R98S")) ) ``` +Note if you don't specify `levels`, the `factor()` function will set levels in alphabetical order -- which sometimes means your control group will not be listed first! + Let's double check if the levels are what we want using the `levels()` function. ```{r} -levels(metadata$asxl_mutation_status) +levels(metadata$mutation_status) +``` + +Yes! `reference` is the first level as we want it to be. +We're all set and ready to move on to making our `DESeq2Dataset` object. + +## Define a minimum counts cutoff + +We want to filter out the genes that have not been expressed or that have low expression counts, since these do not have high enough counts to yield reliable differential expression results. +Removing these genes saves on memory usage during the tests. +We are going to do some pre-filtering to keep only genes with 10 or more reads in total across the samples. + +```{r} +# Define a minimum counts cutoff and filter the data to include +# only rows (genes) that have total counts above the cutoff +filtered_expression_df <- expression_df %>% + dplyr::filter(rowSums(.) >= 10) ``` -Yes! `no_mutation` is the first level as we want it to be. We're all set and ready to move on to making our `DESeq2Dataset` object. +If you have a bigger dataset, you will probably want to make this cutoff larger. ## Create a DESeq2Dataset @@ -312,39 +332,29 @@ First we need to prep our gene expression data frame so it's in the format that ```{r} # We are making our data frame into a matrix and rounding the numbers -gene_matrix <- round(as.matrix(df)) +gene_matrix <- round(as.matrix(filtered_expression_df)) ``` Now we need to create `DESeqDataSet` from our expression dataset. -We use the `asxl_mutation_status` variable we created in the design formula because that will allow us -to model the presence/absence of _ASXL1/2_ mutation. +We use the `mutation_status` variable we created in the design formula because that will allow us +to model the presence/absence of _R98S_ mutation. ```{r} ddset <- DESeqDataSetFromMatrix( + # Here we supply non-normalized count data countData = gene_matrix, + # Supply the `colData` with our metadata data frame colData = metadata, - design = ~asxl_mutation_status + # Supply our experimental variable to `design` + design = ~mutation_status ) ``` -## Define a minimum counts cutoff - -We want to filter out the genes that have not been expressed or that have low expression counts, since these do not have high enough counts to yield reliable differential expression results. -Removing these genes saves on memory usage during the tests. -We are going to do some pre-filtering to keep only genes with 10 or more reads in total across the samples. - -```{r} -# Define a minimum counts cutoff and filter `DESeqDataSet` object to include -# only rows that have counts above the cutoff -genes_to_keep <- rowSums(counts(ddset)) >= 10 -ddset <- ddset[genes_to_keep, ] -``` - ## Run differential expression analysis We'll use the wrapper function `DESeq()` to do our differential expression analysis. -In our `DESeq2` object we designated our `asxl_mutation_status` variable as the `model` argument. -Because of this, the `DESeq` function will use groups defined by `asxl_mutation_status` to test for differential expression. +In our `DESeq2` object we designated our `mutation_status` variable as the `model` argument. +Because of this, the `DESeq` function will use groups defined by `mutation_status` to test for differential expression. ```{r} deseq_object <- DESeq(ddset) @@ -385,8 +395,8 @@ deseq_df <- deseq_results %>% # display tibble::rownames_to_column("Gene") %>% dplyr::mutate(threshold = padj < 0.05) %>% - # let's sort by statistic -- the highest values should be what is up in the - # ASXL mutated samples + # sort by statistic -- the highest values should be genes with higher expression + # RPL10 mutated samples dplyr::arrange(dplyr::desc(log2FoldChange)) ``` @@ -401,10 +411,10 @@ head(deseq_df) To double check what a differentially expressed gene looks like, we can plot one with `DESeq2::plotCounts()` function. ```{r} -plotCounts(ddset, gene = "ENSG00000196074", intgroup = "asxl_mutation_status") +plotCounts(ddset, gene = "ENSMUSG00000026623", intgroup = "mutation_status") ``` -The `mutation` group samples have higher expression of this gene than the control group, which helps assure us that the results are showing us what we are looking for. +The `R98S` mutated samples have higher expression of this gene than the control group, which helps assure us that the results are showing us what we are looking for. ## Save results to TSV @@ -415,7 +425,7 @@ readr::write_tsv( deseq_df, file.path( results_dir, - "SRP078441_differential_expression_results.tsv" # Replace with a relevant output file name + "SRP123625_differential_expression_results.tsv" # Replace with a relevant output file name ) ) ``` @@ -423,26 +433,17 @@ readr::write_tsv( ## Create a volcano plot We'll use the `EnhancedVolcano` package's main function to plot our data [@Zhu2018]. + Here we are plotting the `log2FoldChange` (which was estimated by `lfcShrink` step) on the x axis and `padj` on the y axis. The `padj` variable are the p values corrected with `Benjamini-Hochberg` (the default from the `results()` step). -```{r} -EnhancedVolcano::EnhancedVolcano( - deseq_df, - lab = deseq_df$Gene, # A vector that contains our gene names - x = "log2FoldChange", # The variable in `deseq_df` you want to be plotted on the x axis - y = "padj" # The variable in `deseq_df` you want to be plotted on the y axis -) -``` - -Here the red point is the gene that meets both the default p value and log2 fold change cutoff (which are 10e-6 and 1 respectively). - -We used the adjusted p values for our plot above, so you may want to loosen this cutoff with the `pCutoff` argument (Take a look at all the options for tailoring this plot using `?EnhancedVolcano`). +Because we are using adjusted p values we can feel safe in making our `pCutoff` argument `0.01` (default is `1e-05`). +Take a look at all the options for tailoring this plot using `?EnhancedVolcano`. -Let's make the same plot again, but adjust the `pCutoff` since we are using multiple-testing corrected p values and this time we will assign the plot to our environment as `volcano_plot`. +We will save the plot to our environment as `volcano_plot` to make it easier to save the figure separately later. ```{r} -# We'll assign this as `volcano_plot` this time +# We'll assign this as `volcano_plot` volcano_plot <- EnhancedVolcano::EnhancedVolcano( deseq_df, lab = deseq_df$Gene, @@ -455,13 +456,13 @@ volcano_plot <- EnhancedVolcano::EnhancedVolcano( volcano_plot ``` -This looks pretty good. +This looks pretty good! Let's save it to a PNG. ```{r} ggsave( plot = volcano_plot, - file.path(plots_dir, "SRP078441_volcano_plot.png") + file.path(plots_dir, "SRP123625_volcano_plot.png") ) # Replace with a plot name relevant to your data ``` diff --git a/03-rnaseq/differential-expression_rnaseq_01.html b/03-rnaseq/differential-expression_rnaseq_01.html index 107f307d..0317f2b0 100644 --- a/03-rnaseq/differential-expression_rnaseq_01.html +++ b/03-rnaseq/differential-expression_rnaseq_01.html @@ -2570,7 +2570,7 @@ "vtp_value":"" },{ "function":"__c", - "vtp_value":false + "vtp_value":0 },{ "function":"__aev", "vtp_varType":"URL", @@ -2979,7 +2979,7 @@ function f(){}function h(T){var ja=!0;T&&f();ja&&zj.push(J)}function k(){return function(T){u&&(T=Jg(T));return T}}var l=qg(a),q=b==C.ba,r=l.C[0],p=l.C[1],t=void 0!==p,n=function(T){return d.getWithConfig(T)}, u=void 0!=n(C.M)&&!1!==n(C.M),v=!1!==n(C.mb),x=n(C.lb)||n(C.fa),y=n(C.V),w=n(C.da),z=n(C.ja),A=n(C.Ha),B=xj(A);Dj(B);var D={prefix:x,domain:y,Ib:w,flags:z};if(q){var F=n(C.ka)||{};v&&(Mf(F[C.ob],!!F[C.D])&&cg(Aj,D),$f(D),eg(["aw","dc"],D));n(C.ya)&&gg();F[C.D]&&dg(Aj,F[C.D],F[C.qb],!!F[C.pb],x);Gg(l,d);hj(!1,u,a)}if(b===C.Ea){var M=n(C.xa),P=n(C.wa),W=n(M);if(M===C.Ub&&void 0===W){var Z=lg("aw",D.prefix,u);0===Z.length?P(void 0):1===Z.length?P(Z[0]): P(Z)}else P(W);return}var na=!1===n(C.$d)||!1===n(C.sb);if(!q||!t&&!na)if(!0===n(C.ae)&&(t=!1),!1!==n(C.ca)||t){var J={google_conversion_id:r,google_remarketing_only:!t,onload_callback:d.H,gtm_onFailure:d.F,google_conversion_format:"3",google_conversion_color:"ffffff",google_conversion_domain:"",google_conversion_label:p,google_conversion_language:n(C.Va),google_conversion_value:n(C.za),google_conversion_currency:n(C.va),google_conversion_order_id:n(C.ub), -google_user_id:n(C.vb),google_conversion_page_url:n(C.rb),google_conversion_referrer_url:n(C.Fa),google_gtm:Di()};t&&(J.google_transport_url=oi(A,"/"));J.google_restricted_data_processing=n(C.Uc);tg()&&(J.opt_image_generator=function(){return new Image},J.google_enable_display_cookie_match=!1);!1===n(C.ca)&&(J.google_allow_ad_personalization_signals=!1);J.google_read_gcl_cookie_opt_out= +google_user_id:n(C.vb),google_conversion_page_url:n(C.rb),google_conversion_referrer_url:n(C.Fa),google_gtm:Di()};J.google_gtm_experiments={capi:!0};t&&(J.google_transport_url=oi(A,"/"));J.google_restricted_data_processing=n(C.Uc);tg()&&(J.opt_image_generator=function(){return new Image},J.google_enable_display_cookie_match=!1);!1===n(C.ca)&&(J.google_allow_ad_personalization_signals=!1);J.google_read_gcl_cookie_opt_out= !v;v&&x&&(J.google_gcl_cookie_prefix=x);var K=function(){var T={event:b},ja=d.eventModel;if(!ja)return null;m(ja,T);for(var U=0;UEnsembl Gene ID Annotation
  • Ortholog Mapping
  • Pathway Analysis - ORA
  • +
  • Pathway Analysis - GSVA
  • @@ -3766,14 +3767,17 @@

    Differential Expression - RNA-seq

    CCDL for ALSF

    -

    October 2020

    +

    December 2020

    1 Purpose of this analysis

    -

    This notebook takes RNA-seq data and metadata from refine.bio and identifies differentially expressed genes between experimental groups.

    +

    This notebook takes RNA-seq expression data and metadata from refine.bio and identifies differentially expressed genes between two experimental groups.

    +

    Differential expression analysis identifies genes with significantly varying expression among experimental groups by comparing the variation among samples within a group to the variation between groups. The simplest version of this analysis is comparing two groups where one of those groups is a control group.

    +

    Our refine.bio RNA-seq examples use DESeq2 for these analyses because it handles RNA-seq data well and has great documentation.
    +Read more about DESeq2 and why we like it on our Getting Started page.

    ⬇️ Jump to the analysis code ⬇️

    @@ -3795,7 +3799,7 @@

    2.2 Set up your analysis folders< } # Define the file path to the plots directory -plots_dir <- "plots" # Can replace with path to desired output plots directory +plots_dir <- "plots" # Create the plots folder if it doesn't exist if (!dir.exists(plots_dir)) { @@ -3803,7 +3807,7 @@

    2.2 Set up your analysis folders< } # Define the file path to the results directory -results_dir <- "results" # Can replace with path to desired output results directory +results_dir <- "results" # Create the results folder if it doesn't exist if (!dir.exists(results_dir)) { @@ -3814,7 +3818,7 @@

    2.2 Set up your analysis folders<

    2.3 Obtain the dataset from refine.bio

    For general information about downloading data for these examples, see our ‘Getting Started’ section.

    -

    Go to this dataset’s page on refine.bio.

    +

    Go to this dataset’s page on refine.bio.

    Click the “Download Now” button on the right side of this screen.

    Fill out the pop up window with your email and our Terms and Conditions:

    @@ -3825,8 +3829,7 @@

    2.3 Obtain the dataset from refin

    2.4 About the dataset we are using for this example

    -

    For this example analysis, we will use this acute myeloid leukemia (AML) dataset (Micol et al. 2017)

    -

    Micol et al. (2017) performed RNA-seq on primary peripheral blood and bone marrow samples from AML patients with and without ASXL1/2 mutations.

    +

    For this example analysis, we are using RNA-seq data from an acute lymphoblastic leukemia (ALL) mouse lymphoid cell model (Kampen et al. 2019). All of the lymphoid mouse cell samples in this experiment have a human RPL10 gene; three with a reference (wild-type) RPL10 gene and three with the R98S mutation. We will perform our differential expression using these knock-in and wild-type mice designations.

    2.5 Place the dataset in your new data/ folder

    @@ -3834,7 +3837,7 @@

    2.5 Place the dataset in your new

    For more details on the contents of this folder see these docs on refine.bio.

    The <experiment_accession_id> folder has the data and metadata TSV files you will need for this example analysis. Experiment accession ids usually look something like GSE1235 or SRP12345.

    -

    Copy and paste the SRP078441 folder into your newly created data/ folder.

    +

    Copy and paste the SRP123625 folder into your newly created data/ folder.

    2.6 Check out our file structure!

    @@ -3844,7 +3847,7 @@

    2.6 Check out our file structure!
  • A folder called “data” which contains:
      -
    • The SRP078441 folder which contains: +
    • The SRP123625 folder which contains:
      • The gene expression
      • @@ -3860,15 +3863,20 @@

        2.6 Check out our file structure!

        In order for our example here to run without a hitch, we need these files to be in these locations so we’ve constructed a test to check before we get started with the analysis. These chunks will declare your file paths and double check that your files are in the right place.

        First we will declare our file paths to our data and metadata files, which should be in our data directory. This is handy to do because if we want to switch the dataset (see next section for more on this) we are using for this analysis, we will only have to change the file path here to get started.

        # Define the file path to the data directory
        -data_dir <- file.path("data", "SRP078441") # Replace with accession number which will be the name of the folder the files will be in
        -
        -# Declare the file path to the gene expression matrix file using the data directory saved as `data_dir`
        -data_file <- file.path(data_dir, "SRP078441.tsv") # Replace with file path to your dataset
        -
        -# Declare the file path to the metadata file using the data directory saved as `data_dir`
        -metadata_file <- file.path(data_dir, "metadata_SRP078441.tsv") # Replace with file path to your metadata
        +# Replace with the path of the folder the files will be in +data_dir <- file.path("data", "SRP123625") + +# Declare the file path to the gene expression matrix file +# inside directory saved as `data_dir` +# Replace with the path to your dataset file +data_file <- file.path(data_dir, "SRP123625.tsv") + +# Declare the file path to the metadata file +# inside the directory saved as `data_dir` +# Replace with the path to your metadata file +metadata_file <- file.path(data_dir, "metadata_SRP123625.tsv")

  • Now that our file paths are declared, we can use the file.exists() function to check that the files are where we specified above.

    -
    # Check if the gene expression matrix file is at the file path stored in `data_file`
    +
    # Check if the gene expression matrix file is at the path stored in `data_file`
     file.exists(data_file)
    ## [1] TRUE
    # Check if the metadata file is at the file path stored in `metadata_file`
    @@ -3890,7 +3898,7 @@ 

    4 Differential Expression

    4.1 Install libraries

    See our Getting Started page with instructions for package installation for a list of the other software you will need, as well as more tips and resources.

    -

    In this analysis, we will be using DESeq2 (Love et al. 2014) for the differential expression testing. We will also use EnhancedVolcano for plotting and apeglm for some log fold change estimates in the results table (Zhu et al. 2018; Blighe et al. 2020)

    +

    In this analysis, we will be using DESeq2 (Love et al. 2014) for the differential expression testing. We will also use EnhancedVolcano for plotting and apeglm for some log fold change estimates in the results table (Zhu et al. 2018; Blighe et al. 2020)

    if (!("DESeq2" %in% installed.packages())) {
       # Install this package if it isn't installed yet
       BiocManager::install("DESeq2", update = FALSE)
    @@ -3922,7 +3930,7 @@ 

    4.2 Import data and metadata

    # Read in metadata TSV file
     metadata <- readr::read_tsv(metadata_file)
    ## 
    -## ── Column specification ──────────────────────────────────────────────
    +## ── Column specification ──────────────────────────────────────────────────────────────────────
     ## cols(
     ##   .default = col_logical(),
     ##   refinebio_accession_code = col_character(),
    @@ -3936,107 +3944,96 @@ 

    4.2 Import data and metadata

    ## ) ## ℹ Use `spec()` for the full column specifications.
    # Read in data TSV file
    -df <- readr::read_tsv(data_file) %>%
    +expression_df <- readr::read_tsv(data_file) %>%
       tibble::column_to_rownames("Gene")
    ## 
    -## ── Column specification ──────────────────────────────────────────────
    +## ── Column specification ──────────────────────────────────────────────────────────────────────
     ## cols(
     ##   Gene = col_character(),
    -##   SRR3895734 = col_double(),
    -##   SRR3895735 = col_double(),
    -##   SRR3895736 = col_double(),
    -##   SRR3895737 = col_double(),
    -##   SRR3895738 = col_double(),
    -##   SRR3895739 = col_double(),
    -##   SRR3895740 = col_double(),
    -##   SRR3895741 = col_double(),
    -##   SRR3895742 = col_double(),
    -##   SRR3895743 = col_double(),
    -##   SRR3895744 = col_double(),
    -##   SRR3895745 = col_double(),
    -##   SRR3895746 = col_double(),
    -##   SRR3895747 = col_double(),
    -##   SRR3895748 = col_double(),
    -##   SRR3895749 = col_double()
    +##   SRR6255584 = col_double(),
    +##   SRR6255585 = col_double(),
    +##   SRR6255586 = col_double(),
    +##   SRR6255587 = col_double(),
    +##   SRR6255588 = col_double(),
    +##   SRR6255589 = col_double()
     ## )

    Let’s ensure that the metadata and data are in the same sample order.

    # Make the data in the order of the metadata
    -df <- df %>%
    +expression_df <- expression_df %>%
       dplyr::select(metadata$refinebio_accession_code)
     
     # Check if this is in the same order
    -all.equal(colnames(df), metadata$refinebio_accession_code)
    +all.equal(colnames(expression_df), metadata$refinebio_accession_code)
    ## [1] TRUE

    The information we need to make the comparison is in the refinebio_title column of the metadata data.frame.

    head(metadata$refinebio_title)
    -
    ## [1] "CBF54-BM-ASXLwt"      "49060-2010-BM-ASXLwt"
    -## [3] "CBF234-BM-ASXLwt"     "CBF124-BM-ASXLwt"    
    -## [5] "41267-BM-ASXL2"       "45565-BM-ASXL1"
    +
    ## [1] "R98S11_mRNA_Suppl" "R98S13_mRNA_Suppl" "R98S35_mRNA_Suppl"
    +## [4] "WT28_mRNA_Suppl"   "WT29_mRNA_Suppl"   "WT36_mRNA_Suppl"

    4.3 Set up metadata

    -

    This dataset includes data from patients with and without ASXL gene mutations. The authors of this data have ASXL mutation status along with other information is stored all in one string (this is not very convenient for us). We need to extract the mutation status information into its own column to make it easier to use.

    +

    This dataset includes data from mouse lymphoid cells with human RPL10, with and without a R98S mutation. The mutation status is stored along with other information in a single string (this is not very convenient for us). We need to extract the mutation status information into its own column to make it easier to use.

    metadata <- metadata %>%
    -  # The last bit of the title, separated by "-" contains the mutation
    -  # information that we want to extract
    -  dplyr::mutate(asxl_mutation_status = stringr::word(refinebio_title,
    -    -1,
    -    sep = "-"
    -  )) %>%
    -  # Now let's summarized the ASXL1 mutation status from this variable
    -  dplyr::mutate(asxl_mutation_status = dplyr::case_when(
    -    grepl("ASXL1|ASXL2", asxl_mutation_status) ~ "asxl_mutation",
    -    grepl("ASXLwt", asxl_mutation_status) ~ "no_mutation"
    -  ))
    -

    Let’s take a look at metadata_df to see if this worked.

    -
    # looking at the first 6 rows of the metadata_df and only at the columns that
    -# contain the title and the mutation status we extracted from the title
    -head(dplyr::select(metadata, refinebio_title, asxl_mutation_status))
    + # Let's get the RPL10 mutation status from this variable + dplyr::mutate(mutation_status = dplyr::case_when( + stringr::str_detect(refinebio_title, "R98S") ~ "R98S", + stringr::str_detect(refinebio_title, "WT") ~ "reference" + ))
    +

    Let’s take a look at metadata to see if this worked by looking at the refinebio_title and mutation_status columns.

    +
    # Let's take a look at the original metadata column's info
    +# and our new `mutation_status` column
    +dplyr::select(metadata, refinebio_title, mutation_status)

    Before we set up our model in the next step, we want to check if our modeling variable is set correctly. We want our “control” to to be set as the first level in the variable we provide as our experimental variable. Here we will use the str() function to print out a preview of the structure of our variable

    -
    # Print out a preview of `asxl_mutation_status`
    -str(metadata$asxl_mutation_status)
    -
    ##  chr [1:16] "no_mutation" "no_mutation" "no_mutation" ...
    -

    Currently, asxl_mutation_status is a character. To make sure it is set how we want for the DESeq object and subsequent testing, let’s mutate it to a factor so we can explicitly set the levels.

    -
    # Make asxl_mutation_status a factor and set the levels appropriately
    +
    # Print out a preview of `mutation_status`
    +str(metadata$mutation_status)
    +
    ##  chr [1:6] "R98S" "R98S" "R98S" "reference" "reference" ...
    +

    Currently, mutation_status is a character, which is not necessarily what we want. To make sure it is set how we want for the DESeq object and subsequent testing, let’s change it to a factor so we can explicitly set the levels.

    +

    In the levels argument, we will list reference first since that is our control group.

    +
    # Make mutation_status a factor and set the levels appropriately
     metadata <- metadata %>%
       dplyr::mutate(
         # Here we will set up the factor aspect of our new variable.
    -    asxl_mutation_status = factor(asxl_mutation_status, levels = c("no_mutation", "asxl_mutation"))
    +    mutation_status = factor(mutation_status, levels = c("reference", "R98S"))
       )
    +

    Note if you don’t specify levels, the factor() function will set levels in alphabetical order – which sometimes means your control group will not be listed first!

    Let’s double check if the levels are what we want using the levels() function.

    -
    levels(metadata$asxl_mutation_status)
    -
    ## [1] "no_mutation"   "asxl_mutation"
    -

    Yes! no_mutation is the first level as we want it to be. We’re all set and ready to move on to making our DESeq2Dataset object.

    +
    levels(metadata$mutation_status)
    +
    ## [1] "reference" "R98S"
    +

    Yes! reference is the first level as we want it to be. We’re all set and ready to move on to making our DESeq2Dataset object.

    +
    +
    +

    4.4 Define a minimum counts cutoff

    +

    We want to filter out the genes that have not been expressed or that have low expression counts, since these do not have high enough counts to yield reliable differential expression results. Removing these genes saves on memory usage during the tests. We are going to do some pre-filtering to keep only genes with 10 or more reads in total across the samples.

    +
    # Define a minimum counts cutoff and filter the data to include
    +# only rows (genes) that have total counts above the cutoff
    +filtered_expression_df <- expression_df %>%
    +  dplyr::filter(rowSums(.) >= 10)
    +

    If you have a bigger dataset, you will probably want to make this cutoff larger.

    -

    4.4 Create a DESeq2Dataset

    +

    4.5 Create a DESeq2Dataset

    We will be using the DESeq2 package for differential expression testing, which requires us to format our data into a DESeqDataSet object. First we need to prep our gene expression data frame so it’s in the format that is compatible with the DESeqDataSetFromMatrix() function in the next step.

    -
    # We are making our data frame into a matrix and rounding the numbers
    -gene_matrix <- round(as.matrix(df))
    -

    Now we need to create DESeqDataSet from our expression dataset. We use the asxl_mutation_status variable we created in the design formula because that will allow us to model the presence/absence of ASXL1/2 mutation.

    -
    ddset <- DESeqDataSetFromMatrix(
    -  countData = gene_matrix,
    -  colData = metadata,
    -  design = ~asxl_mutation_status
    -)
    +
    # We are making our data frame into a matrix and rounding the numbers
    +gene_matrix <- round(as.matrix(filtered_expression_df))
    +

    Now we need to create DESeqDataSet from our expression dataset. We use the mutation_status variable we created in the design formula because that will allow us to model the presence/absence of R98S mutation.

    +
    ddset <- DESeqDataSetFromMatrix(
    +  # Here we supply non-normalized count data
    +  countData = gene_matrix,
    +  # Supply the `colData` with our metadata data frame
    +  colData = metadata,
    +  # Supply our experimental variable to `design`
    +  design = ~mutation_status
    +)
    ## converting counts to integer mode
    -
    -

    4.5 Define a minimum counts cutoff

    -

    We want to filter out the genes that have not been expressed or that have low expression counts, since these do not have high enough counts to yield reliable differential expression results. Removing these genes saves on memory usage during the tests. We are going to do some pre-filtering to keep only genes with 10 or more reads in total across the samples.

    -
    # Define a minimum counts cutoff and filter `DESeqDataSet` object to include
    -# only rows that have counts above the cutoff
    -genes_to_keep <- rowSums(counts(ddset)) >= 10
    -ddset <- ddset[genes_to_keep, ]
    -

    4.6 Run differential expression analysis

    -

    We’ll use the wrapper function DESeq() to do our differential expression analysis. In our DESeq2 object we designated our asxl_mutation_status variable as the model argument. Because of this, the DESeq function will use groups defined by asxl_mutation_status to test for differential expression.

    +

    We’ll use the wrapper function DESeq() to do our differential expression analysis. In our DESeq2 object we designated our mutation_status variable as the model argument. Because of this, the DESeq function will use groups defined by mutation_status to test for differential expression.

    deseq_object <- DESeq(ddset)
    ## estimating size factors
    ## estimating dispersions
    @@ -4044,90 +4041,91 @@

    4.6 Run differential expression a
    ## mean-dispersion relationship
    ## final dispersion estimates
    ## fitting model and testing
    -
    ## -- replacing outliers and refitting for 745 genes
    -## -- DESeq argument 'minReplicatesForReplace' = 7 
    -## -- original counts are preserved in counts(dds)
    -
    ## estimating dispersions
    -
    ## fitting model and testing

    Let’s extract the results table from the DESeq object.

    -
    deseq_results <- results(deseq_object)
    +
    deseq_results <- results(deseq_object)

    Here we will use lfcShrink() function to obtain shrunken log fold change estimates based on negative binomial distribution. This will add the estimates to your results table. Using lfcShrink() can help decrease noise and preserve large differences between groups (it requires that apeglm package be installed).

    -
    deseq_results <- lfcShrink(deseq_object, # This is the original DESeq2 object with DESeq() already having been ran
    -  coef = 2, # This is based on what log fold change coefficient was used in DESeq(), the default is 2.
    -  res = deseq_results # This needs to be the DESeq2 results table
    -)
    +
    deseq_results <- lfcShrink(deseq_object, # This is the original DESeq2 object with DESeq() already having been ran
    +  coef = 2, # This is based on what log fold change coefficient was used in DESeq(), the default is 2.
    +  res = deseq_results # This needs to be the DESeq2 results table
    +)
    ## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
     ##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
     ##     sequence count data: removing the noise and preserving large differences.
     ##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895

    Now let’s take a peek at what our results table looks like.

    -
    head(deseq_results)
    -
    ## log2 fold change (MAP): asxl mutation status asxl mutation vs no mutation 
    -## Wald test p-value: asxl mutation status asxl mutation vs no mutation 
    +
    head(deseq_results)
    +
    ## log2 fold change (MAP): mutation status R98S vs reference 
    +## Wald test p-value: mutation status R98S vs reference 
     ## DataFrame with 6 rows and 5 columns
    -##                    baseMean log2FoldChange      lfcSE    pvalue
    -##                   <numeric>      <numeric>  <numeric> <numeric>
    -## ENSG00000000003   52.852059    9.64776e-07 0.00144269 0.6604581
    -## ENSG00000000005    0.260056    2.03308e-07 0.00144269 0.5791283
    -## ENSG00000000419  406.161355   -1.38160e-06 0.00144268 0.7076357
    -## ENSG00000000457  564.784021   -2.36497e-06 0.00144268 0.3719972
    -## ENSG00000000460  401.130684    3.87280e-06 0.00144269 0.1627644
    -## ENSG00000000938 1500.527448    3.07435e-06 0.00144269 0.0354596
    -##                      padj
    -##                 <numeric>
    -## ENSG00000000003  0.998525
    -## ENSG00000000005        NA
    -## ENSG00000000419  0.998525
    -## ENSG00000000457  0.998525
    -## ENSG00000000460  0.998525
    -## ENSG00000000938  0.720601
    +## baseMean log2FoldChange lfcSE pvalue +## <numeric> <numeric> <numeric> <numeric> +## ENSMUSG00000000001 9579.0571 -0.4349384 0.160640 2.59595e-03 +## ENSMUSG00000000028 1199.7333 0.0647514 0.134708 6.04429e-01 +## ENSMUSG00000000056 1287.5086 0.3243824 0.272978 1.02032e-01 +## ENSMUSG00000000058 20.1703 5.0170059 1.515508 6.85780e-05 +## ENSMUSG00000000078 4939.6277 -0.9574237 0.234363 4.75060e-06 +## ENSMUSG00000000085 1150.9626 0.0929495 0.126941 4.32755e-01 +## padj +## <numeric> +## ENSMUSG00000000001 0.019791734 +## ENSMUSG00000000028 0.808664075 +## ENSMUSG00000000056 0.283225795 +## ENSMUSG00000000058 0.001074535 +## ENSMUSG00000000078 0.000113951 +## ENSMUSG00000000085 0.682936007

    Note it is not filtered or sorted, so we will use tidyverse to do this before saving our results to a file. Sort and filter the results.

    -
    # this is of class DESeqResults -- we want a data frame
    -deseq_df <- deseq_results %>%
    -  # make into data.frame
    -  as.data.frame() %>%
    -  # the gene names are rownames -- let's make this it's own column for easy
    -  # display
    -  tibble::rownames_to_column("Gene") %>%
    -  dplyr::mutate(threshold = padj < 0.05) %>%
    -  # let's sort by statistic -- the highest values should be what is up in the
    -  # ASXL mutated samples
    -  dplyr::arrange(dplyr::desc(log2FoldChange))
    +
    # this is of class DESeqResults -- we want a data frame
    +deseq_df <- deseq_results %>%
    +  # make into data.frame
    +  as.data.frame() %>%
    +  # the gene names are rownames -- let's make this it's own column for easy
    +  # display
    +  tibble::rownames_to_column("Gene") %>%
    +  dplyr::mutate(threshold = padj < 0.05) %>%
    +  # sort by statistic -- the highest values should be genes with higher expression
    +  # RPL10 mutated samples
    +  dplyr::arrange(dplyr::desc(log2FoldChange))

    Let’s print out what the top results are.

    -
    head(deseq_df)
    +
    head(deseq_df)

    4.6.1 Check results by plotting one gene

    To double check what a differentially expressed gene looks like, we can plot one with DESeq2::plotCounts() function.

    -
    plotCounts(ddset, gene = "ENSG00000196074", intgroup = "asxl_mutation_status")
    -

    -

    The mutation group samples have higher expression of this gene than the control group, which helps assure us that the results are showing us what we are looking for.

    +
    plotCounts(ddset, gene = "ENSMUSG00000026623", intgroup = "mutation_status")
    +

    +

    The R98S mutated samples have higher expression of this gene than the control group, which helps assure us that the results are showing us what we are looking for.

    4.7 Save results to TSV

    Write the results table to file.

    -
    readr::write_tsv(
    -  deseq_df,
    -  file.path(
    -    results_dir,
    -    "SRP078441_differential_expression_results.tsv" # Replace with a relevant output file name
    -  )
    -)
    +
    readr::write_tsv(
    +  deseq_df,
    +  file.path(
    +    results_dir,
    +    "SRP123625_differential_expression_results.tsv" # Replace with a relevant output file name
    +  )
    +)

    4.8 Create a volcano plot

    -

    We’ll use the EnhancedVolcano package’s main function to plot our data (Zhu et al. 2018). Here we are plotting the log2FoldChange (which was estimated by lfcShrink step) on the x axis and padj on the y axis. The padj variable are the p values corrected with Benjamini-Hochberg (the default from the results() step).

    -
    EnhancedVolcano::EnhancedVolcano(
    -  deseq_df,
    -  lab = deseq_df$Gene, # A vector that contains our gene names
    -  x = "log2FoldChange", # The variable in `deseq_df` you want to be plotted on the x axis
    -  y = "padj" # The variable in `deseq_df` you want to be plotted on the y axis
    -)
    +

    We’ll use the EnhancedVolcano package’s main function to plot our data (Zhu et al. 2018).

    +

    Here we are plotting the log2FoldChange (which was estimated by lfcShrink step) on the x axis and padj on the y axis. The padj variable are the p values corrected with Benjamini-Hochberg (the default from the results() step).

    +

    Because we are using adjusted p values we can feel safe in making our pCutoff argument 0.01 (default is 1e-05).
    +Take a look at all the options for tailoring this plot using ?EnhancedVolcano.

    +

    We will save the plot to our environment as volcano_plot to make it easier to save the figure separately later.

    +
    # We'll assign this as `volcano_plot`
    +volcano_plot <- EnhancedVolcano::EnhancedVolcano(
    +  deseq_df,
    +  lab = deseq_df$Gene,
    +  x = "log2FoldChange",
    +  y = "padj",
    +  pCutoff = 0.01 # Loosen the cutoff since we supplied corrected p-values
    +)
    ## Registered S3 methods overwritten by 'ggalt':
     ##   method                  from   
     ##   grid.draw.absoluteGrob  ggplot2
    @@ -4135,27 +4133,14 @@ 

    4.8 Create a volcano plot

    ## grobWidth.absoluteGrob ggplot2 ## grobX.absoluteGrob ggplot2 ## grobY.absoluteGrob ggplot2
    -

    -

    Here the red point is the gene that meets both the default p value and log2 fold change cutoff (which are 10e-6 and 1 respectively).

    -

    We used the adjusted p values for our plot above, so you may want to loosen this cutoff with the pCutoff argument (Take a look at all the options for tailoring this plot using ?EnhancedVolcano).

    -

    Let’s make the same plot again, but adjust the pCutoff since we are using multiple-testing corrected p values and this time we will assign the plot to our environment as volcano_plot.

    -
    # We'll assign this as `volcano_plot` this time
    -volcano_plot <- EnhancedVolcano::EnhancedVolcano(
    -  deseq_df,
    -  lab = deseq_df$Gene,
    -  x = "log2FoldChange",
    -  y = "padj",
    -  pCutoff = 0.01 # Loosen the cutoff since we supplied corrected p-values
    -)
    -
    -# Print out plot here
    -volcano_plot
    -

    -

    This looks pretty good. Let’s save it to a PNG.

    -
    ggsave(
    -  plot = volcano_plot,
    -  file.path(plots_dir, "SRP078441_volcano_plot.png")
    -) # Replace with a plot name relevant to your data
    +
    # Print out plot here
    +volcano_plot
    +

    +

    This looks pretty good! Let’s save it to a PNG.

    +
    ggsave(
    +  plot = volcano_plot,
    +  file.path(plots_dir, "SRP123625_volcano_plot.png")
    +) # Replace with a plot name relevant to your data
    ## Saving 7 x 5 in image

    Heatmaps are also a pretty common way to show differential expression results. You can take your results from this example and make a heatmap following our heatmap module.

    @@ -4164,16 +4149,16 @@

    4.8 Create a volcano plot

    5 Further learning resources about this analysis

    6 Session info

    At the end of every analysis, before saving your notebook, we recommend printing out your session info. This helps make your code more reproducible by recording what versions of software and packages you used to run this.

    -
    # Print session info
    -sessioninfo::session_info()
    +
    # Print session info
    +sessioninfo::session_info()
    ## ─ Session info ─────────────────────────────────────────────────────
     ##  setting  value                       
     ##  version  R version 4.0.2 (2020-06-22)
    @@ -4184,7 +4169,7 @@ 

    6 Session info

    ## collate en_US.UTF-8 ## ctype en_US.UTF-8 ## tz Etc/UTC -## date 2020-12-02 +## date 2020-12-07 ## ## ─ Packages ───────────────────────────────────────────────────────── ## package * version date lib source @@ -4225,7 +4210,7 @@

    6 Session info

    ## geneplotter 1.68.0 2020-10-27 [1] Bioconductor ## generics 0.0.2 2018-11-29 [1] RSPM (R 4.0.0) ## GenomeInfoDb * 1.26.1 2020-11-20 [1] Bioconductor -## GenomeInfoDbData 1.2.4 2020-12-01 [1] Bioconductor +## GenomeInfoDbData 1.2.4 2020-11-25 [1] Bioconductor ## GenomicRanges * 1.42.0 2020-10-27 [1] Bioconductor ## getopt 1.20.3 2019-03-22 [1] RSPM (R 4.0.0) ## ggalt 0.4.0 2017-02-15 [1] RSPM (R 4.0.0) @@ -4307,12 +4292,12 @@

    References

    Blighe K., S. Rana, and M. Lewis, 2020 EnhancedVolcano: Publication-ready volcano plots with enhanced colouring and labeling. https://github.com/kevinblighe/EnhancedVolcano

    +
    +

    Kampen K. R., L. Fancello, T. Girardi, G. Rinaldi, and M. Planque et al., 2019 Translatome analysis reveals altered serine and glycine metabolism in t-cell acute lymphoblastic leukemia cells. Nature Communications 10. https://doi.org/10.1038/s41467-019-10508-2

    +

    Love M. I., W. Huber, and S. Anders, 2014 Moderated estimation of fold change and dispersion for RNA-Seq data with DESeq2. Genome Biology 15. https://doi.org/10.1186/s13059-014-0550-8

    -
    -

    Micol J. B., A. Pastore, D. Inoue, N. Duployez, and E. Kim et al., 2017 ASXL2 is essential for haematopoiesis and acts as a haploinsufficient tumour suppressor in leukemia. Nature Communications 8: 15429. https://doi.org/10.1038/ncomms15429

    -

    Zhu A., J. G. Ibrahim, and M. I. Love, 2018 Heavy-tailed prior distributions for sequence count data: Removing the noise and preserving large differences. Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895

    diff --git a/03-rnaseq/pathway-analysis_rnaseq_03_gsva.Rmd b/03-rnaseq/pathway-analysis_rnaseq_03_gsva.Rmd index 4d168388..0b410421 100644 --- a/03-rnaseq/pathway-analysis_rnaseq_03_gsva.Rmd +++ b/03-rnaseq/pathway-analysis_rnaseq_03_gsva.Rmd @@ -20,7 +20,7 @@ This means that GSVA scores will depend on the samples included in the dataset w GSVA determines the relative enrichment of gene sets across samples using a non-parametric approach. GSVA transforms a gene by sample gene expression matrix into a gene set by sample pathway enrichment matrix [@Hanzelmann-github]. -You can use these scores for other downstream analyses like differential expressioon. +You can use these scores for other downstream analyses like differential expression. ⬇️ [**Jump to the analysis code**](#analysis) ⬇️ diff --git a/components/dictionary.txt b/components/dictionary.txt index eb2faac1..f3a79689 100644 --- a/components/dictionary.txt +++ b/components/dictionary.txt @@ -89,6 +89,7 @@ JPEG KEGG limma logFC +lymphoblastic maxBlockSize medulloblastoma microarray’s @@ -120,6 +121,7 @@ prostatectomy PRPS PRS QN +quartile qusage QuSAGE README @@ -128,6 +130,7 @@ ribosomes RMA Rmd RPKMs +RPL RStudio sina ssGSEA @@ -154,6 +157,7 @@ UpSet vivo WGCNA WGCNA's +wild-type WNT Yaari zebrafish diff --git a/components/references.bib b/components/references.bib index d599df57..019e8c45 100644 --- a/components/references.bib +++ b/components/references.bib @@ -354,6 +354,19 @@ @website{intro-microarray-slides url = {http://web1.sph.emory.edu/users/hwu30/teaching/bioc/GE1.pdf} } +@article{Kampen2019, + doi = {10.1038/s41467-019-10508-2}, + year = {2019}, + month = jun, + publisher = {Springer Science and Business Media {LLC}}, + volume = {10}, + number = {1}, + author = {Kim R. Kampen and Laura Fancello and Tiziana Girardi and Gianmarco Rinaldi and M{\'{e}}lanie Planque and Sergey O. Sulima and Fabricio Loayza-Puch and Benno Verbelen and Stijn Vereecke and Jelle Verbeeck and Joyce Op de Beeck and Jonathan Royaert and Pieter Vermeersch and David Cassiman and Jan Cools and Reuven Agami and Mark Fiers and Sarah-Maria Fendt and Kim De Keersmaecker}, + title = {Translatome analysis reveals altered serine and glycine metabolism in T-cell acute lymphoblastic leukemia cells}, + journal = {Nature Communications}, + url = {https://doi.org/10.1038/s41467-019-10508-2}, +} + @article{Kanehisa2000, author = {Kanehisa, M. and Goto, S. }, title = {{KEGG}: {Kyoto} encyclopedia of genes and genomes},