diff --git a/.Rbuildignore b/.Rbuildignore index 6ca55cba..41e4608e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,3 +14,6 @@ $run_dev.* ^pkgdown$ ^codecov\.yml$ ^\.github$ +^spatialLIBD\.Rproj$ +^doc$ +^Meta$ diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index 930e71a2..4350463b 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -52,9 +52,9 @@ jobs: fail-fast: false matrix: config: - - { os: ubuntu-latest, r: '4.2', bioc: '3.16', cont: "bioconductor/bioconductor_docker:RELEASE_3_16", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } - #- { os: macOS-latest, r: '4.2', bioc: '3.16'} - - { os: windows-latest, r: '4.2', bioc: '3.16'} + - { os: ubuntu-latest, r: '4.4', bioc: '3.20', cont: "bioconductor/bioconductor_docker:RELEASE_3_20", rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest" } + - { os: macOS-latest, r: '4.4', bioc: '3.20'} + - { os: windows-latest, r: '4.4', bioc: '3.20'} ## Check https://github.com/r-lib/actions/tree/master/examples ## for examples using the http-user-agent env: @@ -105,23 +105,23 @@ jobs: uses: actions/cache@v3 with: path: ${{ env.R_LIBS_USER }} - key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_16-r-4.2-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_16-r-4.2- + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_20-r-4.4-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_20-r-4.4- - name: Cache R packages on Linux if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' " uses: actions/cache@v3 with: path: /home/runner/work/_temp/Library - key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_16-r-4.2-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_16-r-4.2- + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_20-r-4.4-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_20-r-4.4- - - name: Install Linux system dependencies - if: runner.os == 'Linux' - run: | - sysreqs=$(Rscript -e 'cat("apt-get update -y && apt-get install -y", paste(gsub("apt-get install -y ", "", remotes::system_requirements("ubuntu", "20.04")), collapse = " "))') - echo $sysreqs - sudo -s eval "$sysreqs" + # - name: Install Linux system dependencies + # if: runner.os == 'Linux' + # run: | + # sysreqs=$(Rscript -e 'cat("apt-get update -y && apt-get install -y", paste(gsub("apt-get install -y ", "", remotes::system_requirements("ubuntu", "20.04")), collapse = " "))') + # echo $sysreqs + # sudo -s eval "$sysreqs" - name: Install macOS system dependencies if: matrix.config.os == 'macOS-latest' @@ -143,6 +143,17 @@ jobs: ## Required for tcltk brew install xquartz --cask + ## Latest curl + brew install curl pkg-config + + - name: Install macOS curl from source + if: matrix.config.os == 'macOS-latest' + run: | + message(paste('****', Sys.time(), 'installing curl from source ****')) + Sys.setenv(PKG_CONFIG_PATH="/opt/homebrew/opt/curl/lib/pkgconfig:/usr/local/opt/curl/lib/pkgconfig") + install.packages("curl", type = "source") + shell: Rscript {0} + - name: Install Windows system dependencies if: runner.os == 'Windows' run: | @@ -169,35 +180,19 @@ jobs: ## https://github.com/r-lib/remotes/issues/296 ## Ideally, all dependencies should get installed in the first pass. - ## Set the repos source depending on the OS - ## Alternatively use https://storage.googleapis.com/bioconductor_docker/packages/ - ## though based on https://bit.ly/bioc2021-package-binaries - ## the Azure link will be the main one going forward. - gha_repos <- if( - .Platform$OS.type == "unix" && Sys.info()["sysname"] != "Darwin" - ) c( - "AnVIL" = "https://bioconductordocker.blob.core.windows.net/packages/3.16/bioc", - BiocManager::repositories() - ) else BiocManager::repositories() - ## For running the checks message(paste('****', Sys.time(), 'installing rcmdcheck and BiocCheck ****')) - install.packages(c("rcmdcheck", "BiocCheck"), repos = gha_repos) + install.packages(c("rcmdcheck", "BiocCheck"), repos = BiocManager::repositories()) ## Pass #1 at installing dependencies - ## This pass uses AnVIL-powered fast binaries - ## details at https://github.com/nturaga/bioc2021-bioconductor-binaries - ## The speed gains only apply to the docker builds. message(paste('****', Sys.time(), 'pass number 1 at installing dependencies: local dependencies ****')) - remotes::install_local(dependencies = TRUE, repos = gha_repos, build_vignettes = FALSE, upgrade = TRUE) + remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE) continue-on-error: true shell: Rscript {0} - name: Install dependencies pass 2 run: | ## Pass #2 at installing dependencies - ## This pass does not use AnVIL and will thus update any packages - ## that have seen been updated in Bioconductor message(paste('****', Sys.time(), 'pass number 2 at installing dependencies: any remaining dependencies ****')) remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = TRUE, upgrade = TRUE, force = TRUE) shell: Rscript {0} @@ -210,13 +205,13 @@ jobs: shell: Rscript {0} - name: Install covr - if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && runner.os == 'Linux' + if: github.ref == 'refs/heads/devel' && env.run_covr == 'true' && runner.os == 'Linux' run: | remotes::install_cran("covr") shell: Rscript {0} - name: Install pkgdown - if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + if: github.ref == 'refs/heads/devel' && env.run_pkgdown == 'true' && runner.os == 'Linux' run: | remotes::install_cran("pkgdown") shell: Rscript {0} @@ -237,7 +232,7 @@ jobs: rcmdcheck::rcmdcheck( args = c("--no-manual", "--no-vignettes", "--timings"), build_args = c("--no-manual", "--keep-empty-dirs", "--no-resave-data"), - error_on = "error", + error_on = "warning", check_dir = "check" ) shell: Rscript {0} @@ -267,17 +262,17 @@ jobs: shell: Rscript {0} - name: Test coverage - if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && runner.os == 'Linux' + if: github.ref == 'refs/heads/devel' && env.run_covr == 'true' && runner.os == 'Linux' run: | - covr::codecov() + covr::codecov(coverage = covr::package_coverage(type = "all")) shell: Rscript {0} - name: Install package - if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + if: github.ref == 'refs/heads/devel' && env.run_pkgdown == 'true' && runner.os == 'Linux' run: R CMD INSTALL . - name: Build pkgdown site - if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + if: github.ref == 'refs/heads/devel' && env.run_pkgdown == 'true' && runner.os == 'Linux' run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) shell: Rscript {0} ## Note that you need to run pkgdown::deploy_to_branch(new_process = FALSE) @@ -286,12 +281,12 @@ jobs: ## makes the git history recognizable by pkgdown. - name: Install deploy dependencies - if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + if: github.ref == 'refs/heads/devel' && env.run_pkgdown == 'true' && runner.os == 'Linux' run: | apt-get update && apt-get -y install rsync - name: Deploy pkgdown site to GitHub pages 🚀 - if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + if: github.ref == 'refs/heads/devel' && env.run_pkgdown == 'true' && runner.os == 'Linux' uses: JamesIves/github-pages-deploy-action@releases/v4 with: clean: false @@ -302,21 +297,54 @@ jobs: if: failure() uses: actions/upload-artifact@master with: - name: ${{ runner.os }}-biocversion-RELEASE_3_16-r-4.2-results + name: ${{ runner.os }}-biocversion-RELEASE_3_20-r-4.4-results path: check - ## Note that DOCKER_PASSWORD is really a token for your dockerhub - ## account, not your actual dockerhub account password. - ## This comes from - ## https://seandavi.github.io/BuildABiocWorkshop/articles/HOWTO_BUILD_WORKSHOP.html#6-add-secrets-to-github-repo - ## Check https://github.com/docker/build-push-action/tree/releases/v1 - ## for more details. - - uses: docker/build-push-action@v1 - if: "!contains(github.event.head_commit.message, '/nodocker') && env.run_docker == 'true' && runner.os == 'Linux' " + + ## Code adapted from + ## https://github.com/waldronlab/cBioPortalData/blob/e0440a4445f0cc731e426363a76faa22ee5e0f9d/.github/workflows/devel_check_dock.yml#L65-L92 + docker-build-and-push: + runs-on: ubuntu-latest + needs: build-check + steps: + - name: Checkout Repository + if: "!contains(github.event.head_commit.message, '/nodocker') && env.run_docker == 'true' && github.ref == 'refs/heads/devel'" + uses: actions/checkout@v3 + + - name: Register repo name + if: "!contains(github.event.head_commit.message, '/nodocker') && env.run_docker == 'true' && github.ref == 'refs/heads/devel'" + id: reg_repo_name + run: | + echo CONT_IMG_NAME=$(echo ${{ github.event.repository.name }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV + + - name: Set up QEMU + if: "!contains(github.event.head_commit.message, '/nodocker') && env.run_docker == 'true' && github.ref == 'refs/heads/devel'" + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + if: "!contains(github.event.head_commit.message, '/nodocker') && env.run_docker == 'true' && github.ref == 'refs/heads/devel'" + uses: docker/setup-buildx-action@v2 + + - name: Login to Docker Hub + if: "!contains(github.event.head_commit.message, '/nodocker') && env.run_docker == 'true' && github.ref == 'refs/heads/devel'" + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + ## Note that DOCKERHUB_TOKEN is really a token for your dockerhub + ## account, not your actual dockerhub account password. You can get it + ## from https://hub.docker.com/settings/security. + ## Check https://github.com/docker/build-push-action/tree/v4.0.0 + ## for more details. + ## Alternatively, try checking + ## https://seandavi.github.io/BuildABiocWorkshop/articles/HOWTO_BUILD_WORKSHOP.html. + + - name: Build and Push Docker + if: "!contains(github.event.head_commit.message, '/nodocker') && env.run_docker == 'true' && github.ref == 'refs/heads/devel' && success()" + uses: docker/build-push-action@v4 with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - repository: lieberinstitute/spatiallibd - tag_with_ref: true - tag_with_sha: true - tags: latest + context: . + push: true + tags: > + ${{ secrets.DOCKERHUB_USERNAME }}/${{ env.CONT_IMG_NAME }}:latest, + ${{ secrets.DOCKERHUB_USERNAME }}/${{ env.CONT_IMG_NAME }}:devel diff --git a/.gitignore b/.gitignore index c70047e0..2064dd51 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ data-raw/spatialLIBD_files docs/ rsconnect *.Rproj +/doc/ +/Meta/ diff --git a/DESCRIPTION b/DESCRIPTION index 67777959..d527ed43 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: spatialLIBD Title: spatialLIBD: an R/Bioconductor package to visualize spatially-resolved transcriptomics data -Version: 1.11.8 -Date: 2023-02-22 +Version: 1.19.3 +Date: 2024-12-12 Authors@R: c( person("Leonardo", "Collado-Torres", role = c("aut", "cre"), @@ -55,7 +55,6 @@ Imports: scater, DT, ExperimentHub, - RColorBrewer, SummarizedExperiment, stats, graphics, @@ -76,8 +75,13 @@ Imports: scuttle, edgeR, limma, - statmod -RoxygenNote: 7.2.3 + statmod, + MatrixGenerics, + rlang, + dplyr, + ComplexHeatmap, + circlize +RoxygenNote: 7.3.2 Roxygen: list(markdown = TRUE) URL: https://github.com/LieberInstitute/spatialLIBD BugReports: https://support.bioconductor.org/tag/spatialLIBD @@ -90,7 +94,9 @@ Suggests: covr, here, BiocManager, - lobstr + lobstr, + DropletUtils, + RColorBrewer VignetteBuilder: knitr biocViews: Homo_sapiens_Data, ExperimentHub, SequencingData, SingleCellData, ExpressionData, Tissue, PackageTypeData, SpatialData diff --git a/NAMESPACE b/NAMESPACE index d60670e7..06db229f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(add10xVisiumAnalysis) export(add_images) export(add_key) +export(add_qc_metrics) export(annotate_registered_clusters) export(check_modeling_results) export(check_sce) @@ -46,6 +47,7 @@ export(vis_gene_p) export(vis_grid_clus) export(vis_grid_gene) import(ExperimentHub) +import(MatrixGenerics) import(SingleCellExperiment) import(ggplot2) import(grid) @@ -54,12 +56,15 @@ import(plotly, except = last_plot) import(shiny) importFrom(AnnotationHub,query) importFrom(BiocGenerics,which) +importFrom(ComplexHeatmap,Heatmap) +importFrom(ComplexHeatmap,anno_barplot) +importFrom(ComplexHeatmap,columnAnnotation) +importFrom(ComplexHeatmap,rowAnnotation) importFrom(DT,DTOutput) importFrom(DT,renderDT) importFrom(GenomicRanges,seqnames) importFrom(IRanges,CharacterList) importFrom(IRanges,IntegerList) -importFrom(RColorBrewer,brewer.pal) importFrom(S4Vectors,"mcols<-") importFrom(S4Vectors,DataFrame) importFrom(S4Vectors,mcols) @@ -82,14 +87,20 @@ importFrom(SummarizedExperiment,"rowRanges<-") importFrom(SummarizedExperiment,assay) importFrom(SummarizedExperiment,assayNames) importFrom(SummarizedExperiment,assays) +importFrom(SummarizedExperiment,colData) importFrom(benchmarkme,get_ram) +importFrom(circlize,colorRamp2) importFrom(cowplot,plot_grid) +importFrom(dplyr,group_by) +importFrom(dplyr,left_join) +importFrom(dplyr,mutate) +importFrom(dplyr,select) +importFrom(dplyr,summarize) importFrom(edgeR,calcNormFactors) importFrom(edgeR,filterByExpr) importFrom(fields,image.plot) importFrom(golem,with_golem_options) importFrom(grDevices,as.raster) -importFrom(grDevices,colorRampPalette) importFrom(grDevices,dev.off) importFrom(grDevices,pdf) importFrom(graphics,abline) @@ -121,9 +132,11 @@ importFrom(magick,image_transparent) importFrom(methods,is) importFrom(methods,new) importFrom(png,readPNG) +importFrom(rlang,arg_match) importFrom(rtracklayer,import) importFrom(scater,plotReducedDim) importFrom(scuttle,aggregateAcrossCells) +importFrom(scuttle,isOutlier) importFrom(sessioninfo,session_info) importFrom(shiny,shinyApp) importFrom(shinyWidgets,pickerInput) @@ -137,6 +150,7 @@ importFrom(stats,hclust) importFrom(stats,median) importFrom(stats,model.matrix) importFrom(stats,p.adjust) +importFrom(stats,prcomp) importFrom(stats,reshape) importFrom(stats,setNames) importFrom(tibble,tibble) diff --git a/NEWS.md b/NEWS.md index e43f212a..1dc4a516 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,178 @@ +# spatialLIBD 1.19.3 + +BUG FIXES + +* Resolved which made +`add_key()` too strict and would create issues with `export_cluster()`. Reported +by @lahuuki and @manishabarse. + +# spatialLIBD 1.19.2 + +BUG FIXES + +* Merged by @lahuuki. +This fixes https://github.com/LieberInstitute/spatialLIBD/issues/72 and +https://github.com/LieberInstitute/spatialLIBD/issues/48 by making +`registration_pseudobulk()` more robust. The original issues were reported by +@boyiguo1 and @berniejmulvey. + +# spatialLIBD 1.19.1 + +NEW FEATURES + +* Merged by @lahuuki. +This pull request fully re-implemented `layer_stat_cor_plot()` with a version +that uses `ComplexHeatmap::Heatmap()` internally. It also adds support for +incorporating the automatic annotation results from +`annotate_registered_clusters()`. NOTE that the `max` argument was renamed to +`color_max`, as well as `min` to `color_min`. Also, the default for `min` used +to be `-max` and now for `color_min` the default is the `min()` correlation +observed. The default for `max` was 0.81 and the default for `color_max()` is +the `max()` observed correlation. +* `run_app()` was also updated to match the updated in `layer_stat_cor_plot()` +and now has 2 new inputs for controlling the annotation process with +`annotate_registered_clusters()`. It also allows downloading a CSV file with +the annotation results. + + +# spatialLIBD 1.17.10 + +BUG FIXES + +* `registration_wrapper()` now automatically handles the scenario where `k = 2` +by not using `registration_stats_anova()` and providing an apporpriate warning. +Implemented by @lahuuki at +. + +# spatialLIBD 1.17.9 + +BUG FIXES + +* `read10xVisiumWrapper()` is now able to detect the GTF file used by +`SpaceRanger` for version 3.0.0+. Implemented by @nick-eagles at +. + + +# spatialLIBD 1.17.6 + +BUG FIXES + +* Fixed the bug reported by @lahuuki about `vis_grid_clus()` not handling +`logical()` cluster variables. +See . To resolve this, +`sort_clusters()` and `get_colors()` had to change internally. Examples and +documentation for both functions have now been updated to showcase what happens +when you provide a `logical()` vector as an input. + +# spatialLIBD 1.17.5 + +NEW FEATURES + +* Added `add_qc_metrics()` inspired by + +which adds seven new columns to the `colData(spe)` that can be useful when +performing quality control of the data. Developed by @lahuuki. + +# spatialLIBD 1.17.3 + +NEW FEATURES + +* Added support for `SpatialExperiment` objects created with +`visiumStitched::build_spe()` + that +stitch together multiple Visium capture areas. Developed by @Nick-Eagles. + +# spatialLIBD 1.15.2 + +SIGNIFICANT USER-VISIBLE CHANGES + +* `vis_gene()` now has a `multi_gene_method` argument which provides 3 methods +for combining multiple continuous variables: `z_score`, `pca`, and `sparsity`. +These options can now be used with `run_app()` (the interactive websites). These +methods are further illustrated and documented in a new vignette available at +. This +work was contributed by @Nick-Eagles. + +# spatialLIBD 1.13.6 + +NEW FEATURES + +* `vis_clus_p()`, `vis_clus()`, and `vis_grid_clus()` now all use implement the +`na_color` argument that was present in the `vis_gene()` functions. This +resolves https://github.com/LieberInstitute/spatialLIBD/issues/43 by @boyiguo1. + +# spatialLIBD 1.13.5 + +NEW FEATURES + +* `run_app()` now has a `auto_crop_default` argument set to `TRUE` by default. +It can be turned off in cases where you are displaying images that do not +follow the expected Visium grid dimensions, such as manually stitched images +that you don't want to automatically crop. + +# spatialLIBD 1.13.4 + +NEW FEATURES + +* Added `fetch_data("spatialDLPFC_Visium_example_subset")` which is a subset +of 3 samples with only the `lowres` images that can be used for example / +tutorial purposes. + +# spatialLIBD 1.13.2 + +NEW FEATURES + +* Louise A. Huuki-Myers @lahuuki added a vignette explaining the spatial +registration process and all related functions. See + for the full pull +request. + +# spatialLIBD 1.11.13 + +SIGNIFICANT USER-VISIBLE CHANGES + +* The vignette now has a section describing the data from the `spatialDLFPC`, +`Visium_SPG_AD`, and `locus-c` projects that were done by members of the +Keri Martinowich, Kristen R. Maynard, and Leonardo Collado-Torres LIBD teams as +well as our collaborators. + +# spatialLIBD 1.11.12 + +SIGNIFICANT USER-VISIBLE CHANGES + +* `fetch_data("Visium_SPG_AD_Visium_wholegenome_spe"")`, +`fetch_data("Visium_SPG_AD_Visium_targeted_spe")`, +`fetch_data("Visium_SPG_AD_Visium_wholegenome_pseudobulk_spe")`, and + `fetch_data("Visium_SPG_AD_Visium_wholegenome_modeling_results")` have been + added. Use this to access data from the + project. + +# spatialLIBD 1.11.11 + +SIGNIFICANT USER-VISIBLE CHANGES + +* `fetch_data("spatialDLPFC_snRNAseq")` now works if you want to download +the snRNA-seq data used in . + +# spatialLIBD 1.11.10 + +BUG FIXES + +* `read10xVisiumAnalysis()` now supports `spaceranger` version 2023.0208.0 +(internal 10x Genomics version) output files that store analysis CSVs under the +`outs/analysis_csv` directory instead of `outs/analysis` and also use the +`gene_expression_` prefix for each of the analysis directories. This was +tested with @heenadivecha on files from +. + +# spatialLIBD 1.11.9 + +SIGNIFICANT USER-VISIBLE CHANGES + +* `gene_set_enrichment()` now internally uses +`fisher.test(alternative = "greater")` to test for odds ratios greater than 1. +Otherwise odds ratios of 0 could be significant. + # spatialLIBD 1.11.4 SIGNIFICANT USER-VISIBLE CHANGES diff --git a/R/add10xVisiumAnalysis.R b/R/add10xVisiumAnalysis.R index dbe49f86..de4f5fee 100644 --- a/R/add10xVisiumAnalysis.R +++ b/R/add10xVisiumAnalysis.R @@ -29,9 +29,8 @@ #' #' ## Note that ?SpatialExperiment::read10xVisium doesn't include all the files #' ## we need to illustrate read10xVisiumWrapper(). -add10xVisiumAnalysis <- function( - spe, - visium_analysis) { +add10xVisiumAnalysis <- function(spe, + visium_analysis) { col_info <- colData(spe) barcode_present <- "barcode" %in% colnames(col_info) if (!barcode_present) { diff --git a/R/add_images.R b/R/add_images.R index e7b09e5c..bfdddbe5 100644 --- a/R/add_images.R +++ b/R/add_images.R @@ -43,13 +43,12 @@ #' )) #' } add_images <- - function( - spe, - image_dir, - image_pattern, - image_id_current = "lowres", - image_id = image_pattern, - image_paths = locate_images(spe, image_dir, image_pattern)) { + function(spe, + image_dir, + image_pattern, + image_id_current = "lowres", + image_id = image_pattern, + image_paths = locate_images(spe, image_dir, image_pattern)) { stopifnot(length(names(image_paths)) > 0) stopifnot(all(names(image_paths) %in% unique(spe$sample_id))) stopifnot(!any(duplicated(names(image_paths)))) diff --git a/R/add_key.R b/R/add_key.R index 648ecc30..8dab3a32 100644 --- a/R/add_key.R +++ b/R/add_key.R @@ -21,13 +21,20 @@ #' head(spe$key) #' #' ## We can clean it +#' spe$key_original <- spe$key #' spe$key <- NULL #' #' ## and then add it back -#' head(add_key(spe)$key) +#' spe <- add_key(spe) +#' head(spe$key) #' #' ## Note that the original 'key' order was 'sample_id'_'barcode' and we' #' ## have since changed it to 'barcode'_'sample_id'. +#' +#' ## Below we restore the original 'key' +#' spe$key <- spe$key_original +#' spe$key_original <- NULL +#' head(spe$key) #' } add_key <- function(spe, overwrite = TRUE) { if ("key" %in% colnames(colData(spe))) { @@ -35,14 +42,17 @@ add_key <- function(spe, overwrite = TRUE) { message( "Overwriting 'spe$key'. Set 'overwrite = FALSE' if you do not want to overwrite it." ) - } else { - stop( - "'spe$key' already exists. Set 'overwrite = TRUE' if you want to replace it.", + spe$key <- paste0(colnames(spe), "_", spe$sample_id) + stopifnot(!any(duplicated(spe$key))) + } else if (any(duplicated(spe$key))) { + warning( + "'spe$key' already exists and is not unique. Set 'overwrite = TRUE' to replace 'spe$key' with unique values.", call. = FALSE ) } + } else { + spe$key <- paste0(colnames(spe), "_", spe$sample_id) + stopifnot(!any(duplicated(spe$key))) } - spe$key <- paste0(colnames(spe), "_", spe$sample_id) - stopifnot(!any(duplicated(spe$key))) return(spe) } diff --git a/R/add_qc_metrics.R b/R/add_qc_metrics.R new file mode 100644 index 00000000..a5c7ca32 --- /dev/null +++ b/R/add_qc_metrics.R @@ -0,0 +1,204 @@ +#' Quality Control for Spatial Data +#' +#' This function identify spots in a +#' [SpatialExperiment-class][SpatialExperiment::SpatialExperiment-class] (SPE) +#' with outlier quality control values: low `sum_umi` or `sum_gene`, or high +#' `expr_chrM_ratio`, utilizing [scuttle::isOutlier][scuttle::isOutlier]. Also identifies in-tissue +#' edge spots and distance to the edge for each spot. +#' +#' The initial version of this function lives at +#' . +#' +#' @param spe a [SpatialExperiment][SpatialExperiment::SpatialExperiment-class] +#' object that has `sum_umi`, `sum_gene`, `expr_chrM_ratio`, and `in_tissue` +#' variables in the `colData(spe)`. Note that these are automatically created +#' when you build your `spe` object with `spatialLIBD::read10xVisiumWrapper()`. +#' @param overwrite a `logical(1)` specifying whether to overwrite the 7 +#' `colData(spe)` columns that this function creates. If set to `FALSE` and any +#' of them are present, the function will return an error. +#' +#' @return A [SpatialExperiment][SpatialExperiment::SpatialExperiment-class] +#' with added quality control information added to the `colData()`. +#' \describe{ +#' \item{`scran_low_lib_size`}{shows spots that have a low library size.} +#' \item{`scran_low_n_features`}{spots with a low number of expressed genes.} +#' \item{`scran_high_Mito_percent`}{spots with a high percent of mitochondrial gene expression.} +#' \item{`scran_discard`}{spots belonging to either `scran_low_lib_size`, +#' `scran_low_n_feature`, or `scran_high_Mito_percent`.} +#' \item{`edge_spot`}{spots that are automatically detected as the edge spots +#' of the `in_tissue` section.} +#' \item{`edge_distance`}{closest distance in number of spots to either the +#' vertical or horizontal edge.} +#' \item{`scran_low_lib_size_edge`}{spots that have a low library size and +#' are an edge spot.} +#' } +#' +#' @export +#' @importFrom dplyr group_by summarize left_join select mutate +#' @importFrom SummarizedExperiment colData +#' @importFrom scuttle isOutlier +#' @author Louise A. Huuki-Myers +#' +#' @examples +#' ## Obtain the necessary data +#' spe_pre_qc <- fetch_data("spatialDLPFC_Visium_example_subset") +#' +#' ## For now, we fake out tissue spots in example data +#' spe_qc <- spe_pre_qc +#' spe_qc$in_tissue[spe_qc$array_col < 10] <- FALSE +#' +#' ## adds QC metrics to colData of the spe +#' spe_qc <- add_qc_metrics(spe_qc, overwrite = TRUE) +#' vars <- colnames(colData(spe_qc)) +#' vars[grep("^(scran|edge)", vars)] +#' +#' ## visualize edge spots +#' vis_clus(spe_qc, sampleid = "Br6432_ant", clustervar = "edge_spot") +#' +#' ## specify your own colors +#' vis_clus( +#' spe_qc, +#' sampleid = "Br6432_ant", +#' clustervar = "edge_spot", +#' colors = c( +#' "TRUE" = "lightgreen", +#' "FALSE" = "pink", +#' "NA" = "red" +#' ) +#' ) +#' vis_gene(spe_qc, sampleid = "Br6432_ant", geneid = "edge_distance", minCount = -1) +#' +#' ## Visualize scran QC flags +#' +#' ## Check the spots with low library size as detected by scran::isOutlier() +#' vis_clus(spe_qc, sample_id = "Br6432_ant", clustervar = "scran_low_lib_size") +#' +#' ## Violin plot of library size with low library size highlighted in a +#' ## different color. +#' scater::plotColData(spe_qc[, spe_qc$in_tissue], x = "sample_id", y = "sum_umi", colour_by = "scran_low_lib_size") +#' +#' ## Check any spots that scran::isOutlier() flagged +#' vis_clus(spe_qc, sampleid = "Br6432_ant", clustervar = "scran_discard") +#' +#' ## Low library spots that are on the edge of the tissue +#' vis_clus(spe_qc, sampleid = "Br6432_ant", clustervar = "scran_low_lib_size_edge") +#' +#' ## Use `low_library_size` (or other variables) and `edge_distance` as you +#' ## please. +#' spe_qc$our_low_lib_edge <- spe_qc$scran_low_lib_size & spe_qc$edge_distance < 5 +#' +#' vis_clus(spe_qc, sample_id = "Br6432_ant", clustervar = "our_low_lib_edge") +#' +#' ## Clean up +#' rm(spe_qc, spe_pre_qc, vars) +#' +add_qc_metrics <- function(spe, overwrite = FALSE) { + stopifnot("in_tissue" %in% colnames(colData(spe))) + stopifnot("sum_umi" %in% colnames(colData(spe))) + stopifnot("sum_gene" %in% colnames(colData(spe))) + stopifnot("expr_chrM_ratio" %in% colnames(colData(spe))) + + + if (!overwrite) { + new_vars <- c( + "scran_discard", + "scran_low_lib_size", + "scran_low_n_features", + "scran_high_Mito_percent", + "edge_spot", + "edge_distance", + "scran_low_lib_size_edge" + ) + present <- new_vars %in% colnames(colData(spe)) + if (any(present)) { + stop( + paste(new_vars[present], collapse = ", "), + " are all present in the colData(spe). If you want to overwrite them use add_qc_metric(overwrite = TRUE).", + call. = FALSE + ) + } + } + + spe$in_tissue <- as.logical(spe$in_tissue) + spe_in <- spe[, spe$in_tissue] + + ## QC in-tissue spots + + # define variables + low_lib_size <- low_n_features <- in_tissue <- sample_id <- NULL + + qc_df <- data.frame( + log2sum = log2(spe_in$sum_umi), + log2detected = log2(spe_in$sum_gene), + subsets_Mito_percent = spe_in$expr_chrM_ratio * 100, + sample_id = spe_in$sample_id + ) + + qcfilter <- data.frame( + low_lib_size = scater::isOutlier(qc_df$log2sum, type = "lower", log = TRUE, batch = qc_df$sample_id), + low_n_features = scater::isOutlier(qc_df$log2detected, type = "lower", log = TRUE, batch = qc_df$sample_id), + high_subsets_Mito_percent = scater::isOutlier(qc_df$subsets_Mito_percent, type = "higher", batch = qc_df$sample_id) + ) |> + dplyr::mutate(discard = (low_lib_size | low_n_features) | high_subsets_Mito_percent) + + ## Add qcfilter cols to colData(spe) after factoring + ## discard + spe$scran_discard <- NA + spe$scran_discard[which(spe$in_tissue)] <- qcfilter$discard + + ## low_lib_size + spe$scran_low_lib_size <- NA + spe$scran_low_lib_size[which(spe$in_tissue)] <- qcfilter$low_lib_size + + ## low_n_features + spe$scran_low_n_features <- NA + spe$scran_low_n_features[which(spe$in_tissue)] <- qcfilter$low_n_features + + ## high mito percent + spe$scran_high_Mito_percent <- NA + spe$scran_high_Mito_percent[which(spe$in_tissue)] <- + qcfilter$high_subsets_Mito_percent + + ## Find edge spots + # define variables + array_row <- array_col <- edge_row <- edge_col <- row_distance <- NULL + col_distance <- high_subsets_Mito_percent <- NULL + + spot_coords <- colData(spe_in) |> + as.data.frame() |> + select(in_tissue, sample_id, array_row, array_col) |> + group_by(sample_id, array_row) |> + mutate( + edge_col = array_col == min(array_col) | array_col == max(array_col), + col_distance = pmin( + abs(array_col - min(array_col)), + abs(array_col - max(array_col)) + ) + ) |> + group_by(sample_id, array_col) |> + mutate( + edge_row = array_row == min(array_row) | array_row == max(array_row), + row_distance = pmin( + abs(array_row - min(array_row)), + abs(array_row - max(array_row)) + ) + ) |> + group_by(sample_id) |> + mutate( + edge_spot = edge_row | edge_col, + edge_distance = pmin(row_distance, col_distance) + ) + + + ## Add Edge info to spe + spe$edge_spot <- NA + spe$edge_spot[which(spe$in_tissue)] <- spot_coords$edge_spot + + spe$edge_distance <- NA + spe$edge_distance[which(spe$in_tissue)] <- spot_coords$edge_distance + + spe$scran_low_lib_size_edge <- NA + spe$scran_low_lib_size_edge[which(spe$in_tissue)] <- qcfilter$low_lib_size & spot_coords$edge_spot + + return(spe) +} diff --git a/R/annotate_registered_clusters.R b/R/annotate_registered_clusters.R index 13e1d5ab..505ea600 100644 --- a/R/annotate_registered_clusters.R +++ b/R/annotate_registered_clusters.R @@ -48,10 +48,9 @@ #' ## More relaxed merging threshold #' annotate_registered_clusters(cor_stats_layer, cutoff_merge_ratio = 1) annotate_registered_clusters <- - function( - cor_stats_layer, - confidence_threshold = 0.25, - cutoff_merge_ratio = 0.25) { + function(cor_stats_layer, + confidence_threshold = 0.25, + cutoff_merge_ratio = 0.25) { annotated <- apply(cor_stats_layer, 1, @@ -59,13 +58,6 @@ annotate_registered_clusters <- cutoff_merge_ratio = cutoff_merge_ratio ) - if (all(colnames(cor_stats_layer) %in% c("WM", paste0("Layer", seq_len(6))))) { - ## Simplify names when working with the default data - annotated <- gsub("ayer", "", annotated) - annotated <- gsub("\\/L", "\\/", annotated) - annotated <- gsub("^WM\\/", "WM\\/L", annotated) - } - confidence <- apply(cor_stats_layer, 1, max) > confidence_threshold result <- data.frame( @@ -83,15 +75,24 @@ annotate_registered_clusters <- result$layer_label, ifelse(result$layer_confidence == "good", "", "*") ) + + ## Add simplified label for WM/Layer annotations + if (all(colnames(cor_stats_layer) %in% c("WM", paste0("Layer", seq_len(6))))) { + result$layer_label_simple <- result$layer_label + ## Simplify names when working with the default data + result$layer_label_simple <- gsub("ayer", "", result$layer_label_simple) + result$layer_label_simple <- gsub("\\/L", "\\/", result$layer_label_simple) + result$layer_label_simple <- gsub("^WM\\/", "WM\\/L", result$layer_label_simple) + } + return(result) } annotate_registered_cluster <- - function( - remaining, - label = "", - current = NULL, - cutoff_merge_ratio = 0.25) { + function(remaining, + label = "", + current = NULL, + cutoff_merge_ratio = 0.25) { ## Filter negative correlations remaining <- remaining[remaining > 0] diff --git a/R/app_server.R b/R/app_server.R index 865e1cb2..425e7e0b 100644 --- a/R/app_server.R +++ b/R/app_server.R @@ -25,12 +25,13 @@ app_server <- function(input, output, session) { modeling_results <- golem::get_golem_options("modeling_results") sig_genes <- golem::get_golem_options("sig_genes") default_cluster <- golem::get_golem_options("default_cluster") + is_stitched <- golem::get_golem_options("is_stitched") # List the first level callModules here ## Global variables needed throughout the app - rv <- reactiveValues(ManualAnnotation = rep("NA", ncol(spe))) + rv <- reactiveValues(ManualAnnotation = rep("NA", ncol(spe)), ContCount = data.frame(key = spe$key, COUNT = NA)) ## From /dcs04/lieber/lcolladotor/with10x_LIBD001/HumanPilot/Analysis/rda_scran/clust_10x_layer_maynard_martinowich.Rdata # cat(paste0("'", names(cols_layers_martinowich), "' = '", cols_layers_martinowich, "',\n")) @@ -171,6 +172,7 @@ app_server <- function(input, output, session) { alpha = input$alphalevel, point_size = input$pointsize, auto_crop = input$auto_crop, + is_stitched = is_stitched, ... = paste(" with", input$cluster) ) if (!input$side_by_side_histology) { @@ -211,6 +213,7 @@ app_server <- function(input, output, session) { sample_order = isolate(input$grid_samples), point_size = isolate(input$pointsize), auto_crop = isolate(input$auto_crop), + is_stitched = is_stitched, ... = paste(" with", isolate(input$cluster)) ) cowplot::plot_grid( @@ -221,60 +224,86 @@ app_server <- function(input, output, session) { }) static_gene <- reactive({ - p <- vis_gene( - spe, - sampleid = input$sample, - geneid = input$geneid, - assayname = input$assayname, - minCount = input$minCount, - cont_colors = cont_colors(), - image_id = input$imageid, - alpha = input$alphalevel, - point_size = input$pointsize, - auto_crop = input$auto_crop + gene_warning <- NULL + withCallingHandlers( + { + p <- vis_gene( + spe, + sampleid = input$sample, + geneid = input$geneid, + multi_gene_method = input$multi_gene_method, + assayname = input$assayname, + minCount = input$minCount, + cont_colors = cont_colors(), + image_id = input$imageid, + alpha = input$alphalevel, + point_size = input$pointsize, + auto_crop = input$auto_crop, + is_stitched = is_stitched + ) + if (!input$side_by_side_gene) { + p_result <- p + } else { + p_no_spots <- p + p_no_spots$layers[[2]] <- NULL + + p_no_spatial <- p + p_no_spatial$layers[[1]] <- NULL + p_result <- cowplot::plot_grid( + plotlist = list( + p_no_spots, + p_no_spatial + ggplot2::theme(legend.position = "none") + ), + nrow = 1, + ncol = 2 + ) + } + }, + warning = function(w) { + gene_warning <<- conditionMessage(w) + invokeRestart("muffleWarning") + } ) - if (!input$side_by_side_gene) { - return(p) - } else { - p_no_spots <- p - p_no_spots$layers[[2]] <- NULL - - p_no_spatial <- p - p_no_spatial$layers[[1]] <- NULL - cowplot::plot_grid( - plotlist = list( - p_no_spots, - p_no_spatial + ggplot2::theme(legend.position = "none") - ), - nrow = 1, - ncol = 2 - ) - } + return(list(p = p_result, gene_warning = gene_warning)) }) static_gene_grid <- reactive({ input$gene_grid_update - plots <- - vis_grid_gene( - spe, - geneid = isolate(input$geneid), - assayname = isolate(input$assayname), - minCount = isolate(input$minCount), - return_plots = TRUE, - spatial = isolate(input$grid_spatial_gene), - cont_colors = isolate(cont_colors()), - image_id = isolate(input$imageid), - alpha = isolate(input$alphalevel), - point_size = isolate(input$pointsize), - sample_order = isolate(input$gene_grid_samples), - auto_crop = isolate(input$auto_crop) - ) - cowplot::plot_grid( + gene_grid_warnings <- NULL + withCallingHandlers( + { + plots <- + vis_grid_gene( + spe, + geneid = isolate(input$geneid), + multi_gene_method = input$multi_gene_method, + assayname = isolate(input$assayname), + minCount = isolate(input$minCount), + return_plots = TRUE, + spatial = isolate(input$grid_spatial_gene), + cont_colors = isolate(cont_colors()), + image_id = isolate(input$imageid), + alpha = isolate(input$alphalevel), + point_size = isolate(input$pointsize), + sample_order = isolate(input$gene_grid_samples), + auto_crop = isolate(input$auto_crop), + is_stitched = is_stitched + ) + }, + warning = function(w) { + gene_grid_warnings <<- c(gene_grid_warnings, conditionMessage(w)) + invokeRestart("muffleWarning") + } + ) + + p_result <- cowplot::plot_grid( plotlist = plots, nrow = isolate(input$gene_grid_nrow), ncol = isolate(input$gene_grid_ncol) ) + + return(list(p = p_result, gene_grid_warnings = gene_grid_warnings)) }) editImg_manipulations <- reactive({ @@ -416,7 +445,7 @@ app_server <- function(input, output, session) { "_", paste0( "spatialLIBD_static_gene_", - input$geneid, + paste0(input$geneid, collapse = "_"), "_", input$sample, "_", @@ -432,7 +461,7 @@ app_server <- function(input, output, session) { height = 8, width = 8 * ifelse(input$side_by_side_gene, 2, 1) ) - print(static_gene()) + print(static_gene()[["p"]]) dev.off() } ) @@ -444,7 +473,7 @@ app_server <- function(input, output, session) { "_", paste0( "spatialLIBD_static_gene_grid_", - input$geneid, + paste0(input$geneid, collapse = "_"), "_", paste0(input$grid_samples, collapse = "_"), "_", @@ -460,7 +489,7 @@ app_server <- function(input, output, session) { height = 8 * isolate(input$gene_grid_nrow), width = 8 * isolate(input$gene_grid_ncol) ) - print(static_gene_grid()) + print(static_gene_grid()[["p"]]) dev.off() } ) @@ -523,10 +552,9 @@ app_server <- function(input, output, session) { height = "auto" ) - output$gene <- renderPlot( { - static_gene() + static_gene()[["p"]] }, width = function() { 600 * ifelse(input$side_by_side_gene, 2, 1) @@ -534,6 +562,25 @@ app_server <- function(input, output, session) { height = 600 ) + output$gene_warnings <- renderText({ + # Since 'static_gene()' is invoked twice (once also in the assignment + # of 'output$gene'), we silence any errors that occur in this second + # invocation to not duplicate error messages + this_warning <- NULL + temp <- try( + { + this_warning <- static_gene()[["gene_warning"]] + }, + silent = TRUE + ) + + if (!is.null(this_warning)) { + paste("Warning:", this_warning) + } else { + "" + } + }) + output$gene_grid_static <- renderUI({ input$gene_grid_update @@ -547,12 +594,31 @@ app_server <- function(input, output, session) { output$gene_grid <- renderPlot( { - print(static_gene_grid()) + print(static_gene_grid()[["p"]]) }, width = "auto", height = "auto" ) + output$gene_grid_warnings <- renderText({ + # Since 'static_gene_grid()' is invoked twice (once also in the + # assignment of 'output$gene_grid'), we silence any errors that occur + # in this second invocation to not duplicate error messages + these_warnings <- NULL + temp <- try( + { + these_warnings <- static_gene_grid()[["gene_grid_warnings"]] + }, + silent = TRUE + ) + + if (!is.null(these_warnings)) { + paste("Warnings:", paste(these_warnings, collapse = "; ")) + } else { + "" + } + }) + output$editImg_plot <- renderPlot( { plot(editImg_manipulations()) @@ -605,22 +671,77 @@ app_server <- function(input, output, session) { } ## From vis_gene() in global.R + spe_sub <- spe[, spe$sample_id == sampleid] + + point_size <- input$pointsize + if (is_stitched) { + # Drop excluded spots and calculate an appropriate point size + temp <- prep_stitched_data(spe_sub, point_size, input$imageid) + spe_sub <- temp$spe + point_size <- temp$point_size + } + d <- - as.data.frame(cbind(colData(spe), SpatialExperiment::spatialCoords(spe))[spe$sample_id == sampleid, ], + as.data.frame(cbind(colData(spe_sub), SpatialExperiment::spatialCoords(spe_sub)), optional = TRUE ) - if (geneid %in% colnames(d)) { - d$COUNT <- d[[geneid]] + # Grab any continuous colData columns + cont_cols <- as.matrix( + colData(spe_sub)[ + , geneid[geneid %in% colnames(colData(spe_sub))], + drop = FALSE + ] + ) + + # Get the integer indices of each gene in the SpatialExperiment, since we + # aren't guaranteed that rownames are gene names + remaining_geneid <- geneid[!(geneid %in% colnames(colData(spe_sub)))] + valid_gene_indices <- unique( + c( + match(remaining_geneid, rowData(spe_sub)$gene_search), + match(remaining_geneid, rownames(spe_sub)) + ) + ) + valid_gene_indices <- valid_gene_indices[!is.na(valid_gene_indices)] + + # Grab any genes + gene_cols <- t( + as.matrix(assays(spe_sub[valid_gene_indices, ])[[assayname]]) + ) + + # Combine into one matrix where rows are genes and columns are continuous + # features + cont_matrix <- cbind(cont_cols, gene_cols) + + # Determine plot and legend titles + if (ncol(cont_matrix) == 1) { + if (!(geneid %in% colnames(colData(spe_sub)))) { + plot_title <- sprintf( + "%s %s %s min > %s", sampleid, geneid, assayname, minCount + ) + } else { + plot_title <- sprintf( + "%s %s min > %s", sampleid, geneid, minCount + ) + } + d$COUNT <- cont_matrix[, 1] } else { - d$COUNT <- - assays(spe)[[assayname]][which(rowData(spe)$gene_search == geneid), spe$sample_id == sampleid] + if (input$multi_gene_method == "z_score") { + d$COUNT <- multi_gene_z_score(cont_matrix) + plot_title <- paste(sampleid, "Z-score min > ", minCount) + } else if (input$multi_gene_method == "sparsity") { + d$COUNT <- multi_gene_sparsity(cont_matrix) + plot_title <- paste(sampleid, "Prop. nonzero min > ", minCount) + } else { # must be 'pca' + d$COUNT <- multi_gene_pca(cont_matrix) + plot_title <- paste(sampleid, "PC1 min >", minCount) + } } d$COUNT[d$COUNT <= minCount] <- NA ## Add the reduced dims if (reduced_name != "") { - red_dims <- - reducedDim(spe, reduced_name)[spe$sample_id == sampleid, ] + red_dims <- reducedDim(spe_sub, reduced_name) colnames(red_dims) <- paste(reduced_name, "dim", seq_len(ncol(red_dims))) d <- cbind(d, red_dims) @@ -640,21 +761,10 @@ app_server <- function(input, output, session) { sampleid = sampleid, colors = get_colors(colors, d[, clustervar]), spatial = FALSE, - title = paste( - sampleid, - clustervar, - geneid, - if (!geneid %in% colnames(colData(spe))) { - assayname - } else { - NULL - }, - "min >", - minCount - ), + title = plot_title, image_id = input$imageid, alpha = input$alphalevel, - point_size = input$pointsize, + point_size = point_size, auto_crop = input$auto_crop ) @@ -668,11 +778,11 @@ app_server <- function(input, output, session) { cont_colors = cont_colors(), image_id = input$imageid, alpha = input$alphalevel, - point_size = input$pointsize, + point_size = point_size, auto_crop = input$auto_crop ) + geom_point( shape = 21, - size = input$pointsize, + size = point_size, stroke = 0, alpha = input$alphalevel ) @@ -690,7 +800,7 @@ app_server <- function(input, output, session) { ) + geom_point( shape = 21, - size = input$pointsize, + size = point_size, stroke = 0 ) + scale_fill_manual(values = get_colors(colors, colData(spe)[[clustervar]][spe$sample_id == sampleid])) + @@ -718,7 +828,7 @@ app_server <- function(input, output, session) { ) + geom_point( shape = 21, - size = input$pointsize, + size = point_size, stroke = 0 ) } else { @@ -774,7 +884,7 @@ app_server <- function(input, output, session) { opacity = 0.8 ) ), - dragmode = "select" + dragmode = "lasso" ) plotly_gene <- layout( @@ -799,7 +909,7 @@ app_server <- function(input, output, session) { opacity = 0.8 ) ), - dragmode = "select" + dragmode = "lasso" ) plotly_dim <- @@ -900,17 +1010,12 @@ app_server <- function(input, output, session) { return(NULL) } - gene_selected <- ifelse( - input$geneid %in% rowData(spe)$gene_search, - which(rowData(spe)$gene_search == input$geneid), - 1 - ) - p <- vis_gene( - spe[gene_selected, cluster_opts], + spe[, cluster_opts], sampleid = input$sample, geneid = input$geneid, + multi_gene_method = input$multi_gene_method, assayname = input$assayname, minCount = input$minCount, spatial = FALSE, @@ -918,7 +1023,8 @@ app_server <- function(input, output, session) { image_id = input$imageid, alpha = input$alphalevel, point_size = input$pointsize, - auto_crop = input$auto_crop + auto_crop = input$auto_crop, + is_stitched = is_stitched ) + geom_point( shape = 21, @@ -927,6 +1033,9 @@ app_server <- function(input, output, session) { alpha = input$alphalevel ) + ## Update the reactiveValues data + rv$ContCount <- p$data[, c("key", "COUNT")] + ## Read in the histology image img <- SpatialExperiment::imgRaster(spe, @@ -968,7 +1077,7 @@ app_server <- function(input, output, session) { opacity = 0.8 ) ), - dragmode = "select" + dragmode = "lasso" ) ))) }) @@ -1016,25 +1125,9 @@ app_server <- function(input, output, session) { event.data <- NULL } if (!is.null(event.data)) { - ## Prepare the data - d <- - as.data.frame( - cbind( - colData(spe), - SpatialExperiment::spatialCoords(spe) - )[spe$key %in% event.data$key, ], - optional = TRUE - ) - if (input$geneid %in% colnames(d)) { - d$COUNT <- d[[input$geneid]] - } else { - d$COUNT <- - assays(spe)[[input$assayname]][which(rowData(spe)$gene_search == input$geneid), spe$key %in% event.data$key] - } - d$COUNT[d$COUNT <= input$minCount] <- NA - isolate({ ## Now update with the ManualAnnotation input + d <- subset(rv$ContCount, key %in% event.data$key) rv$ManualAnnotation[spe$key %in% d$key[!is.na(d$COUNT)]] <- input$label_manual_ann_gene }) @@ -1043,7 +1136,7 @@ app_server <- function(input, output, session) { output$click_gene <- renderPrint({ if (!is.null(input$gene_plotly_cluster_subset)) { - event.data <- event_data("plotly_click", source = "plotly_gene") + event.data <- event_data("plotly_click", source = "plotly_gene", priority = "event") } else { event.data <- NULL } @@ -1051,32 +1144,15 @@ app_server <- function(input, output, session) { return( "Single points clicked and updated with a manual annotation appear here (double-click to clear)" ) - } else { - ## Prepare the data - d <- - as.data.frame( - cbind( - colData(spe), - SpatialExperiment::spatialCoords(spe) - )[spe$key %in% event.data$key, ], - optional = TRUE - ) - if (input$geneid %in% colnames(d)) { - d$COUNT <- d[[input$geneid]] - } else { - d$COUNT <- - assays(spe)[[input$assayname]][which(rowData(spe)$gene_search == input$geneid), spe$key %in% event.data$key] - } - d$COUNT[d$COUNT <= input$minCount] <- NA - + } else if (input$label_click_gene) { isolate({ ## Now update with the ManualAnnotation input - if (input$label_click_gene) { - rv$ManualAnnotation[spe$key %in% d$key[!is.na(d$COUNT)]] <- - input$label_manual_ann_gene - } + d <- subset(rv$ContCount, key %in% event.data$key) + rv$ManualAnnotation[spe$key %in% d$key[!is.na(d$COUNT)]] <- + input$label_manual_ann_gene + + return(event.data$key) }) - return(event.data$key) } }) @@ -1392,10 +1468,29 @@ app_server <- function(input, output, session) { layer_stat_cor(input_stat, modeling_results, input$layer_model) }) + static_layer_external_tstat_annotated_clusters <- reactive({ + annotate_registered_clusters( + static_layer_external_tstat(), + input$layer_confidence_threshold, + input$layer_cutoff_merge_ratio + ) + }) + static_layer_external_tstat_plot <- reactive({ layer_stat_cor_plot( static_layer_external_tstat(), - max(c(0.1, input$layer_tstat_max)) + max(c(0.1, input$layer_tstat_max)), + -max(c(0.1, input$layer_tstat_max)), + cluster_rows = FALSE, + cluster_columns = FALSE, + annotation = static_layer_external_tstat_annotated_clusters(), + query_colors = get_colors( + clusters = rownames(static_layer_external_tstat()) + ), + reference_colors = get_colors( + spatialLIBD::libd_layer_colors, + clusters = colnames(static_layer_external_tstat()) + ) ) }) @@ -1563,10 +1658,8 @@ app_server <- function(input, output, session) { height = 8, width = 12 ) - layer_stat_cor_plot( - static_layer_external_tstat(), - max(c(0.1, input$layer_tstat_max)) - ) + p <- static_layer_external_tstat_plot() + print(p) dev.off() } ) @@ -1792,6 +1885,30 @@ app_server <- function(input, output, session) { } ) + output$layer_downloadTstatCor_annotation <- downloadHandler( + filename = function() { + gsub( + " ", + "_", + paste0( + "spatialLIBD_layer_TstatCor_annotated_clusters_", + input$layer_model, + "_", + Sys.time(), + ".csv" + ) + ) + }, + content = function(file) { + write.csv( + static_layer_external_tstat_annotated_clusters(), + file = file, + quote = FALSE, + row.names = TRUE + ) + } + ) + ## Reproducibility info diff --git a/R/app_ui.R b/R/app_ui.R index 88455b14..d1bf2c91 100644 --- a/R/app_ui.R +++ b/R/app_ui.R @@ -14,6 +14,7 @@ app_ui <- function() { sce_layer <- golem::get_golem_options("sce_layer") modeling_results <- golem::get_golem_options("modeling_results") sig_genes <- golem::get_golem_options("sig_genes") + auto_crop_default <- golem::get_golem_options("auto_crop_default") red_dim_names <- reducedDimNames(spe) if (length(red_dim_names) > 0) { @@ -104,14 +105,27 @@ app_ui <- function() { hr(), pickerInput( inputId = "geneid", - label = "Continuous variable to plot", + label = "Continuous variable(s) to plot", choices = c( golem::get_golem_options("spe_continuous_vars"), sort(rowData(spe)$gene_search) ), - options = pickerOptions(liveSearch = TRUE) + options = pickerOptions(liveSearch = TRUE), + multiple = TRUE, + selected = c( + golem::get_golem_options("spe_continuous_vars"), + sort(rowData(spe)$gene_search) + )[1] + ), + helpText("Typically gene(s) or any other continuous variable(s)."), + hr(), + selectInput( + inputId = "multi_gene_method", + label = "Multi-gene method", + choices = c("z_score", "pca", "sparsity"), + selected = "z_score" ), - helpText("Typically a gene or any other continuous variable."), + helpText("When applicable, the method used to combine multiple continuous variables."), hr(), selectInput( inputId = "assayname", @@ -169,7 +183,7 @@ app_ui <- function() { checkboxInput( "auto_crop", "Should the image be automatically cropped?", - value = TRUE + value = auto_crop_default ), hr(), selectInput( @@ -338,7 +352,8 @@ app_ui <- function() { tags$br(), tags$br(), tags$br(), - tags$br() + tags$br(), + textOutput("gene_warnings") ), tabPanel( "Gene (interactive)", @@ -408,6 +423,7 @@ app_ui <- function() { actionButton("gene_grid_update", label = "Update grid plot"), downloadButton("downloadPlotGeneGrid", "Download PDF"), uiOutput("gene_grid_static"), + textOutput("gene_grid_warnings"), helpText("Click the 'upgrade grid plot' button above to re-make this plot."), tags$br(), tags$br(), @@ -765,7 +781,7 @@ app_ui <- function() { helpText( "It should be a CSV file without row names and similar to ", HTML( - 'this example file.' + 'this example file. For more context, check this figure.' ) ) ), @@ -826,7 +842,7 @@ app_ui <- function() { helpText( "It should be a CSV file similar to ", HTML( - 'this example file.' + 'this example file, documented here. For more context, check this figure.' ) ) ), @@ -840,7 +856,25 @@ app_ui <- function() { max = 1, step = 0.01 ), - helpText("Use a smaller positive number to change the range of the color scale used. Use 1 if you want the color range to reflect the maximum range of correlation values. Default: 0.81.") + helpText("Use a smaller positive number to change the range of the color scale used. Use 1 if you want the color range to reflect the maximum range of correlation values. Default: 0.81."), + numericInput( + "layer_confidence_threshold", + label = "Annotation confidence threshold", + value = 0.25, + min = 0, + max = 1, + step = 0.01 + ), + helpText("Minimum correlation for a high confidence annotation. Higher values are more strict in annotating. Default: 0.25."), + numericInput( + "layer_cutoff_merge_ratio", + label = "Annotation cutoff merge ratio", + value = 0.25, + min = 0, + max = 1, + step = 0.01 + ), + helpText("Merging threshold for the ratio of the difference between the current one to the next closest correlation, relative to the next closest correlation: (current - next_cor) / next_cor. Lower values are more strict in annotating. Default: 0.25.") ) ), hr(), @@ -862,6 +896,8 @@ app_ui <- function() { hr(), downloadButton("layer_downloadTstatCorTable", "Download CSV"), helpText("Correlation matrix that is visually illustrated with the previous plot."), + downloadButton("layer_downloadTstatCor_annotation", "Download CSV"), + helpText("Annotated clusters."), DT::DTOutput("layer_tstat_cor_table") ) ) @@ -874,7 +910,7 @@ app_ui <- function() { "Help or feedback", tagList( HTML( - 'Please get in touch with the spatialLIBD authors through the Bioconductor Support Website (using the spatialLIBD tag) or through GitHub. Remember to help others help you by including all the information required to reproduce the problem you noticed. Thank you!' + 'Please get in touch with the spatialLIBD authors through the Bioconductor Support Website (using the spatialLIBD tag) or through GitHub. Remember to help others help you by including all the information required to reproduce the problem you noticed. Thank you!' ), hr(), p("The following information will be useful to them:"), diff --git a/R/check_sce.R b/R/check_sce.R index 1d625342..0d17423b 100644 --- a/R/check_sce.R +++ b/R/check_sce.R @@ -24,41 +24,40 @@ #' ## Check the object #' check_sce(sce_example) #' } -check_sce <- function( - sce, - variables = c( - "GraphBased", - "ManualAnnotation", - "Maynard", - "Martinowich", - paste0("SNN_k50_k", 4:28), - "spatialLIBD", - "cell_count", - "sum_umi", - "sum_gene", - "expr_chrM", - "expr_chrM_ratio", - "SpatialDE_PCA", - "SpatialDE_pool_PCA", - "HVG_PCA", - "pseudobulk_PCA", - "markers_PCA", - "SpatialDE_UMAP", - "SpatialDE_pool_UMAP", - "HVG_UMAP", - "pseudobulk_UMAP", - "markers_UMAP", - "SpatialDE_PCA_spatial", - "SpatialDE_pool_PCA_spatial", - "HVG_PCA_spatial", - "pseudobulk_PCA_spatial", - "markers_PCA_spatial", - "SpatialDE_UMAP_spatial", - "SpatialDE_pool_UMAP_spatial", - "HVG_UMAP_spatial", - "pseudobulk_UMAP_spatial", - "markers_UMAP_spatial" - )) { +check_sce <- function(sce, + variables = c( + "GraphBased", + "ManualAnnotation", + "Maynard", + "Martinowich", + paste0("SNN_k50_k", 4:28), + "spatialLIBD", + "cell_count", + "sum_umi", + "sum_gene", + "expr_chrM", + "expr_chrM_ratio", + "SpatialDE_PCA", + "SpatialDE_pool_PCA", + "HVG_PCA", + "pseudobulk_PCA", + "markers_PCA", + "SpatialDE_UMAP", + "SpatialDE_pool_UMAP", + "HVG_UMAP", + "pseudobulk_UMAP", + "markers_UMAP", + "SpatialDE_PCA_spatial", + "SpatialDE_pool_PCA_spatial", + "HVG_PCA_spatial", + "pseudobulk_PCA_spatial", + "markers_PCA_spatial", + "SpatialDE_UMAP_spatial", + "SpatialDE_pool_UMAP_spatial", + "HVG_UMAP_spatial", + "pseudobulk_UMAP_spatial", + "markers_UMAP_spatial" + )) { ## Should be a SingleCellExperiment object stopifnot(is(sce, "SingleCellExperiment")) diff --git a/R/check_sce_layer.R b/R/check_sce_layer.R index e7280113..5ca9b5ae 100644 --- a/R/check_sce_layer.R +++ b/R/check_sce_layer.R @@ -14,10 +14,11 @@ #' #' @examples #' -#' ## Obtain the necessary data +#' ## Obtain example data from the HumanPilot project +#' ## (Maynard, Collado-Torres, et al, 2021) #' if (!exists("sce_layer")) sce_layer <- fetch_data("sce_layer") #' -#' ## Check the object +#' ## Check the pseudo-bulked data #' check_sce_layer(sce_layer) check_sce_layer <- function(sce_layer, variables = "spatialLIBD") { ## Should be a SingleCellExperiment object diff --git a/R/check_spe.R b/R/check_spe.R index d0344daa..47de3c0a 100644 --- a/R/check_spe.R +++ b/R/check_spe.R @@ -25,14 +25,13 @@ #' ## Check the object #' check_spe(spe) #' } -check_spe <- function( - spe, - variables = c( - "sum_umi", - "sum_gene", - "expr_chrM", - "expr_chrM_ratio" - )) { +check_spe <- function(spe, + variables = c( + "sum_umi", + "sum_gene", + "expr_chrM", + "expr_chrM_ratio" + )) { ## Should be a SpatialExperiment object stopifnot(is(spe, "SpatialExperiment")) diff --git a/R/data.R b/R/data.R index 6dc68479..789b7483 100644 --- a/R/data.R +++ b/R/data.R @@ -10,9 +10,13 @@ #' Cell cluster t-statistics from Tran et al #' -#' Using the DLPFC snRNA-seq data from Matthew N Tran et al we computed -#' enrichment t-statistics for the cell clusters. This is a subset of them -#' used in examples such as in [layer_stat_cor_plot()]. +#' Using the DLPFC snRNA-seq data from Matthew N Tran et al +#' we computed +#' enrichment t-statistics for the cell clusters. The Tran et al data has been +#' subset to the top 100 DLPFC layer markers found in Maynard, Collado-Torres, +#' et al 2021. This data is used in examples such as in +#' [layer_stat_cor_plot()]. The Tran et al data is from the pre-print version +#' of that project. #' #' @format A matrix with 692 rows and 31 variables where each column is #' a given cell cluster from Tran et al and each row is one gene. The row names diff --git a/R/fetch_data.R b/R/fetch_data.R index 6540ea21..97a75a8b 100644 --- a/R/fetch_data.R +++ b/R/fetch_data.R @@ -1,12 +1,14 @@ #' Download the Human DLPFC Visium data from LIBD #' -#' This function downloads from `ExperimentHub` the dorsolateral prefrontal -#' cortex (DLPFC) human Visium data and results analyzed by LIBD. If -#' `ExperimentHub` is not available, it will download the files from Dropbox -#' using [utils::download.file()] unless the files are present already at -#' `destdir`. Note that `ExperimentHub` will cache the data and automatically -#' detect if you have previously downloaded it, thus making it the preferred -#' way to interact with the data. +#' This function downloads from `ExperimentHub` Visium, Visium Spatial +#' Proteogenomics (Visium-SPG), or single nucleus RNA-seq (snRNA-seq) data +#' and results analyzed by LIBD from multiple projects. +#' If `ExperimentHub` is not available, this function will +#' download the files from Dropbox using [BiocFileCache::bfcrpath()] unless the +#' files are present already at `destdir`. Note that `ExperimentHub` and +#' `BiocFileCache` will cache the data and automatically detect if you have +#' previously downloaded it, thus making it the preferred way to interact with +#' the data. #' #' @param type A `character(1)` specifying which file you want to download. It #' can either be: `sce` for the @@ -19,10 +21,14 @@ #' or `modeling_results` for the list of tables with the `enrichment`, #' `pairwise`, and `anova` model results from the layer-level data. It can also #' be `sce_example` which is a reduced version of `sce` just for example -#' purposes. As of BioC version 3.13 `spe` downloads a +#' purposes. The initial version of `spatialLIBD` downloaded data only from +#' . As of BioC version 3.13 +#' `spe` downloads a #' [SpatialExperiment-class][SpatialExperiment::SpatialExperiment-class] object. -#' As of version 1.11.6 this function also allows downloading data from the -#' project. +#' As of version 1.11.6, this function also allows downloading data from the +#' project. As of version 1.11.12, +#' data from can be +#' downloaded. #' #' @param destdir The destination directory to where files will be downloaded #' to in case the `ExperimentHub` resource is not available. If you already @@ -54,22 +60,54 @@ #' #' ## Explore the data #' sce_layer +#' +#' ## How to download and load "spatialDLPFC_snRNAseq" +#' \dontrun{ +#' sce_path_zip <- fetch_data("spatialDLPFC_snRNAseq") +#' sce_path <- unzip(sce_path_zip, exdir = tempdir()) +#' sce <- HDF5Array::loadHDF5SummarizedExperiment( +#' file.path(tempdir(), "sce_DLPFC_annotated") +#' ) +#' sce +#' #> class: SingleCellExperiment +#' #> dim: 36601 77604 +#' #> metadata(3): Samples cell_type_colors cell_type_colors_broad +#' #> assays(2): counts logcounts +#' #> rownames(36601): MIR1302-2HG FAM138A ... AC007325.4 AC007325.2 +#' #> rowData names(7): source type ... gene_type binomial_deviance +#' #> colnames(77604): 1_AAACCCAAGTTCTCTT-1 1_AAACCCACAAGGTCTT-1 ... 19_TTTGTTGTCTCATTGT-1 19_TTTGTTGTCTTAAGGC-1 +#' #> colData names(32): Sample Barcode ... cellType_layer layer_annotation +#' #> reducedDimNames(4): GLMPCA_approx TSNE UMAP HARMONY +#' #> mainExpName: NULL +#' #> altExpNames(0): +#' lobstr::obj_size(sce) +#' #> 172.28 MB +#' } fetch_data <- - function(type = c( - "sce", - "sce_layer", - "modeling_results", - "sce_example", - "spe", - "spatialDLPFC_Visium", - "spatialDLPFC_Visium_pseudobulk", - "spatialDLPFC_Visium_modeling_results", - "spatialDLPFC_Visium_SPG", - "spatialDLPFC_snRNAseq" - ), - destdir = tempdir(), - eh = ExperimentHub::ExperimentHub(), - bfc = BiocFileCache::BiocFileCache()) { + function( + type = c( + "sce", + "sce_layer", + "modeling_results", + "sce_example", + "spe", + "spatialDLPFC_Visium", + "spatialDLPFC_Visium_example_subset", + "spatialDLPFC_Visium_pseudobulk", + "spatialDLPFC_Visium_modeling_results", + "spatialDLPFC_Visium_SPG", + "spatialDLPFC_snRNAseq", + "Visium_SPG_AD_Visium_wholegenome_spe", + "Visium_SPG_AD_Visium_targeted_spe", + "Visium_SPG_AD_Visium_wholegenome_pseudobulk_spe", + "Visium_SPG_AD_Visium_wholegenome_modeling_results", + "visiumStitched_brain_spe", + "visiumStitched_brain_spaceranger", + "visiumStitched_brain_Fiji_out" + ), + destdir = tempdir(), + eh = ExperimentHub::ExperimentHub(), + bfc = BiocFileCache::BiocFileCache()) { ## Some variables sce <- sce_layer <- modeling_results <- sce_sub <- spe <- NULL @@ -97,7 +135,7 @@ fetch_data <- ) ) } - + tag <- "Human_Pilot_DLPFC_Visium_spatialLIBD" hub_title <- "Human_Pilot_DLPFC_Visium_spatialLIBD_spot_level_SCE" @@ -107,6 +145,7 @@ fetch_data <- url <- "https://www.dropbox.com/s/f4wcvtdq428y73p/Human_DLPFC_Visium_processedData_sce_scran_spatialLIBD.Rdata?dl=1" } else if (type == "sce_layer") { + tag <- "Human_Pilot_DLPFC_Visium_spatialLIBD" hub_title <- "Human_Pilot_DLPFC_Visium_spatialLIBD_layer_level_SCE" ## While EH is not set-up @@ -115,6 +154,7 @@ fetch_data <- url <- "https://www.dropbox.com/s/bg8xwysh2vnjwvg/Human_DLPFC_Visium_processedData_sce_scran_sce_layer_spatialLIBD.Rdata?dl=1" } else if (type == "modeling_results") { + tag <- "Human_Pilot_DLPFC_Visium_spatialLIBD" hub_title <- "Human_Pilot_DLPFC_Visium_spatialLIBD_modeling_results" ## While EH is not set-up @@ -122,6 +162,7 @@ fetch_data <- url <- "https://www.dropbox.com/s/se6rrgb9yhm5gfh/Human_DLPFC_Visium_modeling_results.Rdata?dl=1" } else if (type == "sce_example") { + tag <- "Human_Pilot_DLPFC_Visium_spatialLIBD" hub_title <- "Human_DLPFC_Visium_sce_example" ## While EH is not set-up @@ -137,7 +178,7 @@ fetch_data <- ) ) } - + tag <- "spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" hub_title <- "spatialDLPFC_Visium_spe" ## While EH is not set-up @@ -145,7 +186,16 @@ fetch_data <- "spe_filtered_final_with_clusters_and_deconvolution_results.rds" url <- "https://www.dropbox.com/s/y2ifv5v8g68papf/spe_filtered_final_with_clusters_and_deconvolution_results.rds?dl=1" + } else if (type == "spatialDLPFC_Visium_example_subset") { + tag <- "spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD_example_subset" + hub_title <- "spatialDLPFC_Visium_spe_example_subset" + + ## While EH is not set-up + file_name <- "spatialDLPFC_spe_subset_example.rds" + url <- + "https://www.dropbox.com/s/3jm3kjab9lzaemo/spatialDLPFC_spe_subset_example.rds?dl=1" } else if (type == "spatialDLPFC_Visium_pseudobulk") { + tag <- "spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" hub_title <- "spatialDLPFC_Visium_pseudobulk_spe" ## While EH is not set-up @@ -154,7 +204,8 @@ fetch_data <- url <- "https://www.dropbox.com/s/pbti4strsfk1m55/sce_pseudo_BayesSpace_k09.rds?dl=1" } else if (type == "spatialDLPFC_Visium_modeling_results") { - hub_title <- "spatialDLPFC_Visium_modeling_results" + tag <- "spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" + hub_title <- type ## While EH is not set-up file_name <- @@ -162,6 +213,7 @@ fetch_data <- url <- "https://www.dropbox.com/s/srkb2ife75px2yz/modeling_results_BayesSpace_k09.Rdata?dl=1" } else if (type == "spatialDLPFC_Visium_SPG") { + tag <- "spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" hub_title <- "spatialDLPFC_Visium_SPG_spe" ## While EH is not set-up @@ -170,22 +222,70 @@ fetch_data <- url <- "https://www.dropbox.com/s/nbf13dna9ibqfaa/spe.rds?dl=1" } else if (type == "spatialDLPFC_snRNAseq") { - if (!enough_ram(10e+09)) { - warning( - paste( - "Your system might not have enough memory available (10GB).", - "Try with a machine that has more memory." - ) - ) - } - - hub_title <- "spatialDLPFC_snRNAseq" + tag <- "spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" + hub_title <- type ## While EH is not set-up file_name <- - "TBD" + "sce_DLPFC_annotated.zip" + url <- + "https://www.dropbox.com/s/5919zt00vm1ht8e/sce_DLPFC_annotated.zip?dl=1" + } else if (type == "Visium_SPG_AD_Visium_wholegenome_spe") { + tag <- "Visium_SPG_AD_Alzheimer_Disease_ITC_spatialLIBD" + hub_title <- type + + ## While EH is not set-up + file_name <- "Visium_SPG_AD_spe_wholegenome.Rdata" + url <- + "https://www.dropbox.com/s/ng036m63grykdm6/Visium_SPG_AD_spe_wholegenome.Rdata?dl=1" + } else if (type == "Visium_SPG_AD_Visium_targeted_spe") { + tag <- "Visium_SPG_AD_Alzheimer_Disease_ITC_spatialLIBD" + hub_title <- type + + ## While EH is not set-up + file_name <- "Visium_SPG_AD_spe_targeted.Rdata" + url <- + "https://www.dropbox.com/s/kda9160awc2h8jq/Visium_SPG_AD_spe_targeted.Rdata?dl=1" + } else if (type == "Visium_SPG_AD_Visium_wholegenome_pseudobulk_spe") { + tag <- "Visium_SPG_AD_Alzheimer_Disease_ITC_spatialLIBD" + hub_title <- type + + ## While EH is not set-up + file_name <- "sce_pseudo_pathology_wholegenome.rds" + url <- + "https://www.dropbox.com/s/p8foxj6t6inb8uf/sce_pseudo_pathology_wholegenome.rds?dl=1" + } else if (type == "Visium_SPG_AD_Visium_wholegenome_modeling_results") { + tag <- "Visium_SPG_AD_Alzheimer_Disease_ITC_spatialLIBD" + hub_title <- type + + ## While EH is not set-up + file_name <- "Visium_IF_AD_modeling_results.Rdata" + url <- + "https://www.dropbox.com/s/5plupu8bj5m0kfh/Visium_IF_AD_modeling_results.Rdata?dl=1" + } else if (type == "visiumStitched_brain_spe") { + tag <- "visiumStitched_brain_spatialLIBD" + hub_title <- type + + ## While EH is not set-up + file_name <- "visiumStitched_brain_spe.rds" + url <- + "https://www.dropbox.com/scl/fi/9re464y6qaojx3r94nq5u/visiumStitched_brain_spe.rds?rlkey=nq6a82u23xuu9hohr86oodwdi&dl=1" + } else if (type == "visiumStitched_brain_spaceranger") { + tag <- "visiumStitched_brain_spatialLIBD" + hub_title <- type + + ## While EH is not set-up + file_name <- "visiumStitched_brain_spaceranger.zip" + url <- + "https://www.dropbox.com/scl/fi/5jdoaukvhq3v7lk19228y/visiumStitched_brain_spaceranger.zip?rlkey=bdgjc6mgy1ierdad6h6v5g29c&dl=1" + } else if (type == "visiumStitched_brain_Fiji_out") { + tag <- "visiumStitched_brain_spatialLIBD" + hub_title <- type + + ## While EH is not set-up + file_name <- "visiumStitched_brain_fiji_out.zip" url <- - "TBD?dl=1" + "https://www.dropbox.com/scl/fi/bevo52e96f2kdwllf8dkk/visiumStitched_brain_fiji_out.zip?rlkey=ptwal8f5zxakzejwd0oqw0lhj&dl=1" } file_path <- file.path(destdir, file_name) @@ -193,7 +293,7 @@ fetch_data <- if (!file.exists(file_path)) { q <- AnnotationHub::query(eh, - pattern = c("Human_Pilot_DLPFC_Visium_spatialLIBD", hub_title) + pattern = c(tag, hub_title) ) if (length(q) == 1) { @@ -211,7 +311,7 @@ fetch_data <- } } - ## Now load the data + ## Now load the data if possible message(Sys.time(), " loading file ", file_path) if (grepl(".Rdata", file_path)) { load(file_path, verbose = FALSE) @@ -219,13 +319,18 @@ fetch_data <- return(.update_sce(sce)) } else if (type == "sce_layer") { return(.update_sce_layer(sce_layer)) - } else if (type == "modeling_results" || type == "spatialDLPFC_Visium_modeling_results") { + } else if (type == "modeling_results" || type == "spatialDLPFC_Visium_modeling_results" || type == "Visium_SPG_AD_Visium_wholegenome_modeling_results") { return(modeling_results) } else if (type == "sce_example") { return(.update_sce(sce_sub)) + } else if (type == "Visium_SPG_AD_Visium_wholegenome_spe" || type == "Visium_SPG_AD_Visium_targeted_spe") { + return(spe) } + } else if (grepl(".rds", file_path)) { + return(readRDS(file_path)) + } else { + file_path } - readRDS(file_path) } diff --git a/R/frame_limits.R b/R/frame_limits.R index 320cc643..b22b5c8a 100644 --- a/R/frame_limits.R +++ b/R/frame_limits.R @@ -37,17 +37,16 @@ #' } #' frame_limits <- - function( - spe, - sampleid, - image_id = "lowres", - visium_grid = list( - row_min = 0, - row_max = 77, - col_min = 0, - col_max = 127, - fiducial_vs_capture_edge = (8 - 6.5) * 1000 / 2 / 100 - )) { + function(spe, + sampleid, + image_id = "lowres", + visium_grid = list( + row_min = 0, + row_max = 77, + col_min = 0, + col_max = 127, + fiducial_vs_capture_edge = (8 - 6.5) * 1000 / 2 / 100 + )) { ## Subset the info we need for the particular sample d <- as.data.frame(cbind(colData(spe), SpatialExperiment::spatialCoords(spe))[spe$sample_id == sampleid, ], diff --git a/R/gene_set_enrichment.R b/R/gene_set_enrichment.R index 2fcb944f..0d021144 100644 --- a/R/gene_set_enrichment.R +++ b/R/gene_set_enrichment.R @@ -3,7 +3,7 @@ #' Using the layer-level (group-level) data, this function evaluates whether #' list of gene sets (Ensembl gene IDs) are enriched among the significant #' genes (FDR < 0.1 by default) genes for a given model type result. Test the -#' alternative hypothesis that OR > 1, i.e. that gene set is over-represented in the +#' alternative hypothesis that OR > 1, i.e. that gene set is over-represented in the #' set of enriched genes. If you want to check depleted genes, change `reverse` #' to `TRUE`. #' @@ -15,6 +15,16 @@ #' #' @return A table in long format with the enrichment results using #' [stats::fisher.test()]. +#' * `OR` odds ratio. +#' * `Pval` p-value for `fisher.test()`. +#' * `test` group or layer in the `modeling_results`. +#' * `NumSig` Number of genes from the gene set present in `modeling_results` & +#' with `fdr < fdr_cut` and `t_stat > 0` (unless reverse = TRUE) for `test` in +#' modeling results. +#' * `SetSize` Number of genes from `modeling_results` present in `gene_set`. +#' * `ID` name of gene set. +#' * `model_type` record of input model type from `modeling results`. +#' * `fdr_cut` record of input `frd_cut`. #' #' @export #' @importFrom stats fisher.test @@ -58,12 +68,11 @@ #' ## Explore the results #' asd_sfari_enrichment gene_set_enrichment <- - function( - gene_list, - fdr_cut = 0.1, - modeling_results = fetch_data(type = "modeling_results"), - model_type = names(modeling_results)[1], - reverse = FALSE) { + function(gene_list, + fdr_cut = 0.1, + modeling_results = fetch_data(type = "modeling_results"), + model_type = names(modeling_results)[1], + reverse = FALSE) { model_results <- modeling_results[[model_type]] ## Keep only the genes present @@ -71,16 +80,18 @@ gene_set_enrichment <- x <- x[!is.na(x)] x[x %in% model_results$ensembl] }) - + ## warn about low power for small geneLists geneList_length <- sapply(geneList_present, length) min_genes <- 25 - if(any(geneList_length < min_genes)){ - warning( - "Gene list with n < ",min_genes," may have insufficent power for enrichment analysis: ", - paste(names(geneList_length)[geneList_length < 200], collapse = " ,") - ) - } + if (any(geneList_length < min_genes)) { + warning( + "Gene list with n < ", + min_genes, + " may have insufficent power for enrichment analysis: ", + paste(names(geneList_length)[geneList_length < 200], collapse = " ,") + ) + } tstats <- model_results[, grep("[f|t]_stat_", colnames(model_results))] @@ -95,7 +106,10 @@ gene_set_enrichment <- paste(rev(x), collapse = "-") }, character(1)) } else if (model_type == "anova") { - stop("reverse = TRUE does not work with model_type = anova since F-statistics cannot have negative values.", call. = FALSE) + stop( + "reverse = TRUE does not work with model_type = anova since F-statistics cannot have negative values.", + call. = FALSE + ) } } @@ -112,8 +126,9 @@ gene_set_enrichment <- Layer = factor(layer, c(FALSE, TRUE)) ) }) - - enrichList <- lapply(tabList, fisher.test, alternative = "greater") + + enrichList <- + lapply(tabList, fisher.test, alternative = "greater") o <- data.frame( OR = vapply(enrichList, "[[", numeric(1), "estimate"), Pval = vapply(enrichList, "[[", numeric(1), "p.value"), diff --git a/R/gene_set_enrichment_plot.R b/R/gene_set_enrichment_plot.R index 8624be09..bc674b71 100644 --- a/R/gene_set_enrichment_plot.R +++ b/R/gene_set_enrichment_plot.R @@ -1,29 +1,47 @@ -#' Plot the gene set enrichment results +#' Plot the gene set enrichment results with ComplexHeatmap #' #' This function takes the output of [gene_set_enrichment()] and creates a -#' heatmap visualization of the results. +#' ComplexHeatmap visualization of the results. Fill of the heatmap represents +#' the -log10(p-val), Odds-ratios are printed for test that pass specified +#' significance threshold `ORcut`. +#' +#' Includes functionality to plot the size of the input gene sets as barplot +#' annotations. #' #' @param enrichment The output of [gene_set_enrichment()]. -#' `unique(enrichment$ID)`. Gets passed to [layer_matrix_plot()]. +#' @param xlabs A vector of names in the same order and length as +#' `unique(enrichment$ID)`. #' @param PThresh A `numeric(1)` specifying the P-value threshold for the #' maximum value in the `-log10(p)` scale. #' @param ORcut A `numeric(1)` specifying the P-value threshold for the #' minimum value in the `-log10(p)` scale for printing the odds ratio values -#' in the cells of the resulting plot. +#' in the cells of the resulting plot. Defaults to 3 or p-val < 0.001. #' @param enrichOnly A `logical(1)` indicating whether to show only odds ratio #' values greater than 1. -#' @param layerHeights A `numeric()` vector of length equal to -#' `length(unique(enrichment$test)) + 1` that starts at 0 specifying where -#' to plot the y-axis breaks which can be used for re-creating the length of -#' each brain layer. Gets passed to [layer_matrix_plot()]. -#' @param mypal A vector with the color palette to use. Gets passed to -#' [layer_matrix_plot()]. -#' @param cex Passed to [layer_matrix_plot()]. +#' @param mypal A `character` vector with the color palette to use. Colors will +#' be in order from 0 to lowest P-val `max(-log(enrichment$Pval))`. Defaults to +#' white, yellow, red pallet. +#' @param plot_SetSize_bar A `logical(1)` indicating whether to plot SetSize +#' from `enrichment` as an `anno_barplot` at the top of the heatmap. +#' @param gene_list_length Optional named `numeric` vector indicating the length +#' of the `gene_list` used to calculate `enrichment`, if inclided and +#' `plot_setSize_bar = TRUE` then the top `anno_barplot` will show the `SetSize` +#' and the difference from the length of the input gene_list. +#' #' @param model_sig_length Optional named `numeric` vector indicating the +#' number of significant genes in `modeling_results` used to calculate +#' `enrichment`. If included `anno_barplot` will be added to rows. +#' #' @param model_colors named `character` vector of colors, Adds colors to +#' row annotations. +#' #' @param ... Additional parameters passed to +#' [ComplexHeatmap::Heatmap()][ComplexHeatmap::Heatmap()]. #' -#' @return A plot visualizing the gene set enrichment -#' odds ratio and p-value results. +#' @return A ([Heatmap-class][ComplexHeatmap::Heatmap-class]) visualizing the +#' gene set enrichment odds ratio and p-value results. #' @export #' @importFrom stats reshape +#' @importFrom circlize colorRamp2 +#' @importFrom ComplexHeatmap columnAnnotation rowAnnotation Heatmap anno_barplot +#' #' @family Gene set enrichment functions #' @author Andrew E Jaffe, Leonardo Collado-Torres #' @seealso layer_matrix_plot @@ -44,7 +62,7 @@ #' ) #' #' ## Format them appropriately -#' asd_sfari_geneList <- list( +#' asd_safari_geneList <- list( #' Gene_SFARI_all = asd_sfari$ensembl.id, #' Gene_SFARI_high = asd_sfari$ensembl.id[asd_sfari$gene.score < 3], #' Gene_SFARI_syndromic = asd_sfari$ensembl.id[asd_sfari$syndromic == 1] @@ -57,72 +75,131 @@ #' #' ## Compute the gene set enrichment results #' asd_sfari_enrichment <- gene_set_enrichment( -#' gene_list = asd_sfari_geneList, +#' gene_list = asd_safari_geneList, #' modeling_results = modeling_results, #' model_type = "enrichment" #' ) #' #' ## Visualize the gene set enrichment results -#' ## with a custom color palette +#' +#' ## Default plot +#' gene_set_enrichment_plot( +#' enrichment = asd_sfari_enrichment +#' ) +#' +#' ## Use a custom green color palette & use shorter gene set names (x-axis labels) +#' gene_set_enrichment_plot( +#' asd_sfari_enrichment, +#' xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)), +#' mypal = c("white",RColorBrewer::brewer.pal(9, "BuGn")) +#' ) +#' +#' ## Add bar plot annotations for SetSize of model genes in the gene_lists +#' gene_set_enrichment_plot( +#' asd_sfari_enrichment, +#' xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)), +#' plot_SetSize_bar = TRUE +#' ) +#' +#' ## Add stacked bar plot annotations showing SetSize and difference from the +#' ## length of the input gene_list #' gene_set_enrichment_plot( #' asd_sfari_enrichment, #' xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)), -#' mypal = c( -#' "white", -#' grDevices::colorRampPalette( -#' RColorBrewer::brewer.pal(9, "BuGn") -#' )(50) -#' ) +#' plot_SetSize_bar = TRUE, +#' gene_list_length = lapply(asd_safari_geneList, length) +#' ) +#' +#' ## add bar plot annotations for number of enriched genes from layers +#' if (!exists("sce_layer")) sce_layer <- fetch_data(type = "sce_layer") +#' sig_genes <- sig_genes_extract( +#' modeling_results = modeling_results, +#' model = "enrichment", +#' sce_layer = sce_layer, +#' n = nrow(sce_layer) +#' ) +#' +#' sig_genes <- sig_genes[sig_genes$fdr < 0.1,] +#' n_sig_model <- as.list(table(sig_genes$test)) +#' +#' ## add barplot with n significant genes from modeling +#' gene_set_enrichment_plot( +#' asd_sfari_enrichment, +#' xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)), +#' plot_SetSize_bar = TRUE, +#' model_sig_length = n_sig_model #' ) #' layer_gene_count <- get_gene_enrichment_count(model_results = modeling_results) #' +#'## add color annotaions #' gene_set_enrichment_plot( #' asd_sfari_enrichment, -#' gene_count_col = sfari_gene_count, -#' gene_count_row = layer_gene_count +#' xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)), +#' plot_SetSize_bar = TRUE, +#' model_colors = libd_layer_colors +#' ) +#' +#' ## add barplot with n significant genes from modeling filled with model color +#' gene_set_enrichment_plot( +#' asd_sfari_enrichment, +#' xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)), +#' plot_SetSize_bar = TRUE, +#' model_sig_length = n_sig_model, +#' model_colors = libd_layer_colors #' ) +#' gene_set_enrichment_plot <- - function(enrichment, - PThresh = 12, - ORcut = 3, - enrichOnly = FALSE, - gene_count_col = NULL, - gene_count_row = NULL, - anno_title_col = NULL, - anno_title_row = NULL, - column_order = NULL, - anno_add = NULL, - mypal = c( - "white", - grDevices::colorRampPalette(RColorBrewer::brewer.pal(9, "YlOrRd"))(50) - )) { + function( + enrichment, + xlabs = unique(enrichment$ID), + PThresh = 12, + ORcut = 3, + enrichOnly = FALSE, + mypal = c("white", RColorBrewer::brewer.pal(9, "YlOrRd")), + plot_SetSize_bar = FALSE, + gene_list_length = NULL, + model_sig_length = NULL, + model_colors = NULL, + ... + ){ ## Re-order and shorten names if they match our data - # if (all(unique(enrichment$test) %in% c("WM", paste0("Layer", seq_len(6))))) { - # enrichment$test <- - # factor(gsub("ayer", "", enrichment$test), levels = rev(c(paste0( - # "L", seq_len(6) - # ), "WM"))) - # } - + if (all(unique(enrichment$test) %in% c("WM", paste0("Layer", seq_len(6))))) { + enrichment$test <- + factor(gsub("ayer", "", enrichment$test), levels = rev(c(paste0( + "L", seq_len(6) + ), "WM"))) + } + ## Check inputs stopifnot(is(enrichment, "data.frame")) - stopifnot(all(c("ID", "test", "OR", "Pval", "SetSize") %in% colnames(enrichment))) + stopifnot(all(c("ID", "test", "OR", "Pval") %in% colnames(enrichment))) stopifnot(ORcut <= PThresh) - # stopifnot(length(xlabs) == length(unique(enrichment$ID))) - + stopifnot(length(xlabs) == length(unique(enrichment$ID))) + ## Convert to -log10 scale and threshold the pvalues enrichment$log10_P_thresh <- round(-log10(enrichment$Pval), 2) enrichment$log10_P_thresh[which(enrichment$log10_P_thresh > PThresh)] <- PThresh - + ## Change some values for the plot if (enrichOnly) { enrichment$log10_P_thresh[enrichment$OR < 1] <- 0 } enrichment$OR_char <- as.character(round(enrichment$OR, 2)) enrichment$OR_char[enrichment$log10_P_thresh < ORcut] <- "" - + + ## sub xlabs labels + if(!is.null(gene_list_length)){ + stopifnot(setequal(names(gene_list_length), unique(enrichment$ID))) + gene_list_length <- gene_list_length[unique(enrichment$ID)] + names(gene_list_length) <- xlabs + } + + for(i in seq(length(xlabs))){ + enrichment$ID <- gsub(unique(enrichment$ID)[[i]], xlabs[[i]], enrichment$ID) + } + ## Make into wide matrices make_wide <- function(var = "OR_char") { res <- @@ -140,86 +217,92 @@ gene_set_enrichment_plot <- res <- res[, levels(as.factor(enrichment$test))] t(res) } - - ## Define matrix wide_or <- make_wide("OR_char") wide_p <- make_wide("log10_P_thresh") - - ## Reorder - if (!is.null(column_order)) { - stopifnot(setequal(column_order, colnames(wide_or))) - wide_or <- wide_or[, column_order] - wide_p <- wide_p[, column_order] - } - - if (!is.null(anno_add)) { - stopifnot(setequal(colnames(anno_add), colnames(wide_or))) - stopifnot(setequal(rownames(anno_add), rownames(wide_or))) - - wide_or[] <- paste0(anno_add[rownames(wide_or), colnames(wide_or)], "\n", wide_or) - } - - ## define annotations - stopifnot(setequal(rownames(gene_count_col), colnames(wide_p))) - stopifnot(setequal(rownames(gene_count_row), rownames(wide_p))) - - ## get column gene counts from SetSize recorded in enrichment - gene_count_col <- unique(enrichment[,c("ID", "SetSize")]) - rownames(gene_count_col) <- gene_count_col$ID - gene_count_col$ID <- NULL - - col_gene_anno <- ComplexHeatmap::columnAnnotation( - `n genes` = ComplexHeatmap::anno_barplot(gene_count_col[colnames(wide_p), ]), - annotation_label = anno_title_col - ) - # row_gene_anno <- ComplexHeatmap::rowAnnotation( - # `n genes` = ComplexHeatmap::anno_barplot(gene_count_row[rownames(wide_p), ]), - # annotation_label = anno_title_row - # ) - + + ## define color pallet + mypal = circlize::colorRamp2(breaks = seq(0, max(wide_p), length.out = length(mypal)), + colors = mypal) + + ## Add gene count annotations + enrichment_setsize <- unique(enrichment[,c("ID", "SetSize")]) + + ## COL annotations + if(plot_SetSize_bar){ + + if(!is.null(gene_list_length)){ + stopifnot(all(colnames(wide_p) %in% names(gene_list_length))) + enrichment_setsize$SetInput <- unlist(gene_list_length[enrichment_setsize$ID]) + enrichment_setsize$Diff <- enrichment_setsize$SetInput - enrichment_setsize$SetSize + } + + rownames(enrichment_setsize) <- enrichment_setsize$ID + enrichment_setsize$ID <- NULL + enrichment_setsize$SetInput <- NULL ## only plot SetSize + Diff + + col_gene_anno <- ComplexHeatmap::columnAnnotation( + `SetSize` = ComplexHeatmap::anno_barplot(enrichment_setsize) + ) + + } else col_gene_anno <- NULL + + if(!is.null(model_colors)){ + ## shorten names if they match HumanPilot data + if (all(c("WM", paste0("Layer", seq_len(6))) %in% names(model_colors))) { + names(model_colors) <-gsub("ayer", "", names(model_colors)) + }} + + + ## ROW annotations + if(!is.null(model_sig_length)){ ## add row barplot annotation + + ## shorten names if they match HumanPilot data + if (all(names(model_sig_length) %in% c("WM", paste0("Layer", seq_len(6))))) { + names(model_sig_length) <-gsub("ayer", "", names(model_sig_length)) + } + + stopifnot(all(rownames(wide_p) %in% names(model_sig_length))) + model_sig_length <- t(data.frame(model_sig_length)) + + if(!is.null(model_colors)){ ## barplot with colors + row_gene_anno <- ComplexHeatmap::rowAnnotation( + `n\nmodel sig` = ComplexHeatmap::anno_barplot(model_sig_length[rownames(wide_p), ], + gp = gpar(fill = model_colors[rownames(wide_p)]) + ) + # annotation_label = anno_title_row + + ) + } else { ## barplot no colors + row_gene_anno <- ComplexHeatmap::rowAnnotation( + `model sig` = ComplexHeatmap::anno_barplot(model_sig_length[rownames(wide_p), ]) + # annotation_label = anno_title_row + ) + } + + } else if(!is.null(model_colors)){ ## only apply color annotation + + stopifnot(all(rownames(wide_p) %in% names(model_colors))) + model_colors <- model_colors[rownames(wide_p)] + + row_gene_anno <- ComplexHeatmap::rowAnnotation( + " " = rownames(wide_p), + col = list(" " = model_colors), + show_legend = FALSE + ) + + }else row_gene_anno <- NULL + ComplexHeatmap::Heatmap(wide_p, col = mypal, name = "-log10(p-val)", rect_gp = grid::gpar(col = "black", lwd = 1), cluster_rows = FALSE, cluster_columns = FALSE, - # right_annotation = row_gene_anno, + right_annotation = row_gene_anno, top_annotation = col_gene_anno, cell_fun = function(j, i, x, y, width, height, fill) { grid::grid.text(wide_or[i, j], x, y, gp = grid::gpar(fontsize = 10)) - } + }, + ... ) } - - -get_gene_list_count <- function(gene_list) { - data.frame( - row.names = names(gene_list), - n = purrr::map_int(gene_list, ~ sum(!is.na(.x))) - ) -} - - -get_gene_enrichment_count <- function( - model_results = fetch_data(type = "modeling_results"), - model_type = "enrichment", - fdr_cut = 0.1, - bayes_anno = bayes_anno) { - model_results <- model_results[[model_type]] - - tstats <- - model_results[, grep("[f|t]_stat_", colnames(model_results))] - colnames(tstats) <- - gsub("[f|t]_stat_", "", colnames(tstats)) - - fdrs <- - model_results[, grep("fdr_", colnames(model_results))] - - enrich_count <- sapply(seq(along.with = tstats), function(i) { - layer <- sum(tstats[, i] > 0 & fdrs[, i] < fdr_cut) - }) - data.frame( - row.names = colnames(tstats), - n = enrich_count - ) -} diff --git a/R/geom_spatial.R b/R/geom_spatial.R index 965db758..64f00cc3 100644 --- a/R/geom_spatial.R +++ b/R/geom_spatial.R @@ -58,15 +58,14 @@ #' ## Clean up #' rm(spe_sub) #' } -geom_spatial <- function( - mapping = NULL, - data = NULL, - stat = "identity", - position = "identity", - na.rm = FALSE, - show.legend = NA, - inherit.aes = FALSE, - ...) { +geom_spatial <- function(mapping = NULL, + data = NULL, + stat = "identity", + position = "identity", + na.rm = FALSE, + show.legend = NA, + inherit.aes = FALSE, + ...) { ## To avoid a NOTE on R CMD check ggname <- function(prefix, grob) { grob$name <- grid::grobName(grob, prefix) diff --git a/R/get_colors.R b/R/get_colors.R index 7d601839..9ed0c5e1 100644 --- a/R/get_colors.R +++ b/R/get_colors.R @@ -31,6 +31,18 @@ #' #' ## Example where Polychrome::palette36.colors() gets used #' get_colors(clusters = letters[seq_len(13)]) +#' +#' ## What happens if you have a logical variable with NAs? +#' set.seed(20240712) +#' log_var <- sample(c(TRUE, FALSE, NA), +#' 1000, +#' replace = TRUE, +#' prob = c(0.3, 0.15, 0.55) +#' ) +#' log_var_sorted <- sort_clusters(log_var) +#' ## A color does get assigned to 'NA', but will be overwritten by +#' ## 'na_color' passed to `vis_clus_p()` and related functions. +#' get_colors(colors = NULL, clusters = log_var_sorted) get_colors <- function(colors = NULL, clusters) { n_clus <- length(unique(clusters)) @@ -67,7 +79,12 @@ get_colors <- function(colors = NULL, clusters) { "purple" ) } - names(colors) <- seq_len(length(colors)) + ## Subset to the actual number of values if we are working with < 12 + colors <- colors[seq_len(n_clus)] + + ## Set the names of the colors in a way compatible with how names + ## are set in vis_clus_p(). + names(colors) <- levels(factor(clusters)) } else if (all(unique(as.character(clusters)) %in% c(gsub("ayer", "", names(colors)), NA))) { names(colors) <- gsub("ayer", "", names(colors)) } diff --git a/R/img_edit.R b/R/img_edit.R index 93aceee3..ec6fae90 100644 --- a/R/img_edit.R +++ b/R/img_edit.R @@ -58,25 +58,24 @@ #' plot(x) #' } img_edit <- - function( - spe, - sampleid, - image_id = "lowres", - channel = NA, - brightness = 100, - saturation = 100, - hue = 100, - enhance = FALSE, - contrast_sharpen = NA, - quantize_max = NA, - quantize_dither = TRUE, - equalize = FALSE, - normalize = FALSE, - transparent_color = NA, - transparent_fuzz = 0, - background_color = NA, - median_radius = NA, - negate = FALSE) { + function(spe, + sampleid, + image_id = "lowres", + channel = NA, + brightness = 100, + saturation = 100, + hue = 100, + enhance = FALSE, + contrast_sharpen = NA, + quantize_max = NA, + quantize_dither = TRUE, + equalize = FALSE, + normalize = FALSE, + transparent_color = NA, + transparent_fuzz = 0, + background_color = NA, + median_radius = NA, + negate = FALSE) { img <- magick::image_read(SpatialExperiment::imgRaster(spe, sample_id = sampleid, image_id = image_id)) diff --git a/R/img_update.R b/R/img_update.R index fdfe5b83..db6dfcb1 100644 --- a/R/img_update.R +++ b/R/img_update.R @@ -41,13 +41,12 @@ #' imgData(img_update(spe, sampleid = "151507", brightness = 25)) #' } img_update <- - function( - spe, - sampleid, - image_id = "lowres", - new_image_id = paste0("edited_", image_id), - overwrite = FALSE, - ...) { + function(spe, + sampleid, + image_id = "lowres", + new_image_id = paste0("edited_", image_id), + overwrite = FALSE, + ...) { img_data <- SpatialExperiment::imgData(spe) ## Skip this sample if there's no existing image to update diff --git a/R/img_update_all.R b/R/img_update_all.R index 314b9b0d..31c368c9 100644 --- a/R/img_update_all.R +++ b/R/img_update_all.R @@ -22,12 +22,11 @@ #' imgData(img_update_all(spe, brightness = 25)) #' } img_update_all <- - function( - spe, - image_id = "lowres", - new_image_id = paste0("edited_", image_id), - overwrite = FALSE, - ...) { + function(spe, + image_id = "lowres", + new_image_id = paste0("edited_", image_id), + overwrite = FALSE, + ...) { for (sampleid in unique(spe$sample_id)) { spe <- img_update( diff --git a/R/layer_boxplot.R b/R/layer_boxplot.R index 6a671a5e..345d5499 100644 --- a/R/layer_boxplot.R +++ b/R/layer_boxplot.R @@ -114,20 +114,19 @@ #' col_high_point = "firebrick4", #' cex = 3 #' ) -layer_boxplot <- function( - i = 1, - sig_genes = sig_genes_extract(), - short_title = TRUE, - sce_layer = fetch_data(type = "sce_layer"), - col_bkg_box = "grey80", - col_bkg_point = "grey40", - col_low_box = "violet", - col_low_point = "darkviolet", - col_high_box = "skyblue", - col_high_point = "dodgerblue4", - cex = 2, - group_var = "layer_guess_reordered_short", - assayname = "logcounts") { +layer_boxplot <- function(i = 1, + sig_genes = sig_genes_extract(), + short_title = TRUE, + sce_layer = fetch_data(type = "sce_layer"), + col_bkg_box = "grey80", + col_bkg_point = "grey40", + col_low_box = "violet", + col_low_point = "darkviolet", + col_high_box = "skyblue", + col_high_point = "dodgerblue4", + cex = 2, + group_var = "layer_guess_reordered_short", + assayname = "logcounts") { ## Extract the logcounts (default) mat <- assay(sce_layer, assayname) diff --git a/R/layer_matrix_plot.R b/R/layer_matrix_plot.R index 4a44fd18..f1530fb1 100644 --- a/R/layer_matrix_plot.R +++ b/R/layer_matrix_plot.R @@ -55,20 +55,19 @@ #' cex = 2 #' ) layer_matrix_plot <- - function( - matrix_values, - matrix_labels = NULL, - xlabs = NULL, - layerHeights = NULL, - mypal = c( - "white", - grDevices::colorRampPalette(RColorBrewer::brewer.pal(9, "YlOrRd"))(50) - ), - breaks = NULL, - axis.args = NULL, - srt = 45, - mar = c(8, 4 + (max(nchar(rownames(matrix_values))) %/% 3) * 0.5, 4, 2) + 0.1, - cex = 1.2) { + function(matrix_values, + matrix_labels = NULL, + xlabs = NULL, + layerHeights = NULL, + mypal = c( + "white", + grDevices::colorRampPalette(RColorBrewer::brewer.pal(9, "YlOrRd"))(50) + ), + breaks = NULL, + axis.args = NULL, + srt = 45, + mar = c(8, 4 + (max(nchar(rownames(matrix_values))) %/% 3) * 0.5, 4, 2) + 0.1, + cex = 1.2) { ## Create some default values in case the user didn't specify them if (is.null(xlabs)) { if (is.null(colnames(matrix_values))) { diff --git a/R/layer_stat_cor.R b/R/layer_stat_cor.R index f484c912..eb21228a 100644 --- a/R/layer_stat_cor.R +++ b/R/layer_stat_cor.R @@ -1,19 +1,24 @@ #' Layer modeling correlation of statistics #' -#' @param stats A data.frame where the row names are Ensembl gene IDs, the -#' column names are labels for clusters of cells or cell types, and where +#' @param stats A query `data.frame` where the row names are ENSEMBL gene IDs, +#' the column names are labels for clusters of cells or cell types, and where #' each cell contains the given statistic for that gene and cell type. These #' statistics should be computed similarly to the modeling results from #' the data we provide. For example, like the `enrichment` t-statistics that #' are derived from comparing one layer against the rest. The `stats` will be -#' matched and then correlated with our statistics. +#' matched and then correlated with the reference statistics. +#' +#' If using the output of `registration_wrapper()` then use `$enrichment` to +#' access the results from `registration_stats_enrichment()`. This function will +#' automatically extract the statistics and assign the ENSEMBL gene IDs to the +#' row names of the query matrix. #' @inheritParams sig_genes_extract #' @param top_n An `integer(1)` specifying whether to filter to the top n marker #' genes. The default is `NULL` in which case no filtering is done. #' -#' @return A correlation matrix between `stats` and our statistics using only -#' the Ensembl gene IDs present in both tables. The columns are sorted using -#' a hierarchical cluster. +#' @return A correlation matrix between the query `stats` and the reference +#' statistics using only the ENSEMBL gene IDs present in both tables. +#' The columns are sorted using hierarchical clustering. #' #' @export #' @importFrom stats cor dist hclust @@ -49,12 +54,11 @@ #' top_n = 10 #' )) layer_stat_cor <- - function( - stats, - modeling_results = fetch_data(type = "modeling_results"), - model_type = names(modeling_results)[1], - reverse = FALSE, - top_n = NULL) { + function(stats, + modeling_results = fetch_data(type = "modeling_results"), + model_type = names(modeling_results)[1], + reverse = FALSE, + top_n = NULL) { model_results <- modeling_results[[model_type]] tstats <- @@ -62,6 +66,19 @@ layer_stat_cor <- colnames(tstats) <- gsub("[f|t]_stat_", "", colnames(tstats)) + ## Use the 'ensembl' column for gene names if present in 'stats' + if ("ensembl" %in% colnames(stats)) { + rownames(stats) <- stats$ensembl + } + + ## Do the same for stats + if (any(grepl("[f|t]_stat_", colnames(stats)))) { + stats <- + stats[, grep("[f|t]_stat_", colnames(stats))] + colnames(stats) <- + gsub("[f|t]_stat_", "", colnames(stats)) + } + if (reverse) { tstats <- tstats * -1 colnames(tstats) <- diff --git a/R/layer_stat_cor_plot.R b/R/layer_stat_cor_plot.R index 0d2653b8..edefd095 100644 --- a/R/layer_stat_cor_plot.R +++ b/R/layer_stat_cor_plot.R @@ -1,126 +1,196 @@ -#' Visualize the layer modeling correlation of statistics +#' Visualize the correlation of layer modeling t-statistics with ComplexHeatmap #' -#' This function makes a heatmap from the [layer_stat_cor()] correlation matrix -#' between a given set of cell cluster/type statistics derived from scRNA-seq -#' or snRNA-seq data (among other types) and the layer statistics from the -#' Human DLPFC Visium data (when using the default arguments). +#' This function makes a ComplexHeatmap from the correlation matrix +#' between a reference and query modeling statistics from [layer_stat_cor()]. +#' For example, between the query statistics from a set of cell cluster/types +#' derived from scRNA-seq or snRNA-seq data (among other types) and the +#' reference layer statistics from the Human DLPFC Visium data (when using the +#' default arguments). +#' +#' Includes functionality to add color annotations, +#' (helpful to match to colors in Visium spot plots), and annotations from +#' [annotate_registered_clusters()]. #' #' @param cor_stats_layer The output of [layer_stat_cor()]. -#' @param max A `numeric(1)` specifying the highest correlation value for the -#' color scale (should be between 0 and 1). -#' @param min A `numeric(1)` specifying the lowest correlation value for the -#' color scale (should be between 0 and -1). -#' @param layerHeights A `numeric()` vector of length equal to -#' `ncol(cor_stats_layer) + 1` that starts at 0 specifying where -#' to plot the y-axis breaks which can be used for re-creating the length of -#' each brain layer. Gets passed to [layer_matrix_plot()]. -#' @param cex Passed to [layer_matrix_plot()]. -#' -#' @return A heatmap for the correlation matrix between statistics. +#' @param color_max A `numeric(1)` specifying the highest correlation value for +#' the color scale (should be between 0 and 1). +#' @param color_min A `numeric(1)` specifying the lowest correlation value for +#' the color scale (should be between 0 and -1). +#' @param color_scale A `character(3)` vector specifying the color scale for the +#' fill of the heatmap. The first value is used for `color_min`, the second one +#' for zero, and the third for `color_max`. +#' @param query_colors named `character` vector of colors, Adds colors to query +#' row annotations. +#' @param reference_colors named `character` vector of colors, Adds colors to +#' reference column annotations. +#' @param annotation annotation data.frame output of +#' [annotate_registered_clusters()], adds 'X' for good confidence annotations, +#' '*' for poor confidence. +#' @param ... Additional parameters passed to +#' [ComplexHeatmap::Heatmap()][ComplexHeatmap::Heatmap()] such as `cluster_rows` +#' and `cluster_columns`. +#' +#' +#' @return ([Heatmap-class][ComplexHeatmap::Heatmap-class]) plot of t-stat +#' correlations #' @export -#' @author Andrew E Jaffe, Leonardo Collado-Torres +#' @author Louise Huuki-Myers #' @family Layer correlation functions -#' @seealso layer_matrix_plot annotate_registered_clusters #' -#' @importFrom RColorBrewer brewer.pal -#' @importFrom grDevices colorRampPalette -#' @details Check -#' https://github.com/LieberInstitute/HumanPilot/blob/master/Analysis/Layer_Guesses/dlpfc_snRNAseq_annotation.R -#' for a full analysis from which this family of functions is derived from. +#' @importFrom circlize colorRamp2 +#' @importFrom ComplexHeatmap columnAnnotation rowAnnotation Heatmap #' #' @examples -#' #' ## Obtain the necessary data +#' ## reference human pilot modeling results #' if (!exists("modeling_results")) { #' modeling_results <- fetch_data(type = "modeling_results") #' } #' +#' ## query spatialDLPFC modeling results +#' query_modeling_results <- fetch_data( +#' type = "spatialDLPFC_Visium_modeling_results" +#' ) +#' #' ## Compute the correlations #' cor_stats_layer <- layer_stat_cor( -#' tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer, +#' stats = query_modeling_results$enrichment, #' modeling_results, #' model_type = "enrichment" #' ) #' #' ## Visualize the correlation matrix -#' layer_stat_cor_plot(cor_stats_layer, max = max(cor_stats_layer)) #' -#' ## Annotate then re-plot -#' rownames(cor_stats_layer) <- paste0( -#' rownames(cor_stats_layer), -#' " - ", -#' annotate_registered_clusters(cor_stats_layer)$layer_label +#' ## Default plot with no annotations and defaults for ComplexHeatmap() +#' layer_stat_cor_plot(cor_stats_layer) +#' +#' ## add colors +#' ## add libd_layer_colors to reference Human Pilot layers +#' layer_stat_cor_plot(cor_stats_layer, reference_colors = libd_layer_colors) +#' +#' ## obtain colors for the query clusters +#' cluster_colors <- get_colors(clusters = rownames(cor_stats_layer)) +#' layer_stat_cor_plot(cor_stats_layer, +#' query_colors = cluster_colors, +#' reference_colors = libd_layer_colors #' ) -#' layer_stat_cor_plot(cor_stats_layer, max = max(cor_stats_layer)) #' -#' ## Restrict the range of colors further -#' layer_stat_cor_plot(cor_stats_layer, max = 0.25) +#' ## Apply additional ComplexHeatmap param +#' layer_stat_cor_plot(cor_stats_layer, +#' cluster_rows = FALSE, +#' cluster_columns = FALSE +#' ) #' -#' ## Repeat with just the top 10 layer marker genes -#' layer_stat_cor_plot(layer_stat_cor( -#' tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer, -#' modeling_results, -#' model_type = "enrichment", -#' top_n = 10 -#' ), max = 0.25) +#' ## Add annotation +#' annotation_df <- annotate_registered_clusters( +#' cor_stats_layer, +#' confidence_threshold = .55 +#' ) +#' layer_stat_cor_plot(cor_stats_layer, annotation = annotation_df) #' -#' ## Now with the "pairwise" modeling results and also top_n = 10 -#' layer_stat_cor_plot(layer_stat_cor( -#' tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer, -#' modeling_results, -#' model_type = "pairwise", -#' top_n = 10 -#' ), max = 0.25) -layer_stat_cor_plot <- - function( +#' ## All together +#' layer_stat_cor_plot( +#' cor_stats_layer, +#' query_colors = cluster_colors, +#' reference_colors = libd_layer_colors, +#' annotation = annotation_df, +#' cluster_rows = FALSE, +#' cluster_columns = FALSE +#' ) +#' +layer_stat_cor_plot <- function( cor_stats_layer, - max = 0.81, - min = -max, - layerHeights = NULL, - cex = 1.2) { - ## From https://github.com/LieberInstitute/HumanPilot/blob/master/Analysis/Layer_Guesses/dlpfc_snRNAseq_annotation.R - theSeq <- seq(min, max, by = 0.01) - my.col <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(7, "PRGn"))(length(theSeq)) - - ## Subset values - cor_stats_layer[cor_stats_layer <= min] <- min - cor_stats_layer[cor_stats_layer >= max] <- max + color_max = max(cor_stats_layer), + color_min = min(cor_stats_layer), + color_scale = c("#762A83", "#F7F7F7", "#1B7837"), + query_colors = NULL, + reference_colors = NULL, + annotation = NULL, + ...) { + ## define color pallet + stopifnot(color_min < color_max) + stopifnot(color_min < 0) + stopifnot(length(color_scale) == 3) + my.col <- circlize::colorRamp2(c(color_min, 0, color_max), color_scale) - ## Re-shape the matrix - mat_vals <- t(cor_stats_layer) + # ## query annotations on row + if (!is.null(query_colors)) { + stopifnot(all(rownames(cor_stats_layer) %in% names(query_colors))) + query_colors <- query_colors[rownames(cor_stats_layer)] - ## Re-order and shorten names if they match our data - if (all(rownames(mat_vals) %in% c("WM", paste0("Layer", seq_len(6))))) { - rownames(mat_vals) <- gsub("ayer", "", rownames(mat_vals)) - mat_vals <- mat_vals[c("WM", paste0("L", rev(seq_len(6)))), , drop = FALSE] - ## Use our default layer heights also - if (is.null(layerHeights)) { - layerHeights <- c(0, 40, 55, 75, 85, 110, 120, 135) - } - } + query_row_annotation <- ComplexHeatmap::rowAnnotation( + " " = rownames(cor_stats_layer), + col = list(" " = query_colors), + show_legend = FALSE + ) + } else { + query_row_annotation <- NULL + } - ## From fields:::imagePlotInfo - midpoints <- seq(min, max, length.out = length(my.col)) - delta <- (midpoints[2] - midpoints[1]) / 2 - breaks <- c(midpoints[1] - delta, midpoints + delta) + ## reference annotation on bottom + if (!is.null(reference_colors)) { + stopifnot(all(colnames(cor_stats_layer) %in% names(reference_colors))) + reference_colors <- reference_colors[colnames(cor_stats_layer)] - legend_cuts <- seq(-1, 1, by = 0.1) - legend_cuts <- legend_cuts[legend_cuts >= min & legend_cuts <= max] - axis.args <- list( - at = legend_cuts, - labels = legend_cuts + ref_col_annotation <- ComplexHeatmap::columnAnnotation( + " " = colnames(cor_stats_layer), + col = list(" " = reference_colors), + show_legend = FALSE ) + } else { + ref_col_annotation <- NULL + } + + ## add annotation + if (!is.null(annotation)) { + anno_matrix <- create_annotation_matrix(annotation, cor_stats_layer) - layer_matrix_plot( - matrix_values = mat_vals, - matrix_labels = NULL, - xlabs = NULL, - layerHeights = layerHeights, - mypal = my.col, - breaks = breaks, - axis.args = axis.args, - srt = 90, - cex = cex + ## plot heatmap + return( + ComplexHeatmap::Heatmap( + matrix = cor_stats_layer, + col = my.col, + name = "Cor", + bottom_annotation = ref_col_annotation, + right_annotation = query_row_annotation, + cell_fun = function(j, i, x, y, width, height, fill) { + grid.text(anno_matrix[i, j], x, y, gp = gpar(fontsize = 10)) + }, + ... + ) ) } + + ## plot heatmap + return( + ComplexHeatmap::Heatmap( + matrix = cor_stats_layer, + col = my.col, + name = "Cor", + bottom_annotation = ref_col_annotation, + right_annotation = query_row_annotation, + ... + ) + ) +} + +create_annotation_matrix <- function(annotation_df, cor_stats_layer) { + anno_list <- lapply( + rownames(cor_stats_layer), + function(cluster) { + # look up confidence + confidence <- annotation_df[match(cluster, annotation_df$cluster), "layer_confidence"] + sym <- ifelse(confidence == "good", "X", "*") + # match annotations + anno <- annotation_df[match(cluster, annotation_df$cluster), "layer_label"] + return(ifelse(unlist(lapply(colnames(cor_stats_layer), grepl, anno)), sym, "")) + } + ) + + anno_matrix <- t(data.frame(anno_list)) + rownames(anno_matrix) <- rownames(cor_stats_layer) + colnames(anno_matrix) <- colnames(cor_stats_layer) + + return(anno_matrix) +} diff --git a/R/multi_gene_pca.R b/R/multi_gene_pca.R new file mode 100644 index 00000000..c3ec9a12 --- /dev/null +++ b/R/multi_gene_pca.R @@ -0,0 +1,61 @@ +#' Combine multiple continuous variables through PCA +#' +#' PCA is performed on \code{cont_mat}, the matrix of multiple continuous +#' features. The first PC is returned, representing the dominant spatial +#' signature of the feature set. Its direction is negated if necessary so that +#' the majority of coefficients across features are positive (when the features +#' are highly correlated, this encourages spots with higher values to +#' represent areas of higher expression of the features). +#' +#' @param cont_mat A \code{matrix()} with spots as rows and 2 or more continuous +#' variables as columns. +#' +#' @return A \code{numeric()} vector with one element per spot, summarizing the +#' multiple continuous variables. +#' +#' @author Nicholas J. Eagles +#' @importFrom stats prcomp +#' @family functions for summarizing expression of multiple continuous variables simultaneously +#' @keywords internal +multi_gene_pca <- function(cont_mat) { + # PCA calculation requires at least 2 features with no NAs and nonzero + # variance. Verify this and drop any bad features + good_indices <- which( + (colSums(is.na(cont_mat)) == 0) & + (colSds(cont_mat) != 0) + ) + if (length(good_indices) < 2) { + stop("After dropping features with NAs or no expression variation, less than 2 features were left. This error can occur when using data from only 1 spot.", call. = FALSE) + } + if (ncol(cont_mat) - length(good_indices) > 0) { + warning( + sprintf( + "Dropping features(s) '%s' which have NAs or no expression variation", + paste(colnames(cont_mat)[-good_indices], collapse = "', '") + ), + call. = FALSE + ) + } + cont_mat <- cont_mat[, good_indices] + + pc_exp <- stats::prcomp(cont_mat, center = TRUE, scale = TRUE) + pc_vec <- pc_exp$x[, "PC1"] + + # Reverse the direction of PC1 if needed to improve visual interpretation + # + # Often, this function will be called with multiple genes as continuous + # variables, and in particular for genes with similar spatial patterns of + # expression. In this case, it's likely that each gene's coefficients to + # the first PC should tend to have the same sign. Next, the sign of each + # PC is arbitary, and we'd like plots to have positive values where + # expression is greater. If most genes have negative coefficients to the + # first PC, we reverse the sign of the coefficients to make visual + # intrepretation consistent. Note this step is neither beneficial nor + # harmful in other cases, where continuous features are not expected to be + # positively correlated + if (mean(pc_exp$rotation[, 1] > 0) < 0.5) { + pc_vec <- -1 * pc_vec + } + + return(pc_vec) +} diff --git a/R/multi_gene_sparsity.R b/R/multi_gene_sparsity.R new file mode 100644 index 00000000..4f53290b --- /dev/null +++ b/R/multi_gene_sparsity.R @@ -0,0 +1,18 @@ +#' Combine multiple continuous variables by proportion of positive values +#' +#' To summarize multiple features, the proportion of features with positive +#' values for each spot is computed. +#' +#' @param cont_mat A \code{matrix()} with spots as rows and 2 or more continuous +#' variables as columns. +#' +#' @return A \code{numeric()} vector with one element per spot, summarizing the +#' multiple continuous variables. +#' +#' @author Nicholas J. Eagles +#' @import MatrixGenerics +#' @family functions for summarizing expression of multiple continuous variables simultaneously +#' @keywords internal +multi_gene_sparsity <- function(cont_mat) { + return(rowMeans(cont_mat > 0, na.rm = TRUE)) +} diff --git a/R/multi_gene_z_score.R b/R/multi_gene_z_score.R new file mode 100644 index 00000000..abfdfff2 --- /dev/null +++ b/R/multi_gene_z_score.R @@ -0,0 +1,40 @@ +#' Combine multiple continuous variables by averaging Z scores +#' +#' To summarize multiple features, each is normalized to represent a Z-score. +#' Scores are averaged to return a single vector. +#' +#' @param cont_mat A \code{matrix()} with spots as rows and 2 or more continuous +#' variables as columns. +#' +#' @return A \code{numeric()} vector with one element per spot, summarizing the +#' multiple continuous variables. +#' +#' @author Nicholas J. Eagles +#' @import MatrixGenerics +#' @family functions for summarizing expression of multiple continuous variables simultaneously +#' @keywords internal +multi_gene_z_score <- function(cont_mat) { + # Z-score calculation requires at least 1 feature with nonzero variance. + # Verify this and drop any zero-variance features + good_indices <- which(colSds(cont_mat, na.rm = TRUE) != 0) + if (length(good_indices) < 1) { + stop("After dropping features with no expression variation, no features were left. This error can occur when using data from only 1 spot.", call. = FALSE) + } + if (ncol(cont_mat) - length(good_indices) > 0) { + warning( + sprintf( + "Dropping features(s) '%s' which have no expression variation", + paste(colnames(cont_mat)[-good_indices], collapse = "', '") + ), + call. = FALSE + ) + } + cont_mat <- cont_mat[, good_indices, drop = FALSE] + + # For each spot, average Z-scores across all features + cont_z <- (cont_mat - colMeans(cont_mat, na.rm = TRUE)) / + colSds(cont_mat, na.rm = TRUE) + z_vec <- rowMeans(cont_z, na.rm = TRUE) + + return(z_vec) +} diff --git a/R/prep_stitched_data.R b/R/prep_stitched_data.R new file mode 100644 index 00000000..1485d924 --- /dev/null +++ b/R/prep_stitched_data.R @@ -0,0 +1,74 @@ +#' Prepare stitched data for plotting +#' +#' Given a \code{SpatialExperiment} built with \code{visiumStitched::build_spe()} +#' , drop +#' excluded spots (specified by \code{spe$exclude_overlapping}) and compute an +#' appropriate spot size for plotting with \code{vis_gene()} or +#' \code{vis_clus()}, assuming the plot will be written to a PDF of default +#' dimensions (i.e. \code{width = 7} and \code{height = 7}). +#' +#' @param spe A \code{SpatialExperiment} built with +#' \code{visiumStitched::build_spe()}, containing a logical +#' \code{spe$exclude_overlapping} column specifying which spots to display in +#' plots +#' @inheritParams vis_clus +#' +#' @return A list with names \code{spe} and \code{point_size} containing a +#' filtered, ready-to-plot \code{SpatialExperiment} and an appropriate spot size +#' (passed to \code{vis_gene()} or \code{vis_clus()}), respectively +#' +#' @author Nicholas J. Eagles +#' @keywords internal +prep_stitched_data <- function(spe, point_size, image_id) { + # State assumptions about columns expected to be in the colData + expected_cols <- c("array_row", "array_col", "exclude_overlapping") + if (!all(expected_cols %in% colnames(colData(spe)))) { + stop( + sprintf( + 'Missing at least one of the following colData columns: "%s"', + paste(expected_cols, collapse = '", "') + ), + call. = FALSE + ) + } + + if (any(is.na(spe$exclude_overlapping))) { + stop("spe$exclude_overlapping must not have NAs", call. = FALSE) + } + + # Drop excluded spots; verify some spots are not excluded + subset_cols <- !spe$exclude_overlapping + if (length(which(subset_cols)) == 0) { + stop( + "spe$exclude_overlapping must include some FALSE values to plot", + call. = FALSE + ) + } + spe <- spe[, subset_cols] + + # Compute an appropriate spot size for this sample + + # Determine some pixel values for the horizontal bounds of the spots + MIN_COL <- min(spatialCoords(spe)[, "pxl_row_in_fullres"]) + MAX_COL <- max(spatialCoords(spe)[, "pxl_row_in_fullres"]) + + # The distance between spots (in pixels) is double the average distance + # between array columns + INTER_SPOT_DIST_PX <- 2 * (MAX_COL - MIN_COL) / + (max(spe$array_col) - min(spe$array_col)) + + # Find the appropriate spot size for this donor. This can vary because + # ggplot downscales a plot to fit desired output dimensions (in this + # case presumably a square region on a PDF), and stitched images can vary + # in aspect ratio. Also, lowres images always have a larger image + # dimension of 1200, no matter how many spots fit in either dimension. + small_image_data <- imgData(spe)[ + imgData(spe)$image_id == image_id, + ] + + # The coefficient of 100 was determined empirically + point_size <- point_size * 100 * INTER_SPOT_DIST_PX * + small_image_data$scaleFactor / max(dim(small_image_data$data[[1]])) + + return(list(spe = spe, point_size = point_size)) +} diff --git a/R/read10xVisiumAnalysis.R b/R/read10xVisiumAnalysis.R index 7864f0b6..ef76f1d0 100644 --- a/R/read10xVisiumAnalysis.R +++ b/R/read10xVisiumAnalysis.R @@ -40,58 +40,91 @@ read10xVisiumAnalysis <- function( } names(samples) <- sids - dir <- file.path(samples, "analysis") + analysis_options <- c("analysis", "analysis_csv") + dir <- + file.path(rep(samples, each = length(analysis_options)), analysis_options) + dir <- dir[file.exists(dir)] + stopifnot(length(dir) == length(samples)) - current_dir <- dir[1] - current_sample <- sids[1] + # current_dir <- dir[1] + # current_sample <- sids[1] - clusters_all <- do.call(rbind, mapply(function(current_dir, current_sample) { - clustering_files <- - list.files( - current_dir, - pattern = "clusters.csv", - all.files = TRUE, - full.names = TRUE, - recursive = TRUE + clusters_all <- + do.call( + rbind, + mapply( + function(current_dir, current_sample) { + clustering_files <- + list.files( + current_dir, + pattern = "clusters.csv", + all.files = TRUE, + full.names = TRUE, + recursive = TRUE + ) + + clusters_list <- lapply(clustering_files, read_barcoded_csv) + clusters <- + Reduce( + function(...) { + merge(..., by = "barcode", all = TRUE) + }, + clusters_list + ) + clusters$sample_id <- current_sample + return(clusters) + }, + dir, + sids, + SIMPLIFY = FALSE, + USE.NAMES = FALSE ) + ) - clusters_list <- lapply(clustering_files, read_barcoded_csv) - clusters <- Reduce(function(...) merge(..., by = "barcode", all = TRUE), clusters_list) - clusters$sample_id <- current_sample - return(clusters) - }, dir, sids, SIMPLIFY = FALSE, USE.NAMES = FALSE)) + projection_all <- mapply( + function(current_dir, current_sample) { + projection_files <- + list.files( + current_dir, + pattern = "projection.csv", + all.files = TRUE, + full.names = TRUE, + recursive = TRUE + ) - projection_all <- mapply(function(current_dir, current_sample) { - projection_files <- - list.files( - current_dir, - pattern = "projection.csv", - all.files = TRUE, - full.names = TRUE, - recursive = TRUE - ) + projection_list <- lapply(projection_files, function(x) { + res <- read_barcoded_csv(x) + res$sample_id <- current_sample + return(res) + }) + names(projection_list) <- + paste0("10x_", basename(dirname(dirname( + projection_files + )))) - projection_list <- lapply(projection_files, function(x) { - res <- read_barcoded_csv(x) - res$sample_id <- current_sample - return(res) - }) - names(projection_list) <- paste0("10x_", basename(dirname(dirname(projection_files)))) + return(projection_list) + }, + dir, + sids, + SIMPLIFY = FALSE, + USE.NAMES = FALSE + ) - return(projection_list) - }, dir, sids, SIMPLIFY = FALSE, USE.NAMES = FALSE) - - projection_names <- unique(unlist(lapply(projection_all, names))) - projections_combined <- lapply(projection_names, function(projection_name) { - one_projection_list <- lapply(projection_all, "[[", projection_name) - do.call(rbind, one_projection_list) - }) + projection_names <- + unique(unlist(lapply(projection_all, names))) + projections_combined <- + lapply(projection_names, function(projection_name) { + one_projection_list <- lapply(projection_all, "[[", projection_name) + do.call(rbind, one_projection_list) + }) names(projections_combined) <- projection_names - cluster_cols <- which(!colnames(clusters_all) %in% c("barcode", "sample_id")) - colnames(clusters_all)[cluster_cols] <- paste0("10x_", colnames(clusters_all)[cluster_cols]) + cluster_cols <- + which(!colnames(clusters_all) %in% c("barcode", "sample_id")) + colnames(clusters_all)[cluster_cols] <- + paste0("10x_", colnames(clusters_all)[cluster_cols]) return(list(clusters = clusters_all, projections = projections_combined)) } @@ -102,7 +135,8 @@ read_barcoded_csv <- function(x) { colnames(df) <- tolower(colnames(df)) if (colnames(df)[2] == "cluster") { - colnames(df)[2] <- basename(dirname(x)) + colnames(df)[2] <- + gsub("gene_expression_", "", basename(dirname(x))) } return(df) } diff --git a/R/read10xVisiumWrapper.R b/R/read10xVisiumWrapper.R index 7be0b325..8c64f060 100644 --- a/R/read10xVisiumWrapper.R +++ b/R/read10xVisiumWrapper.R @@ -44,27 +44,44 @@ #' #' ## Note that ?SpatialExperiment::read10xVisium doesn't include all the files #' ## we need to illustrate read10xVisiumWrapper(). -read10xVisiumWrapper <- function( - samples = "", - sample_id = paste0("sample", sprintf("%02d", seq_along(samples))), - type = c("HDF5", "sparse"), - data = c("filtered", "raw"), - images = c("lowres", "hires", "detected", "aligned"), - load = TRUE, - reference_gtf = NULL, - chrM = "chrM", - gtf_cols = c("source", "type", "gene_id", "gene_version", "gene_name", "gene_type"), - verbose = TRUE) { +read10xVisiumWrapper <- function(samples = "", + sample_id = paste0("sample", sprintf("%02d", seq_along(samples))), + type = c("HDF5", "sparse"), + data = c("filtered", "raw"), + images = c("lowres", "hires", "detected", "aligned"), + load = TRUE, + reference_gtf = NULL, + chrM = "chrM", + gtf_cols = c("source", "type", "gene_id", "gene_version", "gene_name", "gene_type"), + verbose = TRUE) { stopifnot(all(c("gene_name", "gene_id") %in% gtf_cols)) if (missing(reference_gtf)) { summary_file <- file.path(samples[1], "web_summary.html") web <- readLines(summary_file) + + # For spaceranger versions before 3.0 reference_path <- gsub('.*"', "", regmatches(web, regexpr('\\["Reference Path", *"[/|A-z|0-9|-]+', web))) - reference_gtf <- file.path(reference_path, "genes", "genes.gtf") + + # For recent spaceranger versions (3.0.0+?) + if (length(reference_path) == 0) { + reference_path <- sub( + ".*--transcriptome=(\\S*).*", + "\\1", + web[grep("--transcriptome=", web)] + ) + } + reference_gtf <- list.files( + file.path(reference_path, "genes"), "^genes\\.gtf(\\.gz)?$", + full.names = TRUE + ) + } + reference_gtf <- reference_gtf[file.exists(reference_gtf)] + if (length(reference_gtf) > 1) { + stop("More than one 'reference_gtf' was provided or detected. Manually specify the path to just one 'reference_gtf'. If different GTF files were used, then different genes will have been quantified and thus cannot be merged naively into a single SpatialExperiment object. If that's the case, we recommend you build separate SPE objects based on the different 'reference_gtf' files used.", call. = FALSE) + } else if (length(reference_gtf) == 0) { + stop("No 'reference_gtf' files were detected. Please check that the files are available.", call. = FALSE) } - stopifnot(length(reference_gtf) == 1) - stopifnot(file.exists(reference_gtf)) if (verbose) message(Sys.time(), " SpatialExperiment::read10xVisium: reading basic data from SpaceRanger") spe <- SpatialExperiment::read10xVisium( diff --git a/R/registration_model.R b/R/registration_model.R index 6921dbb9..3f36260c 100644 --- a/R/registration_model.R +++ b/R/registration_model.R @@ -24,10 +24,9 @@ #' head(registration_mod) #' registration_model <- - function( - sce_pseudo, - covars = NULL, - var_registration = "registration_variable") { + function(sce_pseudo, + covars = NULL, + var_registration = "registration_variable") { ## Specify a formula without an intercept if (is.null(covars)) { mat_formula <- diff --git a/R/registration_pseudobulk.R b/R/registration_pseudobulk.R index 1e115847..70c78d7a 100644 --- a/R/registration_pseudobulk.R +++ b/R/registration_pseudobulk.R @@ -8,7 +8,9 @@ #' object or one that inherits its properties. #' @param var_registration A `character(1)` specifying the `colData(sce)` #' variable of interest against which will be used for computing the relevant -#' statistics. +#' statistics. This should be a categorical variable, with all categories +#' syntaticly valid (could be used as an R variable, no special characters or +#' leading numbers), ex. 'L1.2', 'celltype2' not 'L1/2' or '2'. #' @param var_sample_id A `character(1)` specifying the `colData(sce)` variable #' with the sample ID. #' @param covars A `character()` with names of sample-level covariates. @@ -51,13 +53,12 @@ #' sce_pseudo <- registration_pseudobulk(sce, "Cell_Cycle", "sample_id", c("age"), min_ncells = NULL) #' colData(sce_pseudo) registration_pseudobulk <- - function( - sce, - var_registration, - var_sample_id, - covars = NULL, - min_ncells = 10, - pseudobulk_rds_file = NULL) { + function(sce, + var_registration, + var_sample_id, + covars = NULL, + min_ncells = 10, + pseudobulk_rds_file = NULL) { ## Check that inputs are correct stopifnot(is(sce, "SingleCellExperiment")) stopifnot(var_registration %in% colnames(colData(sce))) @@ -71,15 +72,34 @@ registration_pseudobulk <- stopifnot(!var_sample_id %in% covars) stopifnot(var_registration != var_sample_id) - ## Check that the values in the registration variable are ok + ## Check that the values in the registration variable are numeric + if (is.numeric(sce[[var_registration]])) { + warning( + sprintf( + "var_registration \"%s\" is numeric, convering to categorical vector...", + var_registration + ), + call. = FALSE + ) + } + + ## check for Non-Syntactic variables - convert with make.names & warn uniq_var_regis <- unique(sce[[var_registration]]) - if (any(grepl("\\+|\\-", uniq_var_regis))) { - stop( - "Remove the + and - signs in colData(sce)[, '", - var_registration, - "'] to avoid downstream issues.", + syntatic <- grepl( + "^((([[:alpha:]]|[.][._[:alpha:]])[._[:alnum:]]*)|[.])$", + uniq_var_regis + ) + if (!all(syntatic)) { + warning( + sprintf( + "var_registration \"%s\" contains non-syntatic variables: %s\nconverting to %s", + var_registration, + paste(uniq_var_regis[!syntatic], collapse = ", "), + paste(make.names(uniq_var_regis[!syntatic]), collapse = ", ") + ), call. = FALSE ) + sce[[var_registration]] <- make.names(sce[[var_registration]]) } ## Pseudo-bulk for our current BayesSpace cluster results @@ -126,6 +146,12 @@ registration_pseudobulk <- sce_pseudo <- sce_pseudo[, sce_pseudo$ncells >= min_ncells] } + if (is.factor(sce_pseudo$registration_variable)) { + ## Drop unused var_registration levels if we had to drop some due + ## to min_ncells + sce_pseudo$registration_variable <- droplevels(sce_pseudo$registration_variable) + } + ## Drop lowly-expressed genes message(Sys.time(), " drop lowly expressed genes") keep_expr <- diff --git a/R/registration_stats_anova.R b/R/registration_stats_anova.R index d1d0b135..c899459d 100644 --- a/R/registration_stats_anova.R +++ b/R/registration_stats_anova.R @@ -50,15 +50,14 @@ #' results_anova_merged <- merge(results_anova, results_anova_nocovar) #' head(results_anova_merged) registration_stats_anova <- - function( - sce_pseudo, - block_cor, - covars = NULL, - var_registration = "registration_variable", - var_sample_id = "registration_sample_id", - gene_ensembl = NULL, - gene_name = NULL, - suffix = "") { + function(sce_pseudo, + block_cor, + covars = NULL, + var_registration = "registration_variable", + var_sample_id = "registration_sample_id", + gene_ensembl = NULL, + gene_name = NULL, + suffix = "") { if (is.null(covars)) { mat_formula <- eval(str2expression(paste("~", var_registration))) } else { diff --git a/R/registration_stats_enrichment.R b/R/registration_stats_enrichment.R index cd3ee182..d186547e 100644 --- a/R/registration_stats_enrichment.R +++ b/R/registration_stats_enrichment.R @@ -34,13 +34,14 @@ #' ) #' head(results_enrichment_nan) registration_stats_enrichment <- - function(sce_pseudo, - block_cor, - covars = NULL, - var_registration = "registration_variable", - var_sample_id = "registration_sample_id", - gene_ensembl = NULL, - gene_name = NULL) { + function( + sce_pseudo, + block_cor, + covars = NULL, + var_registration = "registration_variable", + var_sample_id = "registration_sample_id", + gene_ensembl = NULL, + gene_name = NULL) { ## For each cluster, test it against the rest cluster_idx <- split(seq(along = sce_pseudo[[var_registration]]), sce_pseudo[[var_registration]]) diff --git a/R/registration_stats_pairwise.R b/R/registration_stats_pairwise.R index 09bb3ff9..afb9771a 100644 --- a/R/registration_stats_pairwise.R +++ b/R/registration_stats_pairwise.R @@ -32,13 +32,14 @@ #' ) #' head(results_pairwise_nan) registration_stats_pairwise <- - function(sce_pseudo, - registration_model, - block_cor, - var_registration = "registration_variable", - var_sample_id = "registration_sample_id", - gene_ensembl = NULL, - gene_name = NULL) { + function( + sce_pseudo, + registration_model, + block_cor, + var_registration = "registration_variable", + var_sample_id = "registration_sample_id", + gene_ensembl = NULL, + gene_name = NULL) { ## Identify which are the pairwise columns of interest (aka, don't use ## the sample-level covariates we are adjusting for) and then ## shorten the names diff --git a/R/registration_wrapper.R b/R/registration_wrapper.R index 19cc3097..a5713544 100644 --- a/R/registration_wrapper.R +++ b/R/registration_wrapper.R @@ -47,8 +47,19 @@ #' ## Compute all modeling results #' example_modeling_results <- registration_wrapper( #' sce, -#' "Cell_Cycle", "sample_id", c("age"), "ensembl", "gene_name", "wrapper" +#' var_registration = "Cell_Cycle", +#' var_sample_id = "sample_id", +#' covars = c("age"), +#' gene_ensembl = "ensembl", +#' gene_name = "gene_name", +#' suffix = "wrapper" #' ) +#' +#' ## Explore the results from registration_wrapper() +#' class(example_modeling_results) +#' length(example_modeling_results) +#' names(example_modeling_results) +#' lapply(example_modeling_results, head) registration_wrapper <- function( sce, @@ -60,8 +71,13 @@ registration_wrapper <- suffix = "", min_ncells = 10, pseudobulk_rds_file = NULL) { + ## Change the rownames to ENSEMBL IDs + rownames(sce) <- rowData(sce)[, gene_ensembl] + + ## Pseudobulk sce_pseudo <- - registration_pseudobulk(sce, + registration_pseudobulk( + sce, var_registration = var_registration, var_sample_id = var_sample_id, min_ncells = min_ncells, @@ -74,6 +90,15 @@ registration_wrapper <- block_cor <- registration_block_cor(sce_pseudo, registration_model = registration_mod) + ## test if registration var has two groups + registration_var_k2 <- length(grep("^registration_variable", colnames(registration_mod))) == 2 + if (registration_var_k2) { + warning( + "You need 'var_registration' to have at least 3 unique values to compute an F-statistic and thus ANOVA modeling results cannot be computed.", + call. = FALSE + ) + } + results_enrichment <- registration_stats_enrichment( sce_pseudo, @@ -82,6 +107,7 @@ registration_wrapper <- gene_ensembl = gene_ensembl, gene_name = gene_name ) + results_pairwise <- registration_stats_pairwise( sce_pseudo, @@ -90,21 +116,27 @@ registration_wrapper <- gene_ensembl = gene_ensembl, gene_name = gene_name ) - results_anova <- - registration_stats_anova( - sce_pseudo, - block_cor = block_cor, - covars = covars, - gene_ensembl = gene_ensembl, - gene_name = gene_name, - suffix = suffix - ) + ## with more than 2 groups run ANOVA model + if (!registration_var_k2) { + results_anova <- + registration_stats_anova( + sce_pseudo, + block_cor = block_cor, + covars = covars, + gene_ensembl = gene_ensembl, + gene_name = gene_name, + suffix = suffix + ) + } else { + results_anova <- NULL + } + + ## Bundle results together modeling_results <- list( - "anova" = results_anova, + "anova" = NULL, "enrichment" = results_enrichment, "pairwise" = results_pairwise ) - return(modeling_results) } diff --git a/R/run_app.R b/R/run_app.R index 21fb0ebc..851045a3 100644 --- a/R/run_app.R +++ b/R/run_app.R @@ -36,6 +36,11 @@ #' @param default_cluster A `character(1)` with the name of the main cluster #' (discrete) variable to use. It will have to be present in both `colData(spe)` #' and `colData(sce_layer)`. +#' @param auto_crop_default A `logical(1)` specifying the default value for +#' automatically cropping the images. Set this to `FALSE` if your images do not +#' follow the Visium grid size expectations, which are key for enabling +#' auto-cropping. +#' @inheritParams vis_clus #' @param ... Other arguments passed to the list of golem options for running #' the application. #' @@ -181,6 +186,28 @@ #' ## * https://github.com/LieberInstitute/spatialDLPFC/tree/main/code/deploy_app_k09_position_noWM #' ## * https://github.com/LieberInstitute/spatialDLPFC/tree/main/code/deploy_app_k16 #' ## * https://github.com/LieberInstitute/spatialDLPFC/tree/main/code/analysis_IF/03_spatialLIBD_app +#' +#' +#' ## Example for an object with multiple capture areas stitched together with +#' ## . +#' spe_stitched <- fetch_data("Visium_LS_spe") +#' +#' ## Inspect this object +#' spe_stitched +#' +#' ## Notice the use of "exclude_overlapping" +#' table(spe_stitched$exclude_overlapping, useNA = "ifany") +#' +#' ## Run the app with this stitched data +#' run_app( +#' spe = spe_stitched, +#' sce_layer = NULL, modeling_results = NULL, sig_genes = NULL, +#' title = "visiumStitched example data", +#' spe_discrete_vars = c("capture_area", "scran_quick_cluster", "ManualAnnotation"), +#' spe_continuous_vars = c("sum_umi", "sum_gene", "expr_chrM", "expr_chrM_ratio"), +#' default_cluster = "scran_quick_cluster", +#' is_stitched = TRUE +#' ) #' } run_app <- function( spe = fetch_data(type = "spe"), @@ -229,10 +256,13 @@ run_app <- function( "expr_chrM_ratio" ), default_cluster = "spatialLIBD", + auto_crop_default = TRUE, + is_stitched = FALSE, ...) { ## Run the checks in the relevant order stopifnot(length(default_cluster) == 1) stopifnot(default_cluster %in% spe_discrete_vars) + if (is_stitched) auto_crop_default <- FALSE spe <- check_spe(spe, @@ -276,6 +306,8 @@ run_app <- function( spe_discrete_vars = spe_discrete_vars, spe_continuous_vars = spe_continuous_vars, default_cluster = default_cluster, + auto_crop_default = auto_crop_default, + is_stitched = is_stitched, ... ) ) diff --git a/R/sig_genes_extract.R b/R/sig_genes_extract.R index fac5f65b..26562a6b 100644 --- a/R/sig_genes_extract.R +++ b/R/sig_genes_extract.R @@ -12,7 +12,8 @@ #' columns `f_stat_*` or `t_stat_*` as well as `p_value_*` and `fdr_*` plus #' `ensembl`. The column name is used to extract the statistic results, the #' p-values, and the FDR adjusted p-values. Then the `ensembl` column is used -#' for matching in some cases. See [fetch_data()] for more details. +#' for matching in some cases. See [fetch_data()] for more details. Typically +#' this is the set of reference statistics used in `layer_stat_cor()`. #' @param model_type A named element of the `modeling_results` list. By default #' that is either `enrichment` for the model that tests one human brain layer #' against the rest (one group vs the rest), `pairwise` which compares two @@ -59,11 +60,12 @@ #' sce_layer = sce_layer, #' n = nrow(sce_layer) #' ) -sig_genes_extract <- function(n = 10, - modeling_results = fetch_data(type = "modeling_results"), - model_type = names(modeling_results)[1], - reverse = FALSE, - sce_layer = fetch_data(type = "sce_layer")) { +sig_genes_extract <- function( + n = 10, + modeling_results = fetch_data(type = "modeling_results"), + model_type = names(modeling_results)[1], + reverse = FALSE, + sce_layer = fetch_data(type = "sce_layer")) { model_results <- modeling_results[[model_type]] tstats <- diff --git a/R/sig_genes_extract_all.R b/R/sig_genes_extract_all.R index 0d68b880..d2c3c01f 100644 --- a/R/sig_genes_extract_all.R +++ b/R/sig_genes_extract_all.R @@ -27,9 +27,10 @@ #' modeling_results = modeling_results, #' sce_layer = sce_layer #' ) -sig_genes_extract_all <- function(n = 10, - modeling_results = fetch_data(type = "modeling_results"), - sce_layer = fetch_data(type = "sce_layer")) { +sig_genes_extract_all <- function( + n = 10, + modeling_results = fetch_data(type = "modeling_results"), + sce_layer = fetch_data(type = "sce_layer")) { ## Run checks since this function is run by default by run_app() ## before the checks have been run elsewhere sce_layer <- check_sce_layer(sce_layer) diff --git a/R/sort_clusters.R b/R/sort_clusters.R index 7d906690..f9554af9 100644 --- a/R/sort_clusters.R +++ b/R/sort_clusters.R @@ -1,15 +1,15 @@ #' Sort clusters by frequency #' -#' This function takes a vector with cluster labels and sorts it by frequency -#' such that the most frequent cluster is the first one and so on. +#' This function takes a vector with cluster labels, recasts it as a `factor()`, +#' and sorts the `factor()` levels by frequency such that the most frequent +#' cluster is the first level and so on. #' #' @param clusters A vector with cluster labels. #' @param map_subset A logical vector of length equal to `clusters` specifying #' which elements of `clusters` to use to determine the ranking of the clusters. #' -#' @return A factor of length equal to `clusters` where the levels are the new -#' ordered clusters and the names of the factor are the original values from -#' `clusters`. +#' @return A `factor()` version of `clusters` where the levels are ordered by +#' frequency. #' #' @export #' @@ -21,9 +21,50 @@ #' ## In this case, it's a character vector #' class(clus) #' -#' ## Sort them and obtain a factor +#' ## We see that we have 10 elements in this vector, which is +#' ## an unnamed character vector +#' clus +#' +#' ## letter 'd' is the most frequent +#' table(clus) +#' +#' ## Sort them and obtain a factor. Notice that it's a named +#' ## factor, and the names correspond to the original values +#' ## in the character vector. #' sort_clusters(clus) +#' +#' ## Since 'd' was the most frequent, it gets assigned to the first level +#' ## in the factor variable. +#' table(sort_clusters(clus)) +#' +#' ## If we skip the first 3 values of clus (which are all 'd'), we can +#' ## change the most frequent cluster. And thus the ordering of the +#' ## factor levels. +#' sort_clusters(clus, map_subset = seq_len(length(clus)) > 3) +#' +#' ## Let's try with a factor variable +#' clus_factor <- factor(clus) +#' ## sort_clusters() returns an identical result in this case +#' stopifnot(identical(sort_clusters(clus), sort_clusters(clus_factor))) +#' +#' ## What happens if you have a logical variable with NAs? +#' set.seed(20240712) +#' log_var <- sample(c(TRUE, FALSE, NA), +#' 1000, +#' replace = TRUE, +#' prob = c(0.3, 0.15, 0.55) +#' ) +#' ## Here, the NAs are the most frequent group. +#' table(log_var, useNA = "ifany") +#' +#' ## The NAs are not used for sorting. Since we have more 'TRUE' than 'FALSE' +#' ## then, 'TRUE' becomes the first level. +#' table(sort_clusters(log_var), useNA = "ifany") sort_clusters <- function(clusters, map_subset = NULL) { + if (is.logical(clusters)) { + clusters <- as.character(clusters) + } + if (is.null(map_subset)) { map_subset <- rep(TRUE, length(clusters)) } else { @@ -36,6 +77,5 @@ sort_clusters <- function(clusters, map_subset = NULL) { } map <- rank(length(clusters[map_subset]) - table(clusters[map_subset]), ties.method = "first") - res <- map[clusters] - factor(res) + factor(clusters, levels = names(sort(map))) } diff --git a/R/vis_clus.R b/R/vis_clus.R index 58ee8289..7286705e 100644 --- a/R/vis_clus.R +++ b/R/vis_clus.R @@ -4,7 +4,11 @@ #' using (by default) the histology information on the background. To visualize #' gene-level (or any continuous variable) use [vis_gene()]. #' -#' @inheritParams run_app +#' @param spe A +#' [SpatialExperiment-class][SpatialExperiment::SpatialExperiment-class] +#' object. See [fetch_data()] for how to download some example objects or +#' [read10xVisiumWrapper()] to read in `spaceranger --count` output files and +#' build your own `spe` object. #' @param sampleid A `character(1)` specifying which sample to plot from #' `colData(spe)$sample_id` (formerly `colData(spe)$sample_name`). #' @param clustervar A `character(1)` with the name of the `colData(spe)` @@ -24,6 +28,17 @@ #' @param auto_crop A `logical(1)` indicating whether to automatically crop #' the image / plotting area, which is useful if the Visium capture area is #' not centered on the image and if the image is not a square. +#' @param na_color A `character(1)` specifying a color for the NA values. +#' If you set `alpha = NA` then it's best to set `na_color` to a color that has +#' alpha blending already, which will make non-NA values pop up more and the NA +#' values will show with a lighter color. This behavior is lost when `alpha` is +#' set to a non-`NA` value. +#' @param is_stitched A \code{logical(1)} vector: If `TRUE`, expects a +#' [SpatialExperiment-class][SpatialExperiment::SpatialExperiment-class] built +#' with `visiumStitched::build_spe()`. +#' ; in +#' particular, expects a logical colData column `exclude_overlapping` +#' specifying which spots to exclude from the plot. Sets `auto_crop = FALSE`. #' @param ... Passed to [paste0()][base::paste] for making the title of the #' plot following the `sampleid`. #' @@ -74,6 +89,19 @@ #' spatial = FALSE #' ) #' print(p3) +#' +#' ## With some NA values +#' spe$tmp <- spe$layer_guess_reordered +#' spe$tmp[spe$sample_id == "151673"][seq_len(500)] <- NA +#' p4 <- vis_clus( +#' spe = spe, +#' clustervar = "tmp", +#' sampleid = "151673", +#' colors = libd_layer_colors, +#' na_color = "white", +#' ... = " LIBD Layers" +#' ) +#' print(p4) #' } vis_clus <- function( spe, @@ -98,8 +126,42 @@ vis_clus <- function( alpha = NA, point_size = 2, auto_crop = TRUE, + na_color = "#CCCCCC40", + is_stitched = FALSE, ...) { + # Verify existence and legitimacy of 'sampleid' + if ( + !("sample_id" %in% colnames(colData(spe))) || + !(sampleid %in% spe$sample_id) + ) { + stop( + paste( + "'spe$sample_id' must exist and contain the ID", sampleid + ), + call. = FALSE + ) + } + + # Check validity of spatial coordinates + if (!setequal(c("pxl_col_in_fullres", "pxl_row_in_fullres"), colnames(spatialCoords(spe)))) { + stop( + "Abnormal spatial coordinates: should have 'pxl_row_in_fullres' and 'pxl_col_in_fullres' columns.", + call. = FALSE + ) + } + spe_sub <- spe[, spe$sample_id == sampleid] + + if (is_stitched) { + # Drop excluded spots and calculate an appropriate point size + temp <- prep_stitched_data(spe_sub, point_size, image_id) + spe_sub <- temp$spe + point_size <- temp$point_size + + # Frame limits are poorly defined for stitched data + auto_crop <- FALSE + } + d <- as.data.frame(cbind(colData(spe_sub), SpatialExperiment::spatialCoords(spe_sub)), optional = TRUE) vis_clus_p( @@ -113,6 +175,7 @@ vis_clus <- function( image_id = image_id, alpha = alpha, point_size = point_size, - auto_crop = auto_crop + auto_crop = auto_crop, + na_color = na_color ) } diff --git a/R/vis_clus_p.R b/R/vis_clus_p.R index cd8b720a..b7a381d6 100644 --- a/R/vis_clus_p.R +++ b/R/vis_clus_p.R @@ -6,8 +6,8 @@ #' gene-level (or any continuous variable) use [vis_gene_p()]. #' #' @inheritParams vis_clus -#' @param d A data.frame with the sample-level information. This is typically -#' obtained using `cbind(colData(spe), spatialCoords(spe))`. +#' @param d A `data.frame()` with the sample-level information. This is +#' typically obtained using `cbind(colData(spe), spatialCoords(spe))`. #' @param title The title for the plot. #' #' @return A [ggplot2][ggplot2::ggplot] object. @@ -42,18 +42,18 @@ #' rm(spe_sub) #' } vis_clus_p <- - function( - spe, - d, - clustervar, - sampleid = unique(spe$sample_id)[1], - colors, - spatial, - title, - image_id = "lowres", - alpha = NA, - point_size = 2, - auto_crop = TRUE) { + function(spe, + d, + clustervar, + sampleid = unique(spe$sample_id)[1], + colors, + spatial, + title, + image_id = "lowres", + alpha = NA, + point_size = 2, + auto_crop = TRUE, + na_color = "#CCCCCC40") { ## Some variables pxl_row_in_fullres <- pxl_col_in_fullres <- key <- NULL # stopifnot(all(c("pxl_col_in_fullres", "pxl_row_in_fullres", "key") %in% colnames(d))) @@ -113,7 +113,7 @@ vis_clus_p <- alpha = alpha ) + coord_fixed(expand = FALSE) + - scale_fill_manual(values = colors) + + scale_fill_manual(values = colors, na.value = na_color) + xlim(0, ncol(img)) + ylim(nrow(img), 0) + xlab("") + ylab("") + diff --git a/R/vis_gene.R b/R/vis_gene.R index 708e4f53..15218962 100644 --- a/R/vis_gene.R +++ b/R/vis_gene.R @@ -6,10 +6,12 @@ #' To visualize clusters (or any discrete variable) use [vis_clus()]. #' #' @inheritParams vis_clus -#' @param geneid A `character(1)` specifying the gene ID stored in -#' `rowData(spe)$gene_search` or a continuous variable stored in `colData(spe)` -#' to visualize. If `rowData(spe)$gene_search` is missing, then `rownames(spe)` -#' is used to search for the gene ID. +#' @param geneid A `character()` specifying the gene ID(s) stored in +#' `rowData(spe)$gene_search` or a continuous variable(s) stored in +#' `colData(spe)` to visualize. For each ID, if `rowData(spe)$gene_search` is +#' missing, then `rownames(spe)` is used to search for the gene ID. When a +#' vector of length > 1 is supplied, the continuous variables are combined +#' according to `multi_gene_method`, producing a single value for each spot. #' @param assayname The name of the `assays(spe)` to use for extracting the #' gene expression data. Defaults to `logcounts`. #' @param minCount A `numeric(1)` specifying the minimum gene expression (or @@ -23,16 +25,23 @@ #' dependent on cell density. #' @param cont_colors A `character()` vector of colors that supersedes the #' `viridis` argument. -#' @param na_color A `character(1)` specifying a color for the NA values. -#' If you set `alpha = NA` then it's best to set `na_color` to a color that has -#' alpha blending already, which will make non-NA values pop up more and the NA -#' values will show with a lighter color. This behavior is lost when `alpha` is -#' set to a non-`NA` value. +#' @param multi_gene_method A `character(1)`: either `"pca"`, `"sparsity"`, or +#' `"z_score"`. This parameter controls how multiple continuous variables are +#' combined for visualization, and only applies when `geneid` has length +#' great than 1. `z_score`: to summarize multiple continuous variables, each is +#' normalized to represent a Z-score. The multiple scores are then averaged. +#' `pca`: PCA dimension reduction is conducted on the matrix formed by the +#' continuous variables, and the first PC is then used and multiplied by -1 if +#' needed to have the majority of the values for PC1 to be positive. `sparsity`: +#' the proportion of continuous variables with positive values for each spot is +#' computed. For more details, check the multi gene vignette at +#' . #' #' @return A [ggplot2][ggplot2::ggplot] object. #' @export #' @importFrom SummarizedExperiment assays #' @importFrom SpatialExperiment spatialCoords +#' @importFrom rlang arg_match #' @family Spatial gene visualization functions #' @details This function subsets `spe` to the given sample and prepares the #' data and title for [vis_gene_p()]. It also adds a caption to the plot. @@ -111,6 +120,42 @@ #' auto_crop = FALSE #' ) #' print(p5) +#' +#' # Define several markers for white matter +#' white_matter_genes <- c( +#' "ENSG00000197971", "ENSG00000131095", "ENSG00000123560", +#' "ENSG00000171885" +#' ) +#' +#' ## Plot all white matter markers at once using the Z-score combination +#' ## method +#' p6 <- vis_gene( +#' spe = spe, +#' sampleid = "151507", +#' geneid = white_matter_genes, +#' multi_gene_method = "z_score" +#' ) +#' print(p6) +#' +#' ## Plot all white matter markers at once using the sparsity combination +#' ## method +#' p7 <- vis_gene( +#' spe = spe, +#' sampleid = "151507", +#' geneid = white_matter_genes, +#' multi_gene_method = "sparsity" +#' ) +#' print(p7) +#' +#' ## Plot all white matter markers at once using the PCA combination +#' ## method +#' p8 <- vis_gene( +#' spe = spe, +#' sampleid = "151507", +#' geneid = white_matter_genes, +#' multi_gene_method = "pca" +#' ) +#' print(p8) #' } vis_gene <- function( @@ -127,31 +172,125 @@ vis_gene <- point_size = 2, auto_crop = TRUE, na_color = "#CCCCCC40", + multi_gene_method = c("z_score", "pca", "sparsity"), + is_stitched = FALSE, ...) { + multi_gene_method <- rlang::arg_match(multi_gene_method) + # Verify existence and legitimacy of 'sampleid' + if ( + !("sample_id" %in% colnames(colData(spe))) || + !(sampleid %in% spe$sample_id) + ) { + stop( + paste( + "'spe$sample_id' must exist and contain the ID", sampleid + ), + call. = FALSE + ) + } + + # Verify 'assayname' + if (!(assayname %in% names(assays(spe)))) { + stop(sprintf("'%s' is not an assay in 'spe'", assayname), call. = FALSE) + } + + # Check validity of spatial coordinates + if (!setequal(c("pxl_col_in_fullres", "pxl_row_in_fullres"), colnames(spatialCoords(spe)))) { + stop( + "Abnormal spatial coordinates: should have 'pxl_row_in_fullres' and 'pxl_col_in_fullres' columns.", + call. = FALSE + ) + } + spe_sub <- spe[, spe$sample_id == sampleid] + + if (is_stitched) { + # Drop excluded spots and calculate an appropriate point size + temp <- prep_stitched_data(spe_sub, point_size, image_id) + spe_sub <- temp$spe + point_size <- temp$point_size + + # Frame limits are poorly defined for stitched data + auto_crop <- FALSE + } + d <- as.data.frame(cbind(colData(spe_sub), SpatialExperiment::spatialCoords(spe_sub)), optional = TRUE) - if (geneid %in% colnames(colData(spe_sub))) { - d$COUNT <- colData(spe_sub)[[geneid]] - } else if (geneid %in% rowData(spe_sub)$gene_search) { - d$COUNT <- - assays(spe_sub)[[assayname]][which(rowData(spe_sub)$gene_search == geneid), ] - } else if (geneid %in% rownames(spe_sub)) { - d$COUNT <- assays(spe_sub)[[assayname]][which(rownames(spe_sub) == geneid), ] + # Verify legitimacy of names in geneid + geneid_is_valid <- (geneid %in% rowData(spe_sub)$gene_search) | + (geneid %in% rownames(spe_sub)) | + (geneid %in% colnames(colData(spe_sub))) + if (any(!geneid_is_valid)) { + stop( + "Could not find the 'geneid'(s) ", + paste(geneid[!geneid_is_valid], collapse = ", "), + call. = FALSE + ) + } + + # Grab any continuous colData columns and verify they're all numeric + cont_cols <- colData(spe_sub)[ + , geneid[geneid %in% colnames(colData(spe_sub))], + drop = FALSE + ] + if (!all(sapply(cont_cols, class) %in% c("numeric", "integer"))) { + stop( + "'geneid' can not contain non-numeric colData columns.", + call. = FALSE + ) + } + cont_cols <- as.matrix(cont_cols) + + # Get the integer indices of each gene in the SpatialExperiment, since we + # aren't guaranteed that rownames are gene names + remaining_geneid <- geneid[!(geneid %in% colnames(colData(spe_sub)))] + valid_gene_indices <- unique( + c( + match(remaining_geneid, rowData(spe_sub)$gene_search), + match(remaining_geneid, rownames(spe_sub)) + ) + ) + valid_gene_indices <- valid_gene_indices[!is.na(valid_gene_indices)] + + # Grab any genes + gene_cols <- t( + as.matrix(assays(spe_sub[valid_gene_indices, ])[[assayname]]) + ) + + # Combine into one matrix where rows are samples and columns are continuous + # features + cont_matrix <- cbind(cont_cols, gene_cols) + + # Determine plot and legend titles + if (ncol(cont_matrix) == 1) { + plot_title <- paste(sampleid, geneid, ...) + d$COUNT <- cont_matrix[, 1] + if (!(geneid %in% colnames(colData(spe_sub)))) { + legend_title <- sprintf("%s\n min > %s", assayname, minCount) + } else { + legend_title <- sprintf("min > %s", minCount) + } } else { - stop("Could not find the 'geneid' ", geneid, call. = FALSE) + plot_title <- paste(sampleid, ...) + if (multi_gene_method == "z_score") { + d$COUNT <- multi_gene_z_score(cont_matrix) + legend_title <- paste("Z score\n min > ", minCount) + } else if (multi_gene_method == "sparsity") { + d$COUNT <- multi_gene_sparsity(cont_matrix) + legend_title <- paste("Prop. nonzero\n min > ", minCount) + } else { # must be 'pca' + d$COUNT <- multi_gene_pca(cont_matrix) + legend_title <- paste("PC1\n min > ", minCount) + } } d$COUNT[d$COUNT <= minCount] <- NA + p <- vis_gene_p( spe = spe_sub, d = d, sampleid = sampleid, spatial = spatial, - title = paste( - sampleid, - geneid, - ... - ), + title = plot_title, viridis = viridis, image_id = image_id, alpha = alpha, @@ -159,14 +298,7 @@ vis_gene <- point_size = point_size, auto_crop = auto_crop, na_color = na_color, - legend_title = paste0( - if (!geneid %in% colnames(colData(spe_sub))) { - paste0(assayname, "\n") - } else { - NULL - }, - " min > ", minCount - ) + legend_title = legend_title ) return(p) } diff --git a/R/vis_gene_p.R b/R/vis_gene_p.R index 9bc3dc0a..900bd651 100644 --- a/R/vis_gene_p.R +++ b/R/vis_gene_p.R @@ -3,12 +3,12 @@ #' This function visualizes the gene expression stored in `assays(spe)` or any #' continuous variable stored in `colData(spe)` for one given sample at the #' spot-level using (by default) the histology information on the background. -#' This is the function that does all the plotting behind [vis_gene()]. +#' This is the function that does all the plotting behind [vis_gene()] #' To visualize clusters (or any discrete variable) use [vis_clus_p()]. #' -#' @param d A data.frame with the sample-level information. This is typically -#' obtained using `cbind(colData(spe), spatialCoords(spe))`. -#' The data.frame has to contain +#' @param d A `data.frame()` with the sample-level information. This is +#' typically obtained using `cbind(colData(spe), spatialCoords(spe))`. +#' The `data.frame` has to contain #' a column with the continuous variable data to plot stored under `d$COUNT`. #' @param legend_title A `character(1)` specifying the legend title. #' @inheritParams vis_clus_p @@ -33,7 +33,6 @@ #' df <- as.data.frame(cbind(colData(spe_sub), SpatialExperiment::spatialCoords(spe_sub)), optional = TRUE) #' df$COUNT <- df$expr_chrM_ratio #' -#' ## Use the manual color palette by Lukas M Weber #' ## Don't plot the histology information #' p <- vis_gene_p( #' spe = spe_sub, @@ -48,24 +47,19 @@ #' rm(spe_sub) #' } vis_gene_p <- - function( - spe, - d, - sampleid = unique(spe$sample_id)[1], - spatial, - title, - viridis = TRUE, - image_id = "lowres", - alpha = NA, - cont_colors = if (viridis) { - viridisLite::viridis(21) - } else { - c("aquamarine4", "springgreen", "goldenrod", "red") - }, - point_size = 2, - auto_crop = TRUE, - na_color = "#CCCCCC40", - legend_title = "") { + function(spe, + d, + sampleid = unique(spe$sample_id)[1], + spatial, + title, + viridis = TRUE, + image_id = "lowres", + alpha = NA, + cont_colors = if (viridis) viridisLite::viridis(21) else c("aquamarine4", "springgreen", "goldenrod", "red"), + point_size = 2, + auto_crop = TRUE, + na_color = "#CCCCCC40", + legend_title = "") { ## Some variables pxl_row_in_fullres <- pxl_col_in_fullres <- key <- COUNT <- NULL diff --git a/R/vis_grid_clus.R b/R/vis_grid_clus.R index 40ec7aee..c84dbac4 100644 --- a/R/vis_grid_clus.R +++ b/R/vis_grid_clus.R @@ -62,6 +62,8 @@ vis_grid_clus <- sample_order = unique(spe$sample_id), point_size = 2, auto_crop = TRUE, + na_color = "#CCCCCC40", + is_stitched = FALSE, ...) { stopifnot(all(sample_order %in% unique(spe$sample_id))) @@ -80,6 +82,8 @@ vis_grid_clus <- alpha = alpha, point_size = point_size, auto_crop = auto_crop, + na_color = na_color, + is_stitched = is_stitched, ... ) }) @@ -87,7 +91,7 @@ vis_grid_clus <- if (!return_plots) { - pdf(pdf_file, height = 24, width = 36) + pdf(pdf_file, height = height, width = width) print(cowplot::plot_grid(plotlist = plots)) dev.off() return(pdf_file) diff --git a/R/vis_grid_gene.R b/R/vis_grid_gene.R index 2504cce5..bf32dd7a 100644 --- a/R/vis_grid_gene.R +++ b/R/vis_grid_gene.R @@ -53,6 +53,7 @@ vis_grid_gene <- point_size = 2, auto_crop = TRUE, na_color = "#CCCCCC40", + is_stitched = FALSE, ...) { stopifnot(all(sample_order %in% unique(spe$sample_id))) @@ -71,6 +72,7 @@ vis_grid_gene <- point_size = point_size, auto_crop = auto_crop, na_color = na_color, + is_stitched = is_stitched, ... ) }) diff --git a/README.Rmd b/README.Rmd index ef568571..2f3b9a1a 100644 --- a/README.Rmd +++ b/README.Rmd @@ -23,9 +23,10 @@ knitr::opts_chunk$set( [![Bioc support](https://bioconductor.org/shields/posts/spatialLIBD.svg)](https://support.bioconductor.org/tag/spatialLIBD) [![Bioc last commit](https://bioconductor.org/shields/lastcommit/devel/data-experiment/spatialLIBD.svg)](http://bioconductor.org/checkResults/devel/data-experiment-LATEST/spatialLIBD/) [![Bioc dependencies](https://bioconductor.org/shields/dependencies/release/spatialLIBD.svg)](https://bioconductor.org/packages/release/data-experiment/html/spatialLIBD.html#since) -[![Codecov test coverage](https://codecov.io/gh/LieberInstitute/spatialLIBD/branch/master/graph/badge.svg)](https://codecov.io/gh/LieberInstitute/spatialLIBD?branch=master) -[![R build status](https://github.com/LieberInstitute/spatialLIBD/workflows/R-CMD-check-bioc/badge.svg)](https://github.com/LieberInstitute/spatialLIBD/actions) +[![Codecov test coverage](https://codecov.io/gh/LieberInstitute/spatialLIBD/branch/devel/graph/badge.svg)](https://codecov.io/gh/LieberInstitute/spatialLIBD?branch=devel) +[![R-CMD-check-bioc](https://github.com/LieberInstitute/spatialLIBD/actions/workflows/check-bioc.yml/badge.svg)](https://github.com/LieberInstitute/spatialLIBD/actions/workflows/check-bioc.yml) [![GitHub issues](https://img.shields.io/github/issues/LieberInstitute/spatialLIBD)](https://github.com/LieberInstitute/spatialLIBD/issues) +[![GitHub pulls](https://img.shields.io/github/issues-pr/LieberInstitute/spatialLIBD)](https://github.com/LieberInstitute/spatialLIBD/pulls) [![DOI](https://zenodo.org/badge/225913568.svg)](https://zenodo.org/badge/latestdoi/225913568) @@ -69,6 +70,16 @@ spatialLIBD::run_app() * [Main shiny application website](http://spatial.libd.org/spatialLIBD/) (note that the link must have a trailing slash `/` for it to work) * [Shinyapps](https://libd.shinyapps.io/spatialLIBD/) This version has less RAM memory but is typically deployed using the latest version of `spatialLIBD`. +## Introductory material + +If you prefer to watch a video overview of the `HumanPilot` project, check the following journal club presentation of the main results. + + + +You might also be interested in the explainer video and [companion blog post](https://lcolladotor.github.io/2024/05/23/humanpilot-first-spatially-resolved-transcriptomics-study-using-visium/) as well as [the original Feb 29, 2020 blog post](https://lcolladotor.github.io/2020/02/29/diving-together-into-the-unknown-world-of-spatial-transcriptomics/) from when we first made this project public. + + + ## R/Bioconductor package The `spatialLIBD` package contains functions for: diff --git a/README.md b/README.md index 97d15e8b..6f02d134 100644 --- a/README.md +++ b/README.md @@ -20,11 +20,12 @@ commit](https://bioconductor.org/shields/lastcommit/devel/data-experiment/spatia [![Bioc dependencies](https://bioconductor.org/shields/dependencies/release/spatialLIBD.svg)](https://bioconductor.org/packages/release/data-experiment/html/spatialLIBD.html#since) [![Codecov test -coverage](https://codecov.io/gh/LieberInstitute/spatialLIBD/branch/master/graph/badge.svg)](https://codecov.io/gh/LieberInstitute/spatialLIBD?branch=master) -[![R build -status](https://github.com/LieberInstitute/spatialLIBD/workflows/R-CMD-check-bioc/badge.svg)](https://github.com/LieberInstitute/spatialLIBD/actions) +coverage](https://codecov.io/gh/LieberInstitute/spatialLIBD/branch/devel/graph/badge.svg)](https://codecov.io/gh/LieberInstitute/spatialLIBD?branch=devel) +[![R-CMD-check-bioc](https://github.com/LieberInstitute/spatialLIBD/actions/workflows/check-bioc.yml/badge.svg)](https://github.com/LieberInstitute/spatialLIBD/actions/workflows/check-bioc.yml) [![GitHub issues](https://img.shields.io/github/issues/LieberInstitute/spatialLIBD)](https://github.com/LieberInstitute/spatialLIBD/issues) +[![GitHub +pulls](https://img.shields.io/github/issues-pr/LieberInstitute/spatialLIBD)](https://github.com/LieberInstitute/spatialLIBD/pulls) [![DOI](https://zenodo.org/badge/225913568.svg)](https://zenodo.org/badge/latestdoi/225913568) @@ -115,6 +116,23 @@ spatialLIBD::run_app() less RAM memory but is typically deployed using the latest version of `spatialLIBD`. +## Introductory material + +If you prefer to watch a video overview of the `HumanPilot` project, +check the following journal club presentation of the main results. + + + +You might also be interested in the explainer video and [companion blog +post](https://lcolladotor.github.io/2024/05/23/humanpilot-first-spatially-resolved-transcriptomics-study-using-visium/) +as well as [the original Feb 29, 2020 blog +post](https://lcolladotor.github.io/2020/02/29/diving-together-into-the-unknown-world-of-spatial-transcriptomics/) +from when we first made this project public. + + + ## R/Bioconductor package The `spatialLIBD` package contains functions for: @@ -196,10 +214,16 @@ spe #> altExpNames(0): #> spatialCoords names(2) : pxl_col_in_fullres pxl_row_in_fullres #> imgData names(4): sample_id image_id data scaleFactor +``` + +``` r ## Note the memory size lobstr::obj_size(spe) #> 2.04 GB +``` + +``` r ## Remake the logo image with histology information vis_clus( @@ -220,20 +244,20 @@ You can access all the raw data through Furthermore, below you can find the links to the raw data we received from 10x Genomics. -| SampleID | h5_filtered | h5_raw | image_full | image_hi | image_lo | loupe | HTML_report | -|---------:|:------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------| -| 151507 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151507_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151507_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151507_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151507_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151507_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151507.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151507/151507_web_summary.html) | -| 151508 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151508_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151508_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151508_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151508_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151508_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151508.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151508/151508_web_summary.html) | -| 151509 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151509_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151509_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151509_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151509_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151509_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151509.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151509/151509_web_summary.html) | -| 151510 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151510_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151510_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151510_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151510_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151510_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151510.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151510/151510_web_summary.html) | -| 151669 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151669_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151669_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151669_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151669_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151669_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151669.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151669/151669_web_summary.html) | -| 151670 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151670_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151670_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151670_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151670_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151670_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151670.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151670/151670_web_summary.html) | -| 151671 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151671_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151671_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151671_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151671_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151671_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151671.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151671/151671_web_summary.html) | -| 151672 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151672_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151672_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151672_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151672_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151672_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151672.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151672/151672_web_summary.html) | -| 151673 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151673_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151673_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151673_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151673_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151673_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151673.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151673/151673_web_summary.html) | -| 151674 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151674_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151674_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151674_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151674_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151674_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151674.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151674/151674_web_summary.html) | -| 151675 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151675_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151675_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151675_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151675_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151675_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151675.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151675/151675_web_summary.html) | -| 151676 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151676_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151676_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151676_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151676_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151676_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151676.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151676/151676_web_summary.html) | +| SampleID | h5_filtered | h5_raw | image_full | image_hi | image_lo | loupe | HTML_report | +|---:|:---|:---|:---|:---|:---|:---|:---| +| 151507 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151507_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151507_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151507_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151507_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151507_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151507.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151507/151507_web_summary.html) | +| 151508 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151508_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151508_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151508_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151508_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151508_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151508.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151508/151508_web_summary.html) | +| 151509 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151509_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151509_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151509_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151509_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151509_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151509.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151509/151509_web_summary.html) | +| 151510 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151510_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151510_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151510_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151510_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151510_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151510.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151510/151510_web_summary.html) | +| 151669 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151669_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151669_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151669_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151669_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151669_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151669.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151669/151669_web_summary.html) | +| 151670 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151670_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151670_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151670_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151670_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151670_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151670.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151670/151670_web_summary.html) | +| 151671 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151671_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151671_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151671_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151671_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151671_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151671.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151671/151671_web_summary.html) | +| 151672 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151672_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151672_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151672_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151672_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151672_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151672.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151672/151672_web_summary.html) | +| 151673 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151673_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151673_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151673_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151673_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151673_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151673.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151673/151673_web_summary.html) | +| 151674 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151674_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151674_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151674_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151674_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151674_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151674.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151674/151674_web_summary.html) | +| 151675 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151675_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151675_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151675_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151675_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151675_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151675.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151675/151675_web_summary.html) | +| 151676 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151676_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151676_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151676_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151676_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151676_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151676.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151676/151676_web_summary.html) | ## Citation @@ -243,7 +267,6 @@ Please run this yourself to check for any updates on how to cite ``` r print(citation("spatialLIBD"), bibtex = TRUE) -#> #> To cite package 'spatialLIBD' in publications use: #> #> Pardo B, Spangler A, Weber LM, Hicks SC, Jaffe AE, Martinowich K, @@ -283,6 +306,47 @@ print(citation("spatialLIBD"), bibtex = TRUE) #> doi = {10.1038/s41593-020-00787-0}, #> url = {https://www.nature.com/articles/s41593-020-00787-0}, #> } +#> +#> Huuki-Myers LA, Spangler A, Eagles NJ, Montgomergy KD, Kwon SH, Guo +#> B, Grant-Peters M, Divecha HR, Tippani M, Sriworarat C, Nguyen AB, +#> Ravichandran P, Tran MN, Seyedian A, Consortium P, Hyde TM, Kleinman +#> JE, Battle A, Page SC, Ryten M, Hicks SC, Martinowich K, +#> Collado-Torres L, Maynard KR (2024). "A data-driven single-cell and +#> spatial transcriptomic map of the human prefrontal cortex." +#> _Science_. doi:10.1126/science.adh1938 +#> , +#> . +#> +#> A BibTeX entry for LaTeX users is +#> +#> @Article{, +#> title = {A data-driven single-cell and spatial transcriptomic map of the human prefrontal cortex}, +#> author = {Louise A. Huuki-Myers and Abby Spangler and Nicholas J. Eagles and Kelsey D. Montgomergy and Sang Ho Kwon and Boyi Guo and Melissa Grant-Peters and Heena R. Divecha and Madhavi Tippani and Chaichontat Sriworarat and Annie B. Nguyen and Prashanthi Ravichandran and Matthew N. Tran and Arta Seyedian and PsychENCODE Consortium and Thomas M. Hyde and Joel E. Kleinman and Alexis Battle and Stephanie C. Page and Mina Ryten and Stephanie C. Hicks and Keri Martinowich and Leonardo Collado-Torres and Kristen R. Maynard}, +#> year = {2024}, +#> journal = {Science}, +#> doi = {10.1126/science.adh1938}, +#> url = {https://doi.org/10.1126/science.adh1938}, +#> } +#> +#> Kwon SH, Parthiban S, Tippani M, Divecha HR, Eagles NJ, Lobana JS, +#> Williams SR, Mark M, Bharadwaj RA, Kleinman JE, Hyde TM, Page SC, +#> Hicks SC, Martinowich K, Maynard KR, Collado-Torres L (2023). +#> "Influence of Alzheimer’s disease related neuropathology on local +#> microenvironment gene expression in the human inferior temporal +#> cortex." _GEN Biotechnology_. doi:10.1089/genbio.2023.0019 +#> , +#> . +#> +#> A BibTeX entry for LaTeX users is +#> +#> @Article{, +#> title = {Influence of Alzheimer’s disease related neuropathology on local microenvironment gene expression in the human inferior temporal cortex}, +#> author = {Sang Ho Kwon and Sowmya Parthiban and Madhavi Tippani and Heena R. Divecha and Nicholas J. Eagles and Jashandeep S. Lobana and Stephen R. Williams and Michelle Mark and Rahul A. Bharadwaj and Joel E. Kleinman and Thomas M. Hyde and Stephanie C. Page and Stephanie C. Hicks and Keri Martinowich and Kristen R. Maynard and Leonardo Collado-Torres}, +#> year = {2023}, +#> journal = {GEN Biotechnology}, +#> doi = {10.1089/genbio.2023.0019}, +#> url = {https://doi.org/10.1089/genbio.2023.0019}, +#> } ``` Please note that the `spatialLIBD` was only made possible thanks to many @@ -306,7 +370,7 @@ By contributing to this project, you agree to abide by its terms. *[rcmdcheck](https://CRAN.R-project.org/package=rcmdcheck)* customized to use [Bioconductor’s docker containers](https://www.bioconductor.org/help/docker/) and - *[BiocCheck](https://bioconductor.org/packages/3.16/BiocCheck)*. + *[BiocCheck](https://bioconductor.org/packages/3.19/BiocCheck)*. - Code coverage assessment is possible thanks to [codecov](https://codecov.io/gh) and *[covr](https://CRAN.R-project.org/package=covr)*. @@ -323,7 +387,7 @@ By contributing to this project, you agree to abide by its terms. For more details, check the `dev` directory. This package was developed using -*[biocthis](https://bioconductor.org/packages/3.16/biocthis)*. +*[biocthis](https://bioconductor.org/packages/3.19/biocthis)*. @@ -336,6 +400,5 @@ This package was developed using window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); - gtag('config', 'G-QKT3SV9EFL'); diff --git a/_pkgdown.yml b/_pkgdown.yml index 6ef5f5a6..e047e6e6 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1 +1,2 @@ destination: docs + diff --git a/data-raw/.gitignore b/data-raw/.gitignore index d839103c..32d882b2 100644 --- a/data-raw/.gitignore +++ b/data-raw/.gitignore @@ -1,3 +1,4 @@ *.Rdata logo.ai logo.pdf +spatialDLPFC_spe_subset_example.rds diff --git a/data-raw/create_spatialDLPFC_spe_subset.R b/data-raw/create_spatialDLPFC_spe_subset.R new file mode 100644 index 00000000..74cf9d21 --- /dev/null +++ b/data-raw/create_spatialDLPFC_spe_subset.R @@ -0,0 +1,243 @@ +library("spatialLIBD") +library("sessioninfo") + +spe <- fetch_data("spatialDLPFC_Visium") + +lobstr::obj_size(spe) +# 6.97 GB + +## Subset to just 3 samples used in Figure 2A +spe <- spe[, spe$sample_id %in% c("Br8667_mid", "Br6522_ant", "Br6432_ant")] + +lobstr::obj_size(spe) +# 777.93 MB + +imgData(spe) +# DataFrame with 12 rows and 4 columns +# sample_id image_id data scaleFactor +# +# 1 Br6432_ant lowres #### 0.0148894 +# 2 Br6432_ant hires #### 0.0496315 +# 3 Br6432_ant detected #### 0.0496315 +# 4 Br6432_ant aligned #### 0.0496315 +# 5 Br6522_ant lowres #### 0.0192517 +# ... ... ... ... ... +# 8 Br6522_ant aligned #### 0.0641725 +# 9 Br8667_mid lowres #### 0.0169181 +# 10 Br8667_mid hires #### 0.0563936 +# 11 Br8667_mid detected #### 0.0563936 +# 12 Br8667_mid aligned #### 0.0563936 + +## Subset to just the lowres images +imgData(spe) <- imgData(spe)[imgData(spe)$image_id == "lowres", ] + +lobstr::obj_size(spe) +# 540.88 MB + +saveRDS(spe, file = here::here("data-raw", "spatialDLPFC_spe_subset_example.rds")) +system2("ls", paste("-lh", here::here("data-raw", "spatialDLPFC_spe_subset_example.rds"))) +# -rw-r--r--@ 1 leocollado staff 107M May 23 23:42 /Users/leocollado/Dropbox/Code/spatialLIBD/data-raw/spatialDLPFC_spe_subset_example.rds + +## Reproducibility information +print("Reproducibility information:") +Sys.time() +proc.time() +options(width = 120) +session_info() +# ─ Session info ─────────────────────────────────────────────────────────────────────────────────────────────────────── +# setting value +# version R version 4.3.0 (2023-04-21) +# os macOS Ventura 13.3.1 +# system aarch64, darwin20 +# ui RStudio +# language (EN) +# collate en_US.UTF-8 +# ctype en_US.UTF-8 +# tz Europe/London +# date 2023-05-23 +# rstudio 2023.03.1+446 Cherry Blossom (desktop) +# pandoc 2.17.1.1 @ /opt/homebrew/bin/pandoc +# +# ─ Packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────── +# package * version date (UTC) lib source +# AnnotationDbi 1.62.1 2023-05-02 [1] Bioconductor +# AnnotationHub 3.8.0 2023-04-25 [1] Bioconductor +# attempt 0.3.1 2020-05-03 [1] CRAN (R 4.3.0) +# beachmat 2.16.0 2023-04-25 [1] Bioconductor +# beeswarm 0.4.0 2021-06-01 [1] CRAN (R 4.3.0) +# benchmarkme 1.0.8 2022-06-12 [1] CRAN (R 4.3.0) +# benchmarkmeData 1.0.4 2020-04-23 [1] CRAN (R 4.3.0) +# Biobase * 2.60.0 2023-04-25 [1] Bioconductor +# BiocFileCache 2.8.0 2023-04-25 [1] Bioconductor +# BiocGenerics * 0.46.0 2023-04-25 [1] Bioconductor +# BiocIO 1.10.0 2023-04-25 [1] Bioconductor +# BiocManager 1.30.20 2023-02-24 [1] CRAN (R 4.3.0) +# BiocNeighbors 1.18.0 2023-04-25 [1] Bioconductor +# BiocParallel 1.34.1 2023-05-08 [1] Bioconductor +# BiocSingular 1.16.0 2023-04-25 [1] Bioconductor +# biocthis 1.11.1 2023-05-06 [1] Github (lcolladotor/biocthis@42dc8df) +# BiocVersion 3.17.1 2022-12-20 [1] Bioconductor +# Biostrings 2.68.1 2023-05-16 [1] Bioconductor +# bit 4.0.5 2022-11-15 [1] CRAN (R 4.3.0) +# bit64 4.0.5 2020-08-30 [1] CRAN (R 4.3.0) +# bitops 1.0-7 2021-04-24 [1] CRAN (R 4.3.0) +# blob 1.2.4 2023-03-17 [1] CRAN (R 4.3.0) +# brio 1.1.3 2021-11-30 [1] CRAN (R 4.3.0) +# bslib 0.4.2 2022-12-16 [1] CRAN (R 4.3.0) +# cachem 1.0.8 2023-05-01 [1] CRAN (R 4.3.0) +# callr 3.7.3 2022-11-02 [1] CRAN (R 4.3.0) +# cli 3.6.1 2023-03-23 [1] CRAN (R 4.3.0) +# codetools 0.2-19 2023-02-01 [1] CRAN (R 4.3.0) +# colorout 1.2-2 2023-05-06 [1] Github (jalvesaq/colorout@79931fd) +# colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.3.0) +# config 0.3.1 2020-12-17 [1] CRAN (R 4.3.0) +# cowplot 1.1.1 2020-12-30 [1] CRAN (R 4.3.0) +# crayon 1.5.2 2022-09-29 [1] CRAN (R 4.3.0) +# curl 5.0.0 2023-01-12 [1] CRAN (R 4.3.0) +# data.table 1.14.8 2023-02-17 [1] CRAN (R 4.3.0) +# DBI 1.1.3 2022-06-18 [1] CRAN (R 4.3.0) +# dbplyr 2.3.2 2023-03-21 [1] CRAN (R 4.3.0) +# DelayedArray 0.26.2 2023-05-05 [1] Bioconductor +# DelayedMatrixStats 1.22.0 2023-04-25 [1] Bioconductor +# devtools * 2.4.5 2022-10-11 [1] CRAN (R 4.3.0) +# digest 0.6.31 2022-12-11 [1] CRAN (R 4.3.0) +# doParallel 1.0.17 2022-02-07 [1] CRAN (R 4.3.0) +# dotCall64 1.0-2 2022-10-03 [1] CRAN (R 4.3.0) +# dplyr 1.1.2 2023-04-20 [1] CRAN (R 4.3.0) +# dqrng 0.3.0 2021-05-01 [1] CRAN (R 4.3.0) +# DropletUtils 1.20.0 2023-05-08 [1] Bioconductor +# DT 0.28 2023-05-18 [1] CRAN (R 4.3.0) +# edgeR 3.42.2 2023-05-08 [1] Bioconductor +# ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.3.0) +# ExperimentHub 2.8.0 2023-04-25 [1] Bioconductor +# fansi 1.0.4 2023-01-22 [1] CRAN (R 4.3.0) +# fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.3.0) +# fields 14.1 2022-08-12 [1] CRAN (R 4.3.0) +# filelock 1.0.2 2018-10-05 [1] CRAN (R 4.3.0) +# foreach 1.5.2 2022-02-02 [1] CRAN (R 4.3.0) +# fs 1.6.2 2023-04-25 [1] CRAN (R 4.3.0) +# generics 0.1.3 2022-07-05 [1] CRAN (R 4.3.0) +# GenomeInfoDb * 1.36.0 2023-04-25 [1] Bioconductor +# GenomeInfoDbData 1.2.10 2023-05-06 [1] Bioconductor +# GenomicAlignments 1.36.0 2023-04-25 [1] Bioconductor +# GenomicRanges * 1.52.0 2023-04-25 [1] Bioconductor +# ggbeeswarm 0.7.2 2023-04-29 [1] CRAN (R 4.3.0) +# ggplot2 3.4.2 2023-04-03 [1] CRAN (R 4.3.0) +# ggrepel 0.9.3 2023-02-03 [1] CRAN (R 4.3.0) +# glue 1.6.2 2022-02-24 [1] CRAN (R 4.3.0) +# golem 0.4.0 2023-03-12 [1] CRAN (R 4.3.0) +# gridExtra 2.3 2017-09-09 [1] CRAN (R 4.3.0) +# gtable 0.3.3 2023-03-21 [1] CRAN (R 4.3.0) +# HDF5Array 1.28.1 2023-05-01 [1] Bioconductor +# here 1.0.1 2020-12-13 [1] CRAN (R 4.3.0) +# hms 1.1.3 2023-03-21 [1] CRAN (R 4.3.0) +# htmltools 0.5.5 2023-03-23 [1] CRAN (R 4.3.0) +# htmlwidgets 1.6.2 2023-03-17 [1] CRAN (R 4.3.0) +# httpuv 1.6.11 2023-05-11 [1] CRAN (R 4.3.0) +# httr 1.4.6 2023-05-08 [1] CRAN (R 4.3.0) +# interactiveDisplayBase 1.38.0 2023-04-25 [1] Bioconductor +# IRanges * 2.34.0 2023-04-25 [1] Bioconductor +# irlba 2.3.5.1 2022-10-03 [1] CRAN (R 4.3.0) +# iterators 1.0.14 2022-02-05 [1] CRAN (R 4.3.0) +# jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.3.0) +# jsonlite 1.8.4 2022-12-06 [1] CRAN (R 4.3.0) +# KEGGREST 1.40.0 2023-04-25 [1] Bioconductor +# later 1.3.1 2023-05-02 [1] CRAN (R 4.3.0) +# lattice 0.21-8 2023-04-05 [1] CRAN (R 4.3.0) +# lazyeval 0.2.2 2019-03-15 [1] CRAN (R 4.3.0) +# lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.3.0) +# limma 3.56.1 2023-05-08 [1] Bioconductor +# lobstr 1.1.2 2022-06-22 [1] CRAN (R 4.3.0) +# locfit 1.5-9.7 2023-01-02 [1] CRAN (R 4.3.0) +# lubridate 1.9.2 2023-02-10 [1] CRAN (R 4.3.0) +# magick 2.7.4 2023-03-09 [1] CRAN (R 4.3.0) +# magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.3.0) +# maps 3.4.1 2022-10-30 [1] CRAN (R 4.3.0) +# Matrix 1.5-4.1 2023-05-18 [1] CRAN (R 4.3.0) +# MatrixGenerics * 1.12.0 2023-04-25 [1] Bioconductor +# matrixStats * 0.63.0 2022-11-18 [1] CRAN (R 4.3.0) +# memoise 2.0.1 2021-11-26 [1] CRAN (R 4.3.0) +# mime 0.12 2021-09-28 [1] CRAN (R 4.3.0) +# miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 4.3.0) +# munsell 0.5.0 2018-06-12 [1] CRAN (R 4.3.0) +# paletteer 1.5.0 2022-10-19 [1] CRAN (R 4.3.0) +# pillar 1.9.0 2023-03-22 [1] CRAN (R 4.3.0) +# pkgbuild 1.4.0 2022-11-27 [1] CRAN (R 4.3.0) +# pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.3.0) +# pkgload 1.3.2 2022-11-16 [1] CRAN (R 4.3.0) +# plotly 4.10.1 2022-11-07 [1] CRAN (R 4.3.0) +# png 0.1-8 2022-11-29 [1] CRAN (R 4.3.0) +# prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.3.0) +# processx 3.8.1 2023-04-18 [1] CRAN (R 4.3.0) +# profvis 0.3.8 2023-05-02 [1] CRAN (R 4.3.0) +# promises 1.2.0.1 2021-02-11 [1] CRAN (R 4.3.0) +# prompt 1.0.1 2023-05-06 [1] Github (gaborcsardi/prompt@7ef0f2e) +# ps 1.7.5 2023-04-18 [1] CRAN (R 4.3.0) +# purrr 1.0.1 2023-01-10 [1] CRAN (R 4.3.0) +# R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.3.0) +# R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.3.0) +# R.oo 1.25.0 2022-06-12 [1] CRAN (R 4.3.0) +# R.utils 2.12.2 2022-11-11 [1] CRAN (R 4.3.0) +# R6 2.5.1 2021-08-19 [1] CRAN (R 4.3.0) +# rappdirs 0.3.3 2021-01-31 [1] CRAN (R 4.3.0) +# RColorBrewer 1.1-3 2022-04-03 [1] CRAN (R 4.3.0) +# Rcpp 1.0.10 2023-01-22 [1] CRAN (R 4.3.0) +# RCurl 1.98-1.12 2023-03-27 [1] CRAN (R 4.3.0) +# rematch2 2.1.2 2020-05-01 [1] CRAN (R 4.3.0) +# remotes 2.4.2 2021-11-30 [1] CRAN (R 4.3.0) +# restfulr 0.0.15 2022-06-16 [1] CRAN (R 4.3.0) +# rhdf5 2.44.0 2023-04-25 [1] Bioconductor +# rhdf5filters 1.12.1 2023-04-30 [1] Bioconductor +# Rhdf5lib 1.22.0 2023-04-25 [1] Bioconductor +# rjson 0.2.21 2022-01-09 [1] CRAN (R 4.3.0) +# rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0) +# rprojroot 2.0.3 2022-04-02 [1] CRAN (R 4.3.0) +# Rsamtools 2.16.0 2023-04-25 [1] Bioconductor +# RSQLite 2.3.1 2023-04-03 [1] CRAN (R 4.3.0) +# rsthemes 0.4.0 2023-05-06 [1] Github (gadenbuie/rsthemes@34a55a4) +# rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.3.0) +# rsvd 1.0.5 2021-04-16 [1] CRAN (R 4.3.0) +# rtracklayer 1.60.0 2023-04-25 [1] Bioconductor +# S4Arrays 1.0.4 2023-05-14 [1] Bioconductor +# S4Vectors * 0.38.1 2023-05-02 [1] Bioconductor +# sass 0.4.6.9000 2023-05-06 [1] Github (rstudio/sass@f248fe5) +# ScaledMatrix 1.8.1 2023-05-03 [1] Bioconductor +# scales 1.2.1 2022-08-20 [1] CRAN (R 4.3.0) +# scater 1.28.0 2023-04-25 [1] Bioconductor +# scuttle 1.10.1 2023-05-02 [1] Bioconductor +# sessioninfo * 1.2.2 2021-12-06 [1] CRAN (R 4.3.0) +# shiny 1.7.4 2022-12-15 [1] CRAN (R 4.3.0) +# shinyWidgets 0.7.6 2023-01-08 [1] CRAN (R 4.3.0) +# SingleCellExperiment * 1.22.0 2023-04-25 [1] Bioconductor +# spam 2.9-1 2022-08-07 [1] CRAN (R 4.3.0) +# sparseMatrixStats 1.12.0 2023-04-25 [1] Bioconductor +# SpatialExperiment * 1.10.0 2023-04-25 [1] Bioconductor +# spatialLIBD * 1.12.0 2023-04-27 [1] Bioconductor +# statmod 1.5.0 2023-01-06 [1] CRAN (R 4.3.0) +# stringi 1.7.12 2023-01-11 [1] CRAN (R 4.3.0) +# stringr 1.5.0 2022-12-02 [1] CRAN (R 4.3.0) +# styler 1.9.1 2023-03-04 [1] CRAN (R 4.3.0) +# SummarizedExperiment * 1.30.1 2023-05-01 [1] Bioconductor +# suncalc 0.5.1 2022-09-29 [1] CRAN (R 4.3.0) +# testthat * 3.1.8 2023-05-04 [1] CRAN (R 4.3.0) +# tibble 3.2.1 2023-03-20 [1] CRAN (R 4.3.0) +# tidyr 1.3.0 2023-01-24 [1] CRAN (R 4.3.0) +# tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.3.0) +# timechange 0.2.0 2023-01-11 [1] CRAN (R 4.3.0) +# urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.3.0) +# usethis * 2.1.6 2022-05-25 [1] CRAN (R 4.3.0) +# utf8 1.2.3 2023-01-31 [1] CRAN (R 4.3.0) +# vctrs 0.6.2 2023-04-19 [1] CRAN (R 4.3.0) +# vipor 0.4.5 2017-03-22 [1] CRAN (R 4.3.0) +# viridis 0.6.3 2023-05-03 [1] CRAN (R 4.3.0) +# viridisLite 0.4.2 2023-05-02 [1] CRAN (R 4.3.0) +# withr 2.5.0 2022-03-03 [1] CRAN (R 4.3.0) +# XML 3.99-0.14 2023-03-19 [1] CRAN (R 4.3.0) +# xtable 1.8-4 2019-04-21 [1] CRAN (R 4.3.0) +# XVector 0.40.0 2023-04-25 [1] Bioconductor +# yaml 2.3.7 2023-01-23 [1] CRAN (R 4.3.0) +# zlibbioc 1.46.0 2023-04-25 [1] Bioconductor +# +# [1] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library +# +# ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── diff --git a/inst/CITATION b/inst/CITATION index 1351d786..e10fdcf2 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -43,5 +43,63 @@ c( journal = "Nature Neuroscience", doi = "10.1038/s41593-020-00787-0", url = "https://www.nature.com/articles/s41593-020-00787-0" + ), + bibentry(bibtype="article", + title = "A data-driven single-cell and spatial transcriptomic map of the human prefrontal cortex", + author = personList( + as.person("Louise A. Huuki-Myers"), + as.person("Abby Spangler"), + as.person("Nicholas J. Eagles"), + as.person("Kelsey D. Montgomergy"), + as.person("Sang Ho Kwon"), + as.person("Boyi Guo"), + as.person("Melissa Grant-Peters"), + as.person("Heena R. Divecha"), + as.person("Madhavi Tippani"), + as.person("Chaichontat Sriworarat"), + as.person("Annie B. Nguyen"), + as.person("Prashanthi Ravichandran"), + as.person("Matthew N. Tran"), + as.person("Arta Seyedian"), + as.person("PsychENCODE Consortium"), + as.person("Thomas M. Hyde"), + as.person("Joel E. Kleinman"), + as.person("Alexis Battle"), + as.person("Stephanie C. Page"), + as.person("Mina Ryten"), + as.person("Stephanie C. Hicks"), + as.person("Keri Martinowich"), + as.person("Leonardo Collado-Torres"), + as.person("Kristen R. Maynard") + ), + year = 2024, + journal = "Science", + doi = "10.1126/science.adh1938", + url = "https://doi.org/10.1126/science.adh1938" + ), + bibentry(bibtype="article", + title = "Influence of Alzheimer’s disease related neuropathology on local microenvironment gene expression in the human inferior temporal cortex", + author = personList( + as.person("Sang Ho Kwon"), + as.person("Sowmya Parthiban"), + as.person("Madhavi Tippani"), + as.person("Heena R. Divecha"), + as.person("Nicholas J. Eagles"), + as.person("Jashandeep S. Lobana"), + as.person("Stephen R. Williams"), + as.person("Michelle Mark"), + as.person("Rahul A. Bharadwaj"), + as.person("Joel E. Kleinman"), + as.person("Thomas M. Hyde"), + as.person("Stephanie C. Page"), + as.person("Stephanie C. Hicks"), + as.person("Keri Martinowich"), + as.person("Kristen R. Maynard"), + as.person("Leonardo Collado-Torres") + ), + year = 2023, + journal = "GEN Biotechnology", + doi = "10.1089/genbio.2023.0019", + url = "https://doi.org/10.1089/genbio.2023.0019" ) ) diff --git a/inst/app/www/README.md b/inst/app/www/README.md index 71853ef8..65d68c96 100644 --- a/inst/app/www/README.md +++ b/inst/app/www/README.md @@ -20,34 +20,36 @@ commit](https://bioconductor.org/shields/lastcommit/devel/data-experiment/spatia [![Bioc dependencies](https://bioconductor.org/shields/dependencies/release/spatialLIBD.svg)](https://bioconductor.org/packages/release/data-experiment/html/spatialLIBD.html#since) [![Codecov test -coverage](https://codecov.io/gh/LieberInstitute/spatialLIBD/branch/master/graph/badge.svg)](https://codecov.io/gh/LieberInstitute/spatialLIBD?branch=master) +coverage](https://codecov.io/gh/LieberInstitute/spatialLIBD/branch/devel/graph/badge.svg)](https://codecov.io/gh/LieberInstitute/spatialLIBD?branch=devel) [![R build -status](https://github.com/LieberInstitute/spatialLIBD/workflows/R-CMD-check-bioc/badge.svg)](https://github.com/LieberInstitute/spatialLIBD/actions) +status](https://github.com/LieberInstitute/spatialLIBD/actions/workflows/check-bioc.yml/badge.svg)](https://github.com/LieberInstitute/spatialLIBD/actions/workflows/check-bioc.yml) [![GitHub issues](https://img.shields.io/github/issues/LieberInstitute/spatialLIBD)](https://github.com/LieberInstitute/spatialLIBD/issues) +[![GitHub +pulls](https://img.shields.io/github/issues-pr/LieberInstitute/spatialLIBD)](https://github.com/LieberInstitute/spatialLIBD/pulls) [![DOI](https://zenodo.org/badge/225913568.svg)](https://zenodo.org/badge/latestdoi/225913568) Welcome to the `spatialLIBD` project! It is composed of: -- a [shiny](https://shiny.rstudio.com/) web application that we are - hosting at - [spatial.libd.org/spatialLIBD/](http://spatial.libd.org/spatialLIBD/) - that can handle a - [limited](https://github.com/LieberInstitute/spatialLIBD/issues/2) - set of concurrent users, -- a Bioconductor package at - [bioconductor.org/packages/spatialLIBD](http://bioconductor.org/packages/spatialLIBD) - (or from [here](http://research.libd.org/spatialLIBD/)) that lets - you analyze the data and run a local version of our web application - (with our data or yours), -- and a [research article](https://doi.org/10.1038/s41593-020-00787-0) - with the scientific knowledge we drew from this dataset. The - analysis code for our project is available - [here](https://github.com/LieberInstitute/HumanPilot/) and the high - quality figures for the manuscript are available through - [Figshare](https://doi.org/10.6084/m9.figshare.13623902.v1). +- a [shiny](https://shiny.rstudio.com/) web application that we are + hosting at + [spatial.libd.org/spatialLIBD/](http://spatial.libd.org/spatialLIBD/) + that can handle a + [limited](https://github.com/LieberInstitute/spatialLIBD/issues/2) set + of concurrent users, +- a Bioconductor package at + [bioconductor.org/packages/spatialLIBD](http://bioconductor.org/packages/spatialLIBD) + (or from [here](http://research.libd.org/spatialLIBD/)) that lets you + analyze the data and run a local version of our web application (with + our data or yours), +- and a [research article](https://doi.org/10.1038/s41593-020-00787-0) + with the scientific knowledge we drew from this dataset. The analysis + code for our project is available + [here](https://github.com/LieberInstitute/HumanPilot/) and the high + quality figures for the manuscript are available through + [Figshare](https://doi.org/10.6084/m9.figshare.13623902.v1). The web application allows you to browse the LIBD human dorsolateral pre-frontal cortex (DLPFC) spatial transcriptomics data generated with @@ -115,19 +117,35 @@ spatialLIBD::run_app() less RAM memory but is typically deployed using the latest version of `spatialLIBD`. +## Introductory material + +If you prefer to watch a video overview of the `HumanPilot` project, +check the following journal club presentation of the main results. + + + +You might also be interested in the explainer video and [companion blog +post](https://lcolladotor.github.io/2024/05/23/humanpilot-first-spatially-resolved-transcriptomics-study-using-visium/) +as well as [the original Feb 29, 2020 blog +post](https://lcolladotor.github.io/2020/02/29/diving-together-into-the-unknown-world-of-spatial-transcriptomics/) +from when we first made this project public. + + + ## R/Bioconductor package The `spatialLIBD` package contains functions for: -- Accessing the spatial transcriptomics data from the LIBD Human Pilot - project ([code on - GitHub](https://github.com/LieberInstitute/HumanPilot)) generated - with the Visium platform from 10x Genomics. The data is retrieved - from [Bioconductor](http://bioconductor.org/)’s `ExperimentHub`. -- Visualizing the spot-level spatial gene expression data and - clusters. -- Inspecting the data interactively either on your computer or through - [spatial.libd.org/spatialLIBD/](http://spatial.libd.org/spatialLIBD/). +- Accessing the spatial transcriptomics data from the LIBD Human Pilot + project ([code on + GitHub](https://github.com/LieberInstitute/HumanPilot)) generated with + the Visium platform from 10x Genomics. The data is retrieved from + [Bioconductor](http://bioconductor.org/)’s `ExperimentHub`. +- Visualizing the spot-level spatial gene expression data and clusters. +- Inspecting the data interactively either on your computer or through + [spatial.libd.org/spatialLIBD/](http://spatial.libd.org/spatialLIBD/). For more details, please check the [documentation website](http://lieberinstitute.github.io/spatialLIBD) or the @@ -190,18 +208,23 @@ spe #> rowData names(9): source type ... gene_search is_top_hvg #> colnames(47681): AAACAACGAATAGTTC-1 AAACAAGTATCTCCCA-1 ... #> TTGTTTCCATACAACT-1 TTGTTTGTGTAAATTC-1 -#> colData names(66): sample_id Cluster ... spatialLIBD ManualAnnotation +#> colData names(69): sample_id Cluster ... array_row array_col #> reducedDimNames(6): PCA TSNE_perplexity50 ... TSNE_perplexity80 #> UMAP_neighbors15 #> mainExpName: NULL #> altExpNames(0): -#> spatialData names(3) : in_tissue array_row array_col #> spatialCoords names(2) : pxl_col_in_fullres pxl_row_in_fullres #> imgData names(4): sample_id image_id data scaleFactor +``` + +``` r ## Note the memory size -lobstr::obj_size(spe) +lobstr::obj_size(spe) #> 2.04 GB +``` + +``` r ## Remake the logo image with histology information vis_clus( @@ -222,20 +245,20 @@ You can access all the raw data through Furthermore, below you can find the links to the raw data we received from 10x Genomics. -| SampleID | h5\_filtered | h5\_raw | image\_full | image\_hi | image\_lo | loupe | HTML\_report | -|---------:|:------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------| -| 151507 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151507_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151507_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151507_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151507_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151507_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151507.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151507/151507_web_summary.html) | -| 151508 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151508_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151508_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151508_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151508_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151508_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151508.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151508/151508_web_summary.html) | -| 151509 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151509_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151509_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151509_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151509_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151509_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151509.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151509/151509_web_summary.html) | -| 151510 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151510_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151510_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151510_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151510_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151510_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151510.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151510/151510_web_summary.html) | -| 151669 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151669_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151669_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151669_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151669_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151669_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151669.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151669/151669_web_summary.html) | -| 151670 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151670_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151670_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151670_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151670_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151670_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151670.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151670/151670_web_summary.html) | -| 151671 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151671_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151671_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151671_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151671_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151671_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151671.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151671/151671_web_summary.html) | -| 151672 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151672_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151672_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151672_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151672_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151672_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151672.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151672/151672_web_summary.html) | -| 151673 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151673_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151673_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151673_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151673_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151673_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151673.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151673/151673_web_summary.html) | -| 151674 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151674_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151674_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151674_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151674_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151674_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151674.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151674/151674_web_summary.html) | -| 151675 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151675_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151675_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151675_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151675_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151675_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151675.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151675/151675_web_summary.html) | -| 151676 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151676_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151676_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151676_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151676_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151676_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151676.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151676/151676_web_summary.html) | +| SampleID | h5_filtered | h5_raw | image_full | image_hi | image_lo | loupe | HTML_report | +|---:|:---|:---|:---|:---|:---|:---|:---| +| 151507 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151507_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151507_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151507_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151507_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151507_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151507.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151507/151507_web_summary.html) | +| 151508 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151508_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151508_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151508_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151508_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151508_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151508.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151508/151508_web_summary.html) | +| 151509 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151509_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151509_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151509_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151509_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151509_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151509.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151509/151509_web_summary.html) | +| 151510 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151510_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151510_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151510_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151510_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151510_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151510.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151510/151510_web_summary.html) | +| 151669 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151669_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151669_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151669_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151669_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151669_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151669.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151669/151669_web_summary.html) | +| 151670 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151670_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151670_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151670_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151670_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151670_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151670.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151670/151670_web_summary.html) | +| 151671 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151671_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151671_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151671_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151671_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151671_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151671.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151671/151671_web_summary.html) | +| 151672 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151672_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151672_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151672_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151672_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151672_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151672.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151672/151672_web_summary.html) | +| 151673 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151673_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151673_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151673_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151673_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151673_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151673.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151673/151673_web_summary.html) | +| 151674 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151674_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151674_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151674_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151674_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151674_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151674.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151674/151674_web_summary.html) | +| 151675 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151675_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151675_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151675_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151675_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151675_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151675.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151675/151675_web_summary.html) | +| 151676 | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151676_filtered_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/h5/151676_raw_feature_bc_matrix.h5) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151676_full_image.tif) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151676_tissue_hires_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/images/151676_tissue_lowres_image.png) | [AWS](https://spatial-dlpfc.s3.us-east-2.amazonaws.com/loupe/151676.cloupe) | [GitHub](https://github.com/LieberInstitute/HumanPilot/blob/master/10X/151676/151676_web_summary.html) | ## Citation @@ -245,13 +268,14 @@ Please run this yourself to check for any updates on how to cite ``` r print(citation("spatialLIBD"), bibtex = TRUE) +#> To cite package 'spatialLIBD' in publications use: #> -#> Pardo B, Spangler A, Weber LM, Hicks SC, Jaffe AE, Martinowich K, -#> Maynard KR, Collado-Torres L (2022). "spatialLIBD: an R/Bioconductor -#> package to visualize spatially-resolved transcriptomics data." -#> _BMC Genomics_. doi: 10.1186/s12864-022-08601-w (URL: -#> https://doi.org/10.1186/s12864-022-08601-w), https://doi.org/10.1186/s12864-022-08601-w>. +#> Pardo B, Spangler A, Weber LM, Hicks SC, Jaffe AE, Martinowich K, +#> Maynard KR, Collado-Torres L (2022). "spatialLIBD: an R/Bioconductor +#> package to visualize spatially-resolved transcriptomics data." _BMC +#> Genomics_. doi:10.1186/s12864-022-08601-w +#> , +#> . #> #> A BibTeX entry for LaTeX users is #> @@ -264,14 +288,14 @@ print(citation("spatialLIBD"), bibtex = TRUE) #> url = {https://doi.org/10.1186/s12864-022-08601-w}, #> } #> -#> Maynard KR, Collado-Torres L, Weber LM, Uytingco C, Barry BK, Williams -#> SR, II JLC, Tran MN, Besich Z, Tippani M, Chew J, Yin Y, Kleinman JE, -#> Hyde TM, Rao N, Hicks SC, Martinowich K, Jaffe AE (2021). -#> "Transcriptome-scale spatial gene expression in the human dorsolateral -#> prefrontal cortex." _Nature Neuroscience_. doi: -#> 10.1038/s41593-020-00787-0 (URL: -#> https://doi.org/10.1038/s41593-020-00787-0), https://www.nature.com/articles/s41593-020-00787-0>. +#> Maynard KR, Collado-Torres L, Weber LM, Uytingco C, Barry BK, +#> Williams SR, II JLC, Tran MN, Besich Z, Tippani M, Chew J, Yin Y, +#> Kleinman JE, Hyde TM, Rao N, Hicks SC, Martinowich K, Jaffe AE +#> (2021). "Transcriptome-scale spatial gene expression in the human +#> dorsolateral prefrontal cortex." _Nature Neuroscience_. +#> doi:10.1038/s41593-020-00787-0 +#> , +#> . #> #> A BibTeX entry for LaTeX users is #> @@ -283,8 +307,99 @@ print(citation("spatialLIBD"), bibtex = TRUE) #> doi = {10.1038/s41593-020-00787-0}, #> url = {https://www.nature.com/articles/s41593-020-00787-0}, #> } +#> +#> Huuki-Myers LA, Spangler A, Eagles NJ, Montgomergy KD, Kwon SH, Guo +#> B, Grant-Peters M, Divecha HR, Tippani M, Sriworarat C, Nguyen AB, +#> Ravichandran P, Tran MN, Seyedian A, Consortium P, Hyde TM, Kleinman +#> JE, Battle A, Page SC, Ryten M, Hicks SC, Martinowich K, +#> Collado-Torres L, Maynard KR (2024). "A data-driven single-cell and +#> spatial transcriptomic map of the human prefrontal cortex." +#> _Science_. doi:10.1126/science.adh1938 +#> , +#> . +#> +#> A BibTeX entry for LaTeX users is +#> +#> @Article{, +#> title = {A data-driven single-cell and spatial transcriptomic map of the human prefrontal cortex}, +#> author = {Louise A. Huuki-Myers and Abby Spangler and Nicholas J. Eagles and Kelsey D. Montgomergy and Sang Ho Kwon and Boyi Guo and Melissa Grant-Peters and Heena R. Divecha and Madhavi Tippani and Chaichontat Sriworarat and Annie B. Nguyen and Prashanthi Ravichandran and Matthew N. Tran and Arta Seyedian and PsychENCODE Consortium and Thomas M. Hyde and Joel E. Kleinman and Alexis Battle and Stephanie C. Page and Mina Ryten and Stephanie C. Hicks and Keri Martinowich and Leonardo Collado-Torres and Kristen R. Maynard}, +#> year = {2024}, +#> journal = {Science}, +#> doi = {10.1126/science.adh1938}, +#> url = {https://doi.org/10.1126/science.adh1938}, +#> } +#> +#> Kwon SH, Parthiban S, Tippani M, Divecha HR, Eagles NJ, Lobana JS, +#> Williams SR, Mark M, Bharadwaj RA, Kleinman JE, Hyde TM, Page SC, +#> Hicks SC, Martinowich K, Maynard KR, Collado-Torres L (2023). +#> "Influence of Alzheimer’s disease related neuropathology on local +#> microenvironment gene expression in the human inferior temporal +#> cortex." _bioRxiv_. doi:10.1101/2023.04.20.537710 +#> , +#> . +#> +#> A BibTeX entry for LaTeX users is +#> +#> @Article{, +#> title = {Influence of Alzheimer’s disease related neuropathology on local microenvironment gene expression in the human inferior temporal cortex}, +#> author = {Sang Ho Kwon and Sowmya Parthiban and Madhavi Tippani and Heena R. Divecha and Nicholas J. Eagles and Jashandeep S. Lobana and Stephen R. Williams and Michelle Mark and Rahul A. Bharadwaj and Joel E. Kleinman and Thomas M. Hyde and Stephanie C. Page and Stephanie C. Hicks and Keri Martinowich and Kristen R. Maynard and Leonardo Collado-Torres}, +#> year = {2023}, +#> journal = {GEN Biotechnology}, +#> doi = {10.1089/genbio.2023.0019}, +#> url = {https://doi.org/10.1089/genbio.2023.0019}, +#> } ``` Please note that the `spatialLIBD` was only made possible thanks to many other R and bioinformatics software authors, which are cited either in the vignettes and/or the paper(s) describing this package. + +## Code of Conduct + +Please note that the spatialLIBD project is released with a [Contributor +Code of +Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html). +By contributing to this project, you agree to abide by its terms. + +## Development tools + +- Continuous code testing is possible thanks to [GitHub + actions](https://www.tidyverse.org/blog/2020/04/usethis-1-6-0/) + through *[usethis](https://CRAN.R-project.org/package=usethis)*, + *[remotes](https://CRAN.R-project.org/package=remotes)*, + *[sysreqs](https://github.com/r-hub/sysreqs)* and + *[rcmdcheck](https://CRAN.R-project.org/package=rcmdcheck)* customized + to use [Bioconductor’s docker + containers](https://www.bioconductor.org/help/docker/) and + *[BiocCheck](https://bioconductor.org/packages/3.19/BiocCheck)*. +- Code coverage assessment is possible thanks to + [codecov](https://codecov.io/gh) and + *[covr](https://CRAN.R-project.org/package=covr)*. +- The [documentation + website](http://lieberinstitute.github.io/spatialLIBD) is + automatically updated thanks to + *[pkgdown](https://CRAN.R-project.org/package=pkgdown)*. +- The code is styled automatically thanks to + *[styler](https://CRAN.R-project.org/package=styler)*. +- The documentation is formatted thanks to + *[devtools](https://CRAN.R-project.org/package=devtools)* and + *[roxygen2](https://CRAN.R-project.org/package=roxygen2)*. + +For more details, check the `dev` directory. + +This package was developed using +*[biocthis](https://bioconductor.org/packages/3.19/biocthis)*. + + + +
+ +
+ + + diff --git a/inst/app/www/documentation_sce_layer.md b/inst/app/www/documentation_sce_layer.md index d2b31881..5b6660c7 100644 --- a/inst/app/www/documentation_sce_layer.md +++ b/inst/app/www/documentation_sce_layer.md @@ -1,15 +1,15 @@ Layer-level `spatialLIBD` documentation ======================================= -This document describes the layer-level portion of the shiny web application made by the [`spatialLIBD`](https://bioconductor.org/packages/spatialLIBD) Bioconductor package. You can either find the documentation about this package through [Bioconductor](https://bioconductor.org/packages/spatialLIBD) or at the [`spatialLIBD` documentation website](http://lieberinstitute.github.io/spatialLIBD). Below we explain the options common across tabs and each of the tabs at the layer-level data. As explained in the documentation, the layer-level data is the result of pseudo-bulking the spot-level data to compress it, reduce sparsity and power more analyses. +This document describes the layer-level portion of the shiny web application made by the [`spatialLIBD`](https://bioconductor.org/packages/spatialLIBD) Bioconductor package. You can either find the documentation about this package through [Bioconductor](https://bioconductor.org/packages/spatialLIBD) or at the [`spatialLIBD` documentation website](http://lieberinstitute.github.io/spatialLIBD). Below we explain the options common across tabs and each of the tabs at the layer-level data. As explained in the documentation, the layer-level data is the result of pseudo-bulking the spot-level data to compress it, reduce sparsity, and power more analyses. ## Slides and videos You might find the following slides useful for understanding the features from this part of the web application. Particularly slides 10-12 and 15-22. - + -These slides were part of our 2021-04-27 webinar for BioTuring that you can watch from [their website](https://bioturing.com/sources/webinar/60752954a433e26dd8affcbd) or YouTube: +These slides were part of our 2021-04-27 webinar for BioTuring that you can watch on YouTube: @@ -79,7 +79,7 @@ spatialLIBD::layer_boxplot() ## Gene Set Enrichment -This tab allows you to upload a CSV file that has a particular format as illustrated [in this example file](https://github.com/LieberInstitute/spatialLIBD/blob/master/data-raw/asd_sfari_geneList.csv). This CSV file should contain: +This tab allows you to upload a CSV file that has a particular format as illustrated [in this example file](https://github.com/LieberInstitute/spatialLIBD/blob/devel/data-raw/asd_sfari_geneList.csv). This CSV file should contain: * one column per gene set of interest labeled as column names on the first row, * no row names, @@ -97,7 +97,7 @@ spatialLIBD::gene_set_enrichment_plot() ## Spatial registration -If you have a single nucleus or single cell RNA-sequencing (snRNA-seq) (scRNA-seq) dataset, you might group your cells into clusters. Once you do, you could compress the data by pseudo-bulking (like we did to go from `spe` to `sce_layer`). You could then compute `enrichment` (`pairwise`, `anova`) statistics for your cell clusters. If you do so, you can then upload a specially formatted CSV file just like the one in [this example file](https://github.com/LieberInstitute/spatialLIBD/blob/master/data-raw/tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer.csv). This file has: +If you have a single nucleus or single cell RNA-sequencing (snRNA-seq) (scRNA-seq) dataset, you might group your cells into clusters. Once you do, you could compress the data by pseudo-bulking (like we did to go from `spe` to `sce_layer`). You could then compute `enrichment` (`pairwise`, `anova`) statistics for your cell clusters. If you do so, you can then upload a specially formatted CSV file just like the one in [this example file](https://github.com/LieberInstitute/spatialLIBD/blob/devel/data-raw/tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer.csv). This file has: * column names, * human Ensembl gene IDs as the row names (first column, no name for the column), diff --git a/inst/app/www/documentation_spe.md b/inst/app/www/documentation_spe.md index 0daae913..61bc610c 100644 --- a/inst/app/www/documentation_spe.md +++ b/inst/app/www/documentation_spe.md @@ -7,9 +7,9 @@ This document describes the spot-level portion of the shiny web application made You might find the following slides useful for understanding the features from this part of the web application. - + -These slides were part of our 2021-04-27 webinar for BioTuring that you can watch from [their website](https://bioturing.com/sources/webinar/60752954a433e26dd8affcbd) or YouTube: +These slides were part of our 2021-04-27 webinar for BioTuring that you can watch on YouTube: @@ -43,7 +43,11 @@ Throughout the rest of this document, we'll refer to this object by the name `sp - resulting from using a shared nearest neighbors approach with 50 neighbors cut at 4 up to 28 clusters. These are `SNN_k50_k4` up to `SNN_k50_k28`. - described in Figure 7 from our paper (DOI: [10.1038/s41593-020-00787-0](https://doi.org/10.1038/s41593-020-00787-0)) such as `SpatialDE_PCA`, `SpatialDE_pool_PCA` and others. * `Reduced dimensions`: which reduced dimension to visualize on the `clusters (interactive)` tab. Only the first two dimensions will be shown. -* `Continuous variable to plot`: which gene or continuous variable (such as the cell count, the ratio of the mitochondrial chromosome expression) to visualize in the gene tabs as well as on the `clusters (interactive)` tab. +* `Continuous variable(s) to plot`: which gene(s) or continuous variable(s) (such as the cell count, the ratio of the mitochondrial chromosome expression) to visualize in the gene tabs as well as on the `clusters (interactive)` tab. Multiple choices may be selected, in which case "Multi-gene method" controls the method used to combine information from all selected variables. +* `Multi-gene method`: when selecting more than one continuous variable, the method used to combine information from all selected variables. See [the multi gene plots vignette](https://research.libd.org/spatialLIBD/articles/multi_gene_plots.html) for more information about these methods for combining multiple continuous variables. + * `z_score`: to summarize multiple continuous variables, each is normalized to represent a Z-score. The multiple scores are then averaged. + * `pca`: PCA dimension reduction is conducted on the matrix formed by the continuous variables, and the first PC is then used and multiplied by -1 if needed to have the majority of the values for PC1 to be positive. + * `sparsity`: the proportion of continuous variables with positive values for each spot is computed. * `Gene scale`: whether to use the raw expression values (`counts`) or the scaled and log transformed values (`logcounts`). * `Image name`: the name of the background image to use. You can edit this image on the `Edit image` tab. * `Spot transparency level`: the transparency of the spots in the visualizations. It can be useful if the spot colors are blocking the background image. diff --git a/inst/extdata/metadata_Visium_SPG_AD.csv b/inst/extdata/metadata_Visium_SPG_AD.csv new file mode 100644 index 00000000..7fb11177 --- /dev/null +++ b/inst/extdata/metadata_Visium_SPG_AD.csv @@ -0,0 +1,5 @@ +"Title","Description","BiocVersion","Genome","SourceType","SourceUrl","SourceVersion","Species","TaxonomyId","Coordinate_1_based","DataProvider","Maintainer","RDataClass","DispatchClass","RDataPath","Tags" +"Visium_SPG_AD_Visium_wholegenome_spe","SpatialExperiment object at the spot-level for the Visium_SPG_AD human brain (inferior temporal cortex ITC) Alzheimer's Disease (AD) spatial transcriptomics data (n = 10) from the Visium Spatial Proteogenomics (Visium-SPG) platform from 10x Genomics (at the whole genome level) generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Mar 17 2023","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","SpatialExperiment","Rda","spatialLIBD/spatialLIBD_files/Visium_SPG_AD_spe_wholegenome.Rdata","Visium_SPG_AD_Alzheimer_Disease_ITC_spatialLIBD" +"Visium_SPG_AD_Visium_targeted_spe","SpatialExperiment object at the spot-level for the Visium_SPG_AD human brain (inferior temporal cortex ITC) Alzheimer's Disease (AD) spatial transcriptomics data (n = 10) from the Visium Spatial Proteogenomics (Visium-SPG) platform from 10x Genomics (with a targeted sequencing panel) generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Mar 17 2023","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","SpatialExperiment","Rda","spatialLIBD/spatialLIBD_files/Visium_SPG_AD_spe_targeted.Rdata","Visium_SPG_AD_Alzheimer_Disease_ITC_spatialLIBD" +"Visium_SPG_AD_Visium_wholegenome_pseudobulk_spe","Pseudo-bulked SingleCellExperiment object (SpatialExperiment object without spatial data) for the seven Alzheimer's Disease (AD) pathology levels from the Visium_SPG_AD human brain (inferior temporal cortex ITC) Alzheimer's Disease (AD) spatial transcriptomics data (n = 10) from the Visium Spatial Proteogenomics (Visium-SPG) platform from 10x Genomics (at the whole genome level) generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Mar 17 2023","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","SpatialExperiment","Rds","spatialLIBD/spatialLIBD_files/sce_pseudo_pathology_wholegenome.rds","Visium_SPG_AD_Alzheimer_Disease_ITC_spatialLIBD" +"Visium_SPG_AD_Visium_wholegenome_modeling_results","List of modeling results for the seven Alzheimer's Disease (AD) pathology levels from the Visium_SPG_AD human brain (inferior temporal cortex ITC) Alzheimer's Disease (AD) spatial transcriptomics data (n = 10) from the Visium Spatial Proteogenomics (Visium-SPG) platform from 10x Genomics (at the whole genome level) generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Mar 17 2023","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","list","Rds","spatialLIBD/spatialLIBD_files/Visium_IF_AD_modeling_results.Rdata","Visium_SPG_AD_Alzheimer_Disease_ITC_spatialLIBD" diff --git a/inst/extdata/metadata_spatialDLPFC.csv b/inst/extdata/metadata_spatialDLPFC.csv index f5d66b39..bcb2b822 100644 --- a/inst/extdata/metadata_spatialDLPFC.csv +++ b/inst/extdata/metadata_spatialDLPFC.csv @@ -1,6 +1,7 @@ "Title","Description","BiocVersion","Genome","SourceType","SourceUrl","SourceVersion","Species","TaxonomyId","Coordinate_1_based","DataProvider","Maintainer","RDataClass","DispatchClass","RDataPath","Tags" -"spatialDLPFC_Visium_spe","SpatialExperiment object at the spot-level for the spatialDLPFC human brain (DLPFC) spatial transcriptomics data (n = 30) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Feb 13 2022","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","SpatialExperiment","Rds","spatialLIBD/spatialLIBD_files/spe_filtered_final_with_clusters_and_deconvolution_results.rds","spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" -"spatialDLPFC_Visium_pseudobulk_spe","Pseudo-bulked SingleCellExperiment object (SpatialExperiment object without spatial data) at the Sp09D spatial domain resolution for the spatialDLPFC human brain (DLPFC) spatial transcriptomics data (n = 30) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Feb 13 2022","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","SpatialExperiment","Rds","spatialLIBD/spatialLIBD_files/sce_pseudo_BayesSpace_k09.rds","spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" -"spatialDLPFC_Visium_modeling_results","List of modeling results at the Sp09D spatial domain resolution for the spatialDLPFC human brain (DLPFC) spatial transcriptomics data (n = 30) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Feb 13 2022","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","list","Rda","spatialLIBD/spatialLIBD_files/modeling_results_BayesSpace_k09.Rdata","spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" -"spatialDLPFC_Visium_SPG","SpatialExperiment object at the spot-level for the spatialDLPFC human brain (DLPFC) spatial transcriptomics data (n = 4) from the Visium Spatial Proteogenomics (Visium-SPG) platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Feb 13 2022","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","SpatialExperiment","Rds","spatialLIBD/spatialLIBD_files/spe.rds","spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" -"spatialDLPFC_snRNAseq","SingleCellExperiment object for the spatialDLPFC human brain (DLPFC) single nucleus transcriptomics data (snRNA-seq, n = 19) from the Chromium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Feb 13 2022","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","SingleCellExperiment","Rds","spatialLIBD/spatialLIBD_files/TBD.rds","spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" +"spatialDLPFC_Visium_spe","SpatialExperiment object at the spot-level for the spatialDLPFC human brain (DLPFC) spatial transcriptomics data (n = 30) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Mar 17 2023","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","SpatialExperiment","Rds","spatialLIBD/spatialLIBD_files/spe_filtered_final_with_clusters_and_deconvolution_results.rds","spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" +"spatialDLPFC_Visium_example_subset","SpatialExperiment object at the spot-level for the spatialDLPFC human brain (DLPFC) spatial transcriptomics data (n = 30) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package. Subsetted to just 3 samples with only the lowres images for example purposes.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Mar 17 2023","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","SpatialExperiment","Rds","spatialLIBD/spatialLIBD_files/spatialDLPFC_spe_subset_example.rds","spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" +"spatialDLPFC_Visium_pseudobulk_spe","Pseudo-bulked SingleCellExperiment object (SpatialExperiment object without spatial data) at the Sp09D spatial domain resolution for the spatialDLPFC human brain (DLPFC) spatial transcriptomics data (n = 30) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Mar 17 2023","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","SpatialExperiment","Rds","spatialLIBD/spatialLIBD_files/sce_pseudo_BayesSpace_k09.rds","spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" +"spatialDLPFC_Visium_modeling_results","List of modeling results at the Sp09D spatial domain resolution for the spatialDLPFC human brain (DLPFC) spatial transcriptomics data (n = 30) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Mar 17 2023","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","list","Rda","spatialLIBD/spatialLIBD_files/modeling_results_BayesSpace_k09.Rdata","spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" +"spatialDLPFC_Visium_SPG","SpatialExperiment object at the spot-level for the spatialDLPFC human brain (DLPFC) spatial transcriptomics data (n = 4) from the Visium Spatial Proteogenomics (Visium-SPG) platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Mar 17 2023","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","SpatialExperiment","Rds","spatialLIBD/spatialLIBD_files/spe.rds","spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" +"spatialDLPFC_snRNAseq","SingleCellExperiment object for the spatialDLPFC human brain (DLPFC) single nucleus transcriptomics data (snRNA-seq, n = 19) from the Chromium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.","3.17","GRCh38","GTF","https://bioconductor.org/packages/spatialLIBD","Mar 17 2023","Homo sapiens",9606,TRUE,"LIBD","Leonardo Collado-Torres ","SingleCellExperiment","FilePath","spatialLIBD/spatialLIBD_files/sce_DLPFC_annotated.zip","spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD" diff --git a/inst/extdata/metadata_visiumStitched_brain.csv b/inst/extdata/metadata_visiumStitched_brain.csv new file mode 100644 index 00000000..b2f440fa --- /dev/null +++ b/inst/extdata/metadata_visiumStitched_brain.csv @@ -0,0 +1,4 @@ +Title,Description,BiocVersion,Genome,SourceType,SourceUrl,SourceVersion,Species,TaxonomyId,Coordinate_1_based,DataProvider,Maintainer,RDataClass,DispatchClass,RDataPath,Tags +visiumStitched_brain_spe,SpatialExperiment object at the spot-level for the spatially stitched human brain spatial transcriptomics data (n = 3) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.,3.19,GRCh38,GTF,https://bioconductor.org/packages/spatialLIBD,June 11 2024,Homo sapiens,9606,TRUE,LIBD,Leonardo Collado-Torres ,SpatialExperiment,Rds,spatialLIBD/spatialLIBD_files/visiumStitched_brain_spe.rds,visiumStitched_brain_spatialLIBD +visiumStitched_brain_spaceranger,Spaceranger outputs for the spatially stitched human brain (spatial transcriptomics data (n = 3) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package. Can be used with visiumStitched::build_spe() to construct a SpatialExperiment.,3.19,GRCh38,Zip,https://bioconductor.org/packages/spatialLIBD,June 11 2024,Homo sapiens,9606,TRUE,LIBD,Leonardo Collado-Torres ,list,Zip,spatialLIBD/spatialLIBD_files/visiumStitched_brain_spaceranger.zip,visiumStitched_brain_spatialLIBD +visiumStitched_brain_Fiji_out,Stitched PNG image and XML file from aligning human brain spatial transcriptomics data (n = 3) in Fiji. Data to align was from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and is available through the spatialLIBD Bioconductor package.,3.19,GRCh38,Zip,https://bioconductor.org/packages/spatialLIBD,June 11 2024,Homo sapiens,9606,TRUE,LIBD,Leonardo Collado-Torres ,list,Zip,spatialLIBD/spatialLIBD_files/visiumStitched_brain_imagej_out.zip,visiumStitched_brain_spatialLIBD diff --git a/inst/scripts/make-metadata_Visium_SPG_AD.R b/inst/scripts/make-metadata_Visium_SPG_AD.R new file mode 100644 index 00000000..913c0d7c --- /dev/null +++ b/inst/scripts/make-metadata_Visium_SPG_AD.R @@ -0,0 +1,70 @@ +library("here") +library("sessioninfo") + + +outdir <- "spatialLIBD_files" +pkgname <- "spatialLIBD" + + +meta <- data.frame( + Title = c( + "Visium_SPG_AD_Visium_wholegenome_spe", + "Visium_SPG_AD_Visium_targeted_spe", + "Visium_SPG_AD_Visium_wholegenome_pseudobulk_spe", + "Visium_SPG_AD_Visium_wholegenome_modeling_results" + ), + Description = c( + "SpatialExperiment object at the spot-level for the Visium_SPG_AD human brain (inferior temporal cortex ITC) Alzheimer's Disease (AD) spatial transcriptomics data (n = 10) from the Visium Spatial Proteogenomics (Visium-SPG) platform from 10x Genomics (at the whole genome level) generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.", + "SpatialExperiment object at the spot-level for the Visium_SPG_AD human brain (inferior temporal cortex ITC) Alzheimer's Disease (AD) spatial transcriptomics data (n = 10) from the Visium Spatial Proteogenomics (Visium-SPG) platform from 10x Genomics (with a targeted sequencing panel) generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.", + "Pseudo-bulked SingleCellExperiment object (SpatialExperiment object without spatial data) for the seven Alzheimer's Disease (AD) pathology levels from the Visium_SPG_AD human brain (inferior temporal cortex ITC) Alzheimer's Disease (AD) spatial transcriptomics data (n = 10) from the Visium Spatial Proteogenomics (Visium-SPG) platform from 10x Genomics (at the whole genome level) generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.", + "List of modeling results for the seven Alzheimer's Disease (AD) pathology levels from the Visium_SPG_AD human brain (inferior temporal cortex ITC) Alzheimer's Disease (AD) spatial transcriptomics data (n = 10) from the Visium Spatial Proteogenomics (Visium-SPG) platform from 10x Genomics (at the whole genome level) generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package." + ), + BiocVersion = "3.17", + Genome = "GRCh38", + SourceType = "GTF", + SourceUrl = "https://bioconductor.org/packages/spatialLIBD", + SourceVersion = "Mar 17 2023", + Species = "Homo sapiens", + TaxonomyId = 9606, + Coordinate_1_based = TRUE, + DataProvider = "LIBD", + Maintainer = "Leonardo Collado-Torres ", + RDataClass = c( + "SpatialExperiment", + "SpatialExperiment", + "SpatialExperiment", + "list" + ), + DispatchClass = c("Rda", "Rda", "Rds", "Rds"), + RDataPath = file.path( + pkgname, + outdir, + c( + "Visium_SPG_AD_spe_wholegenome.Rdata", + "Visium_SPG_AD_spe_targeted.Rdata", + "sce_pseudo_pathology_wholegenome.rds", + "Visium_IF_AD_modeling_results.Rdata" + ) + ), + Tags = "Visium_SPG_AD_Alzheimer_Disease_ITC_spatialLIBD", + row.names = NULL, + stringsAsFactors = FALSE +) + +write.csv( + meta, + file = here::here("inst", "extdata", "metadata_Visium_SPG_AD.csv"), + row.names = FALSE +) + +## Check +if (FALSE) { + AnnotationHubData::makeAnnotationHubMetadata(here::here(), fileName = "metadata_Visium_SPG_AD.csv") +} + +## Reproducibility information +print("Reproducibility information:") +Sys.time() +proc.time() +options(width = 120) +session_info() diff --git a/inst/scripts/make-metadata_spatialDLPFC.R b/inst/scripts/make-metadata_spatialDLPFC.R index f502414a..b728b856 100644 --- a/inst/scripts/make-metadata_spatialDLPFC.R +++ b/inst/scripts/make-metadata_spatialDLPFC.R @@ -9,6 +9,7 @@ pkgname <- "spatialLIBD" meta <- data.frame( Title = c( "spatialDLPFC_Visium_spe", + "spatialDLPFC_Visium_example_subset", "spatialDLPFC_Visium_pseudobulk_spe", "spatialDLPFC_Visium_modeling_results", "spatialDLPFC_Visium_SPG", @@ -16,6 +17,7 @@ meta <- data.frame( ), Description = c( "SpatialExperiment object at the spot-level for the spatialDLPFC human brain (DLPFC) spatial transcriptomics data (n = 30) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.", + "SpatialExperiment object at the spot-level for the spatialDLPFC human brain (DLPFC) spatial transcriptomics data (n = 30) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package. Subsetted to just 3 samples with only the lowres images for example purposes.", "Pseudo-bulked SingleCellExperiment object (SpatialExperiment object without spatial data) at the Sp09D spatial domain resolution for the spatialDLPFC human brain (DLPFC) spatial transcriptomics data (n = 30) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.", "List of modeling results at the Sp09D spatial domain resolution for the spatialDLPFC human brain (DLPFC) spatial transcriptomics data (n = 30) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.", "SpatialExperiment object at the spot-level for the spatialDLPFC human brain (DLPFC) spatial transcriptomics data (n = 4) from the Visium Spatial Proteogenomics (Visium-SPG) platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.", @@ -25,29 +27,31 @@ meta <- data.frame( Genome = "GRCh38", SourceType = "GTF", SourceUrl = "https://bioconductor.org/packages/spatialLIBD", - SourceVersion = "Feb 13 2022", + SourceVersion = "Mar 17 2023", Species = "Homo sapiens", TaxonomyId = 9606, Coordinate_1_based = TRUE, DataProvider = "LIBD", Maintainer = "Leonardo Collado-Torres ", RDataClass = c( + "SpatialExperiment", "SpatialExperiment", "SpatialExperiment", "list", "SpatialExperiment", "SingleCellExperiment" ), - DispatchClass = c("Rds", "Rds", "Rda", "Rds", "Rds"), + DispatchClass = c("Rds", "Rds", "Rds", "Rda", "Rds", "FilePath"), RDataPath = file.path( pkgname, outdir, c( "spe_filtered_final_with_clusters_and_deconvolution_results.rds", + "spatialDLPFC_spe_subset_example.rds", "sce_pseudo_BayesSpace_k09.rds", "modeling_results_BayesSpace_k09.Rdata", "spe.rds", - "TBD.rds" + "sce_DLPFC_annotated.zip" ) ), Tags = "spatialDLPFC_Visium_VisiumSPG_snRNAseq_spatialLIBD", diff --git a/inst/scripts/make-metadata_visiumStitched_brain.R b/inst/scripts/make-metadata_visiumStitched_brain.R new file mode 100644 index 00000000..249ef1ac --- /dev/null +++ b/inst/scripts/make-metadata_visiumStitched_brain.R @@ -0,0 +1,189 @@ +library(here) +library(sessioninfo) +library(tidyverse) + +outdir <- "spatialLIBD_files" +pkgname <- "spatialLIBD" + +meta <- tibble( + Title = c("visiumStitched_brain_spe", "visiumStitched_brain_spaceranger", "visiumStitched_brain_Fiji_out"), + Description = c( + "SpatialExperiment object at the spot-level for the spatially stitched human brain spatial transcriptomics data (n = 3) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package.", + "Spaceranger outputs for the spatially stitched human brain (spatial transcriptomics data (n = 3) from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and available through the spatialLIBD Bioconductor package. Can be used with visiumStitched::build_spe() to construct a SpatialExperiment.", + "Stitched PNG image and XML file from aligning human brain spatial transcriptomics data (n = 3) in Fiji. Data to align was from the Visium platform from 10x Genomics generated by the Lieber Institute for Brain Development (LIBD) and is available through the spatialLIBD Bioconductor package." + ), + BiocVersion = "3.19", + Genome = "GRCh38", + SourceType = c("GTF", "Zip", "Zip"), + SourceUrl = "https://bioconductor.org/packages/spatialLIBD", + SourceVersion = "June 11 2024", + Species = "Homo sapiens", + TaxonomyId = 9606, + Coordinate_1_based = TRUE, + DataProvider = "LIBD", + Maintainer = "Leonardo Collado-Torres ", + RDataClass = c("SpatialExperiment", "list", "list"), + DispatchClass = c("Rds", "Zip", "Zip"), + RDataPath = file.path( + pkgname, + outdir, + c( + "visiumStitched_brain_spe.rds", "visiumStitched_brain_spaceranger.zip", + "visiumStitched_brain_fiji_out.zip" + ) + ), + Tags = "visiumStitched_brain_spatialLIBD" +) + +write_csv(meta, here("inst", "extdata", "metadata_visiumStitched_brain.csv")) + +## Check interactively +if (FALSE) { + AnnotationHubData::makeAnnotationHubMetadata( + here(), + fileName = "metadata_visiumStitched_brain.csv" + ) +} + +## Reproducibility information +print("Reproducibility information:") +Sys.time() +proc.time() +options(width = 120) +session_info() + +# ─ Session info ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +# setting value +# version R version 4.4.0 Patched (2024-05-22 r86590) +# os Rocky Linux 9.2 (Blue Onyx) +# system x86_64, linux-gnu +# ui X11 +# language (EN) +# collate en_US.UTF-8 +# ctype en_US.UTF-8 +# tz US/Eastern +# date 2024-06-11 +# pandoc 3.1.13 @ /jhpce/shared/community/core/conda_R/4.4/bin/pandoc + +# ─ Packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +# package * version date (UTC) lib source +# abind 1.4-5 2016-07-21 [2] CRAN (R 4.4.0) +# AnnotationDbi 1.66.0 2024-05-01 [2] Bioconductor 3.19 (R 4.4.0) +# AnnotationForge 1.46.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# AnnotationHub 3.12.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# AnnotationHubData 1.34.0 2024-04-30 [1] Bioconductor 3.19 (R 4.4.0) +# Biobase 2.64.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# BiocBaseUtils 1.6.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# BiocCheck 1.40.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# BiocFileCache 2.12.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# BiocGenerics 0.50.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# BiocIO 1.14.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# BiocManager 1.30.23 2024-05-04 [2] CRAN (R 4.4.0) +# BiocParallel 1.38.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# BiocVersion 3.19.1 2024-04-17 [2] Bioconductor 3.19 (R 4.4.0) +# biocViews 1.72.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# biomaRt 2.60.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# Biostrings 2.72.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# bit 4.0.5 2022-11-15 [2] CRAN (R 4.4.0) +# bit64 4.0.5 2020-08-30 [2] CRAN (R 4.4.0) +# bitops 1.0-7 2021-04-24 [2] CRAN (R 4.4.0) +# blob 1.2.4 2023-03-17 [2] CRAN (R 4.4.0) +# cachem 1.1.0 2024-05-16 [2] CRAN (R 4.4.0) +# cli 3.6.2 2023-12-11 [2] CRAN (R 4.4.0) +# codetools 0.2-20 2024-03-31 [3] CRAN (R 4.4.0) +# colorspace 2.1-0 2023-01-23 [2] CRAN (R 4.4.0) +# crayon 1.5.2 2022-09-29 [2] CRAN (R 4.4.0) +# curl 5.2.1 2024-03-01 [2] CRAN (R 4.4.0) +# DBI 1.2.2 2024-02-16 [2] CRAN (R 4.4.0) +# dbplyr 2.5.0 2024-03-19 [2] CRAN (R 4.4.0) +# DelayedArray 0.30.1 2024-05-07 [2] Bioconductor 3.19 (R 4.4.0) +# digest 0.6.35 2024-03-11 [2] CRAN (R 4.4.0) +# dplyr * 1.1.4 2023-11-17 [2] CRAN (R 4.4.0) +# fansi 1.0.6 2023-12-08 [2] CRAN (R 4.4.0) +# fastmap 1.2.0 2024-05-15 [2] CRAN (R 4.4.0) +# filelock 1.0.3 2023-12-11 [2] CRAN (R 4.4.0) +# forcats * 1.0.0 2023-01-29 [2] CRAN (R 4.4.0) +# formatR 1.14 2023-01-17 [2] CRAN (R 4.4.0) +# futile.logger * 1.4.3 2016-07-10 [2] CRAN (R 4.4.0) +# futile.options 1.0.1 2018-04-20 [2] CRAN (R 4.4.0) +# generics 0.1.3 2022-07-05 [2] CRAN (R 4.4.0) +# GenomeInfoDb 1.40.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# GenomeInfoDbData 1.2.12 2024-05-23 [2] Bioconductor +# GenomicAlignments 1.40.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# GenomicFeatures 1.56.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# GenomicRanges 1.56.0 2024-05-01 [2] Bioconductor 3.19 (R 4.4.0) +# ggplot2 * 3.5.1 2024-04-23 [2] CRAN (R 4.4.0) +# glue 1.7.0 2024-01-09 [2] CRAN (R 4.4.0) +# graph 1.82.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# gtable 0.3.5 2024-04-22 [2] CRAN (R 4.4.0) +# here * 1.0.1 2020-12-13 [2] CRAN (R 4.4.0) +# hms 1.1.3 2023-03-21 [2] CRAN (R 4.4.0) +# httr 1.4.7 2023-08-15 [2] CRAN (R 4.4.0) +# httr2 1.0.1 2024-04-01 [2] CRAN (R 4.4.0) +# IRanges 2.38.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# jsonlite 1.8.8 2023-12-04 [2] CRAN (R 4.4.0) +# KEGGREST 1.44.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# knitr 1.46 2024-04-06 [2] CRAN (R 4.4.0) +# lambda.r 1.2.4 2019-09-18 [2] CRAN (R 4.4.0) +# lattice 0.22-6 2024-03-20 [3] CRAN (R 4.4.0) +# lifecycle 1.0.4 2023-11-07 [2] CRAN (R 4.4.0) +# lubridate * 1.9.3 2023-09-27 [2] CRAN (R 4.4.0) +# magrittr 2.0.3 2022-03-30 [2] CRAN (R 4.4.0) +# Matrix 1.7-0 2024-04-26 [3] CRAN (R 4.4.0) +# MatrixGenerics 1.16.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# matrixStats 1.3.0 2024-04-11 [2] CRAN (R 4.4.0) +# memoise 2.0.1 2021-11-26 [2] CRAN (R 4.4.0) +# munsell 0.5.1 2024-04-01 [2] CRAN (R 4.4.0) +# OrganismDbi 1.46.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# pillar 1.9.0 2023-03-22 [2] CRAN (R 4.4.0) +# pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.4.0) +# png 0.1-8 2022-11-29 [2] CRAN (R 4.4.0) +# prettyunits 1.2.0 2023-09-24 [2] CRAN (R 4.4.0) +# progress 1.2.3 2023-12-06 [2] CRAN (R 4.4.0) +# purrr * 1.0.2 2023-08-10 [2] CRAN (R 4.4.0) +# R6 2.5.1 2021-08-19 [2] CRAN (R 4.4.0) +# rappdirs 0.3.3 2021-01-31 [2] CRAN (R 4.4.0) +# RBGL 1.80.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# RCurl 1.98-1.14 2024-01-09 [2] CRAN (R 4.4.0) +# readr * 2.1.5 2024-01-10 [2] CRAN (R 4.4.0) +# restfulr 0.0.15 2022-06-16 [2] CRAN (R 4.4.0) +# rjson 0.2.21 2022-01-09 [2] CRAN (R 4.4.0) +# rlang 1.1.3 2024-01-10 [2] CRAN (R 4.4.0) +# rprojroot 2.0.4 2023-11-05 [2] CRAN (R 4.4.0) +# Rsamtools 2.20.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# RSQLite 2.3.6 2024-03-31 [2] CRAN (R 4.4.0) +# rtracklayer 1.64.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# RUnit 0.4.33 2024-02-22 [2] CRAN (R 4.4.0) +# S4Arrays 1.4.1 2024-05-20 [2] Bioconductor 3.19 (R 4.4.0) +# S4Vectors 0.42.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# scales 1.3.0 2023-11-28 [2] CRAN (R 4.4.0) +# sessioninfo * 1.2.2 2021-12-06 [2] CRAN (R 4.4.0) +# SparseArray 1.4.5 2024-05-20 [2] Bioconductor 3.19 (R 4.4.0) +# stringdist 0.9.12 2023-11-28 [2] CRAN (R 4.4.0) +# stringi 1.8.4 2024-05-06 [2] CRAN (R 4.4.0) +# stringr * 1.5.1 2023-11-14 [2] CRAN (R 4.4.0) +# SummarizedExperiment 1.34.0 2024-05-01 [2] Bioconductor 3.19 (R 4.4.0) +# tibble * 3.2.1 2023-03-20 [2] CRAN (R 4.4.0) +# tidyr * 1.3.1 2024-01-24 [2] CRAN (R 4.4.0) +# tidyselect 1.2.1 2024-03-11 [2] CRAN (R 4.4.0) +# tidyverse * 2.0.0 2023-02-22 [2] CRAN (R 4.4.0) +# timechange 0.3.0 2024-01-18 [2] CRAN (R 4.4.0) +# txdbmaker 1.0.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# tzdb 0.4.0 2023-05-12 [2] CRAN (R 4.4.0) +# UCSC.utils 1.0.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# utf8 1.2.4 2023-10-22 [2] CRAN (R 4.4.0) +# vctrs 0.6.5 2023-12-01 [2] CRAN (R 4.4.0) +# vroom 1.6.5 2023-12-05 [2] CRAN (R 4.4.0) +# withr 3.0.0 2024-01-16 [2] CRAN (R 4.4.0) +# xfun 0.44 2024-05-15 [2] CRAN (R 4.4.0) +# XML 3.99-0.16.1 2024-01-22 [2] CRAN (R 4.4.0) +# xml2 1.3.6 2023-12-04 [2] CRAN (R 4.4.0) +# XVector 0.44.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) +# yaml 2.3.8 2023-12-11 [2] CRAN (R 4.4.0) +# zlibbioc 1.50.0 2024-04-30 [2] Bioconductor 3.19 (R 4.4.0) + +# [1] /users/neagles/R/4.4 +# [2] /jhpce/shared/community/core/conda_R/4.4/R/lib64/R/site-library +# [3] /jhpce/shared/community/core/conda_R/4.4/R/lib64/R/library + +# ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── diff --git a/inst/spe_wrapper_app/www/documentation_spe.md b/inst/spe_wrapper_app/www/documentation_spe.md index fea08d52..45df9d1c 100644 --- a/inst/spe_wrapper_app/www/documentation_spe.md +++ b/inst/spe_wrapper_app/www/documentation_spe.md @@ -7,9 +7,9 @@ This document describes the spot-level portion of the shiny web application made You might find the following slides useful for understanding the features from this part of the web application. - + -These slides were part of our 2021-04-27 webinar for BioTuring that you can watch from [their website](https://bioturing.com/sources/webinar/60752954a433e26dd8affcbd) or YouTube: +These slides were part of our 2021-04-27 webinar for BioTuring that you can watch on YouTube: diff --git a/man/add_images.Rd b/man/add_images.Rd index dfe964fd..6e85ffb5 100644 --- a/man/add_images.Rd +++ b/man/add_images.Rd @@ -14,11 +14,11 @@ add_images( ) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} \item{image_dir}{A \code{character(1)} specifying a path to a directory containing image files with the pattern \code{sampleID_pattern.png}.} diff --git a/man/add_key.Rd b/man/add_key.Rd index 699aaf67..b13e780f 100644 --- a/man/add_key.Rd +++ b/man/add_key.Rd @@ -31,12 +31,19 @@ if (enough_ram()) { head(spe$key) ## We can clean it + spe$key_original <- spe$key spe$key <- NULL ## and then add it back - head(add_key(spe)$key) + spe <- add_key(spe) + head(spe$key) ## Note that the original 'key' order was 'sample_id'_'barcode' and we' ## have since changed it to 'barcode'_'sample_id'. + + ## Below we restore the original 'key' + spe$key <- spe$key_original + spe$key_original <- NULL + head(spe$key) } } diff --git a/man/add_qc_metrics.Rd b/man/add_qc_metrics.Rd new file mode 100644 index 00000000..7623b2c2 --- /dev/null +++ b/man/add_qc_metrics.Rd @@ -0,0 +1,103 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_qc_metrics.R +\name{add_qc_metrics} +\alias{add_qc_metrics} +\title{Quality Control for Spatial Data} +\usage{ +add_qc_metrics(spe, overwrite = FALSE) +} +\arguments{ +\item{spe}{a \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment} +object that has \code{sum_umi}, \code{sum_gene}, \code{expr_chrM_ratio}, and \code{in_tissue} +variables in the \code{colData(spe)}. Note that these are automatically created +when you build your \code{spe} object with \code{spatialLIBD::read10xVisiumWrapper()}.} + +\item{overwrite}{a \code{logical(1)} specifying whether to overwrite the 7 +\code{colData(spe)} columns that this function creates. If set to \code{FALSE} and any +of them are present, the function will return an error.} +} +\value{ +A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment} +with added quality control information added to the \code{colData()}. +\describe{ +\item{\code{scran_low_lib_size}}{shows spots that have a low library size.} +\item{\code{scran_low_n_features}}{spots with a low number of expressed genes.} +\item{\code{scran_high_Mito_percent}}{spots with a high percent of mitochondrial gene expression.} +\item{\code{scran_discard}}{spots belonging to either \code{scran_low_lib_size}, +\code{scran_low_n_feature}, or \code{scran_high_Mito_percent}.} +\item{\code{edge_spot}}{spots that are automatically detected as the edge spots +of the \code{in_tissue} section.} +\item{\code{edge_distance}}{closest distance in number of spots to either the +vertical or horizontal edge.} +\item{\code{scran_low_lib_size_edge}}{spots that have a low library size and +are an edge spot.} +} +} +\description{ +This function identify spots in a +\link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} (SPE) +with outlier quality control values: low \code{sum_umi} or \code{sum_gene}, or high +\code{expr_chrM_ratio}, utilizing \link[scuttle:isOutlier]{scuttle::isOutlier}. Also identifies in-tissue +edge spots and distance to the edge for each spot. +} +\details{ +The initial version of this function lives at +\url{https://github.com/LieberInstitute/Visium_SPG_AD/blob/master/code/07_spot_qc/01_qc_metrics_and_segmentation.R}. +} +\examples{ +## Obtain the necessary data +spe_pre_qc <- fetch_data("spatialDLPFC_Visium_example_subset") + +## For now, we fake out tissue spots in example data +spe_qc <- spe_pre_qc +spe_qc$in_tissue[spe_qc$array_col < 10] <- FALSE + +## adds QC metrics to colData of the spe +spe_qc <- add_qc_metrics(spe_qc, overwrite = TRUE) +vars <- colnames(colData(spe_qc)) +vars[grep("^(scran|edge)", vars)] + +## visualize edge spots +vis_clus(spe_qc, sampleid = "Br6432_ant", clustervar = "edge_spot") + +## specify your own colors +vis_clus( + spe_qc, + sampleid = "Br6432_ant", + clustervar = "edge_spot", + colors = c( + "TRUE" = "lightgreen", + "FALSE" = "pink", + "NA" = "red" + ) +) +vis_gene(spe_qc, sampleid = "Br6432_ant", geneid = "edge_distance", minCount = -1) + +## Visualize scran QC flags + +## Check the spots with low library size as detected by scran::isOutlier() +vis_clus(spe_qc, sample_id = "Br6432_ant", clustervar = "scran_low_lib_size") + +## Violin plot of library size with low library size highlighted in a +## different color. +scater::plotColData(spe_qc[, spe_qc$in_tissue], x = "sample_id", y = "sum_umi", colour_by = "scran_low_lib_size") + +## Check any spots that scran::isOutlier() flagged +vis_clus(spe_qc, sampleid = "Br6432_ant", clustervar = "scran_discard") + +## Low library spots that are on the edge of the tissue +vis_clus(spe_qc, sampleid = "Br6432_ant", clustervar = "scran_low_lib_size_edge") + +## Use `low_library_size` (or other variables) and `edge_distance` as you +## please. +spe_qc$our_low_lib_edge <- spe_qc$scran_low_lib_size & spe_qc$edge_distance < 5 + +vis_clus(spe_qc, sample_id = "Br6432_ant", clustervar = "our_low_lib_edge") + +## Clean up +rm(spe_qc, spe_pre_qc, vars) + +} +\author{ +Louise A. Huuki-Myers +} diff --git a/man/annotate_registered_clusters.Rd b/man/annotate_registered_clusters.Rd index a8305229..41d6245a 100644 --- a/man/annotate_registered_clusters.Rd +++ b/man/annotate_registered_clusters.Rd @@ -64,7 +64,7 @@ annotate_registered_clusters(cor_stats_layer, cutoff_merge_ratio = 1) } \seealso{ Other Layer correlation functions: -\code{\link{layer_stat_cor_plot}()}, -\code{\link{layer_stat_cor}()} +\code{\link{layer_stat_cor}()}, +\code{\link{layer_stat_cor_plot}()} } \concept{Layer correlation functions} diff --git a/man/check_modeling_results.Rd b/man/check_modeling_results.Rd index 38aa3b8b..284c07f3 100644 --- a/man/check_modeling_results.Rd +++ b/man/check_modeling_results.Rd @@ -12,7 +12,8 @@ check_modeling_results(modeling_results) columns \verb{f_stat_*} or \verb{t_stat_*} as well as \verb{p_value_*} and \verb{fdr_*} plus \code{ensembl}. The column name is used to extract the statistic results, the p-values, and the FDR adjusted p-values. Then the \code{ensembl} column is used -for matching in some cases. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +for matching in some cases. See \code{\link[=fetch_data]{fetch_data()}} for more details. Typically +this is the set of reference statistics used in \code{layer_stat_cor()}.} } \value{ The input object if all checks are passed. @@ -32,8 +33,8 @@ xx <- check_modeling_results(modeling_results) } \seealso{ Other Check input functions: -\code{\link{check_sce_layer}()}, \code{\link{check_sce}()}, +\code{\link{check_sce_layer}()}, \code{\link{check_spe}()} } \concept{Check input functions} diff --git a/man/check_sce_layer.Rd b/man/check_sce_layer.Rd index cac2d6c7..d2c9bf32 100644 --- a/man/check_sce_layer.Rd +++ b/man/check_sce_layer.Rd @@ -25,10 +25,11 @@ For more details please check the vignette documentation. } \examples{ -## Obtain the necessary data +## Obtain example data from the HumanPilot project +## (Maynard, Collado-Torres, et al, 2021) if (!exists("sce_layer")) sce_layer <- fetch_data("sce_layer") -## Check the object +## Check the pseudo-bulked data check_sce_layer(sce_layer) } \seealso{ diff --git a/man/check_spe.Rd b/man/check_spe.Rd index ec0a6a5a..577afeec 100644 --- a/man/check_spe.Rd +++ b/man/check_spe.Rd @@ -10,11 +10,11 @@ check_spe( ) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} \item{variables}{A \code{character()} vector of variable names expected to be present in \code{colData(spe)}.} @@ -39,8 +39,8 @@ if (enough_ram()) { \seealso{ Other Check input functions: \code{\link{check_modeling_results}()}, -\code{\link{check_sce_layer}()}, -\code{\link{check_sce}()} +\code{\link{check_sce}()}, +\code{\link{check_sce_layer}()} } \author{ Brenda Pardo, Leonardo Collado-Torres diff --git a/man/cluster_export.Rd b/man/cluster_export.Rd index 613e5cdf..238cb5aa 100644 --- a/man/cluster_export.Rd +++ b/man/cluster_export.Rd @@ -12,11 +12,11 @@ cluster_export( ) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} \item{cluster_var}{A \code{character(1)} with the name of the variable you wish to export.} diff --git a/man/cluster_import.Rd b/man/cluster_import.Rd index 778a73d8..2c7288fa 100644 --- a/man/cluster_import.Rd +++ b/man/cluster_import.Rd @@ -12,11 +12,11 @@ cluster_import( ) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} \item{cluster_dir}{A \code{character(1)} specifying the output directory, similar to the \code{outs/analysis/clustering} produced by SpaceRanger.} diff --git a/man/fetch_data.Rd b/man/fetch_data.Rd index a14c2cc3..8f40a5b7 100644 --- a/man/fetch_data.Rd +++ b/man/fetch_data.Rd @@ -6,9 +6,13 @@ \usage{ fetch_data( type = c("sce", "sce_layer", "modeling_results", "sce_example", "spe", - "spatialDLPFC_Visium", "spatialDLPFC_Visium_pseudobulk", - "spatialDLPFC_Visium_modeling_results", "spatialDLPFC_Visium_SPG", - "spatialDLPFC_snRNAseq"), + "spatialDLPFC_Visium", "spatialDLPFC_Visium_example_subset", + "spatialDLPFC_Visium_pseudobulk", "spatialDLPFC_Visium_modeling_results", + "spatialDLPFC_Visium_SPG", "spatialDLPFC_snRNAseq", + "Visium_SPG_AD_Visium_wholegenome_spe", "Visium_SPG_AD_Visium_targeted_spe", + "Visium_SPG_AD_Visium_wholegenome_pseudobulk_spe", + "Visium_SPG_AD_Visium_wholegenome_modeling_results", "visiumStitched_brain_spe", + "visiumStitched_brain_spaceranger", "visiumStitched_brain_Fiji_out"), destdir = tempdir(), eh = ExperimentHub::ExperimentHub(), bfc = BiocFileCache::BiocFileCache() @@ -26,10 +30,14 @@ object containing the layer-level data (pseudo-bulked from the spot-level), or \code{modeling_results} for the list of tables with the \code{enrichment}, \code{pairwise}, and \code{anova} model results from the layer-level data. It can also be \code{sce_example} which is a reduced version of \code{sce} just for example -purposes. As of BioC version 3.13 \code{spe} downloads a +purposes. The initial version of \code{spatialLIBD} downloaded data only from +\url{https://github.com/LieberInstitute/HumanPilot}. As of BioC version 3.13 +\code{spe} downloads a \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} object. -As of version 1.11.6 this function also allows downloading data from the -\url{http://research.libd.org/spatialDLPFC/} project.} +As of version 1.11.6, this function also allows downloading data from the +\url{http://research.libd.org/spatialDLPFC/} project. As of version 1.11.12, +data from \url{https://github.com/LieberInstitute/Visium_SPG_AD} can be +downloaded.} \item{destdir}{The destination directory to where files will be downloaded to in case the \code{ExperimentHub} resource is not available. If you already @@ -49,13 +57,15 @@ you have to assign to an object. If you didn't you can still avoid re-loading the object by using \code{.Last.value}. } \description{ -This function downloads from \code{ExperimentHub} the dorsolateral prefrontal -cortex (DLPFC) human Visium data and results analyzed by LIBD. If -\code{ExperimentHub} is not available, it will download the files from Dropbox -using \code{\link[utils:download.file]{utils::download.file()}} unless the files are present already at -\code{destdir}. Note that \code{ExperimentHub} will cache the data and automatically -detect if you have previously downloaded it, thus making it the preferred -way to interact with the data. +This function downloads from \code{ExperimentHub} Visium, Visium Spatial +Proteogenomics (Visium-SPG), or single nucleus RNA-seq (snRNA-seq) data +and results analyzed by LIBD from multiple projects. +If \code{ExperimentHub} is not available, this function will +download the files from Dropbox using \code{\link[BiocFileCache:BiocFileCache-class]{BiocFileCache::bfcrpath()}} unless the +files are present already at \code{destdir}. Note that \code{ExperimentHub} and +\code{BiocFileCache} will cache the data and automatically detect if you have +previously downloaded it, thus making it the preferred way to interact with +the data. } \details{ The data was initially prepared by scripts at @@ -70,4 +80,27 @@ if (!exists("sce_layer")) sce_layer <- fetch_data("sce_layer") ## Explore the data sce_layer + +## How to download and load "spatialDLPFC_snRNAseq" +\dontrun{ +sce_path_zip <- fetch_data("spatialDLPFC_snRNAseq") +sce_path <- unzip(sce_path_zip, exdir = tempdir()) +sce <- HDF5Array::loadHDF5SummarizedExperiment( + file.path(tempdir(), "sce_DLPFC_annotated") +) +sce +#> class: SingleCellExperiment +#> dim: 36601 77604 +#> metadata(3): Samples cell_type_colors cell_type_colors_broad +#> assays(2): counts logcounts +#> rownames(36601): MIR1302-2HG FAM138A ... AC007325.4 AC007325.2 +#> rowData names(7): source type ... gene_type binomial_deviance +#> colnames(77604): 1_AAACCCAAGTTCTCTT-1 1_AAACCCACAAGGTCTT-1 ... 19_TTTGTTGTCTCATTGT-1 19_TTTGTTGTCTTAAGGC-1 +#> colData names(32): Sample Barcode ... cellType_layer layer_annotation +#> reducedDimNames(4): GLMPCA_approx TSNE UMAP HARMONY +#> mainExpName: NULL +#> altExpNames(0): +lobstr::obj_size(sce) +#> 172.28 MB +} } diff --git a/man/figures/spatial_registration.png b/man/figures/spatial_registration.png new file mode 100755 index 00000000..4982574e Binary files /dev/null and b/man/figures/spatial_registration.png differ diff --git a/man/frame_limits.Rd b/man/frame_limits.Rd index e7ee4089..9274af88 100644 --- a/man/frame_limits.Rd +++ b/man/frame_limits.Rd @@ -13,11 +13,11 @@ frame_limits( ) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} \item{sampleid}{A \code{character(1)} specifying which sample to plot from \code{colData(spe)$sample_id} (formerly \code{colData(spe)$sample_name}).} @@ -57,8 +57,8 @@ if (enough_ram()) { } \seealso{ Other Spatial cluster visualization functions: -\code{\link{vis_clus_p}()}, \code{\link{vis_clus}()}, +\code{\link{vis_clus_p}()}, \code{\link{vis_grid_clus}()} } \author{ diff --git a/man/gene_set_enrichment.Rd b/man/gene_set_enrichment.Rd index cf8889a4..04702437 100644 --- a/man/gene_set_enrichment.Rd +++ b/man/gene_set_enrichment.Rd @@ -24,7 +24,8 @@ determining significance among the modeling results genes.} columns \verb{f_stat_*} or \verb{t_stat_*} as well as \verb{p_value_*} and \verb{fdr_*} plus \code{ensembl}. The column name is used to extract the statistic results, the p-values, and the FDR adjusted p-values. Then the \code{ensembl} column is used -for matching in some cases. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +for matching in some cases. See \code{\link[=fetch_data]{fetch_data()}} for more details. Typically +this is the set of reference statistics used in \code{layer_stat_cor()}.} \item{model_type}{A named element of the \code{modeling_results} list. By default that is either \code{enrichment} for the model that tests one human brain layer diff --git a/man/gene_set_enrichment_plot.Rd b/man/gene_set_enrichment_plot.Rd index 3a2cdc2a..8370f5a6 100644 --- a/man/gene_set_enrichment_plot.Rd +++ b/man/gene_set_enrichment_plot.Rd @@ -2,56 +2,70 @@ % Please edit documentation in R/gene_set_enrichment_plot.R \name{gene_set_enrichment_plot} \alias{gene_set_enrichment_plot} -\title{Plot the gene set enrichment results} +\title{Plot the gene set enrichment results with ComplexHeatmap} \usage{ gene_set_enrichment_plot( enrichment, PThresh = 12, ORcut = 3, enrichOnly = FALSE, - gene_count_col = NULL, - gene_count_row = NULL, - anno_title_col = NULL, - anno_title_row = NULL, - column_order = NULL, - anno_add = NULL, - mypal = c("white", (grDevices::colorRampPalette(RColorBrewer::brewer.pal(9, - "YlOrRd")))(50)) + mypal = c("white", RColorBrewer::brewer.pal(9, "YlOrRd")), + plot_SetSize_bar = FALSE, + gene_list_length = NULL, + model_sig_length = NULL, + model_colors = NULL, + ... ) } \arguments{ -\item{enrichment}{The output of \code{\link[=gene_set_enrichment]{gene_set_enrichment()}}. -\code{unique(enrichment$ID)}. Gets passed to \code{\link[=layer_matrix_plot]{layer_matrix_plot()}}.} +\item{enrichment}{The output of \code{\link[=gene_set_enrichment]{gene_set_enrichment()}}.} + +\item{xlabs}{A vector of names in the same order and length as +\code{unique(enrichment$ID)}.} \item{PThresh}{A \code{numeric(1)} specifying the P-value threshold for the maximum value in the \code{-log10(p)} scale.} \item{ORcut}{A \code{numeric(1)} specifying the P-value threshold for the minimum value in the \code{-log10(p)} scale for printing the odds ratio values -in the cells of the resulting plot.} +in the cells of the resulting plot. Defaults to 3 or p-val < 0.001.} \item{enrichOnly}{A \code{logical(1)} indicating whether to show only odds ratio values greater than 1.} -\item{mypal}{A vector with the color palette to use. Gets passed to -\code{\link[=layer_matrix_plot]{layer_matrix_plot()}}.} +\item{mypal}{A \code{character} vector with the color palette to use. Colors will +be in order from 0 to lowest P-val \code{max(-log(enrichment$Pval))}. Defaults to +white, yellow, red pallet.} -\item{layerHeights}{A \code{numeric()} vector of length equal to -\code{length(unique(enrichment$test)) + 1} that starts at 0 specifying where -to plot the y-axis breaks which can be used for re-creating the length of -each brain layer. Gets passed to \code{\link[=layer_matrix_plot]{layer_matrix_plot()}}.} +\item{plot_SetSize_bar}{A \code{logical(1)} indicating whether to plot SetSize +from \code{enrichment} as an \code{anno_barplot} at the top of the heatmap.} -\item{cex}{Passed to \code{\link[=layer_matrix_plot]{layer_matrix_plot()}}.} +\item{gene_list_length}{Optional named \code{numeric} vector indicating the length +of the \code{gene_list} used to calculate \code{enrichment}, if inclided and +\code{plot_setSize_bar = TRUE} then the top \code{anno_barplot} will show the \code{SetSize} +and the difference from the length of the input gene_list. +#' @param model_sig_length Optional named \code{numeric} vector indicating the +number of significant genes in \code{modeling_results} used to calculate +\code{enrichment}. If included \code{anno_barplot} will be added to rows. +#' @param model_colors named \code{character} vector of colors, Adds colors to +row annotations. +#' @param ... Additional parameters passed to +\code{\link[ComplexHeatmap:Heatmap]{ComplexHeatmap::Heatmap()}}.} } \value{ -A plot visualizing the gene set enrichment -odds ratio and p-value results. +A (\link[ComplexHeatmap:Heatmap-class]{Heatmap-class}) visualizing the +gene set enrichment odds ratio and p-value results. } \description{ This function takes the output of \code{\link[=gene_set_enrichment]{gene_set_enrichment()}} and creates a -heatmap visualization of the results. +ComplexHeatmap visualization of the results. Fill of the heatmap represents +the -log10(p-val), Odds-ratios are printed for test that pass specified +significance threshold \code{ORcut}. } \details{ +Includes functionality to plot the size of the input gene sets as barplot +annotations. + Check https://github.com/LieberInstitute/HumanPilot/blob/master/Analysis/Layer_Guesses/check_clinical_gene_sets.R to see a full script from where this family of functions is derived from. @@ -69,7 +83,7 @@ asd_sfari <- utils::read.csv( ) ## Format them appropriately -asd_sfari_geneList <- list( +asd_safari_geneList <- list( Gene_SFARI_all = asd_sfari$ensembl.id, Gene_SFARI_high = asd_sfari$ensembl.id[asd_sfari$gene.score < 3], Gene_SFARI_syndromic = asd_sfari$ensembl.id[asd_sfari$syndromic == 1] @@ -82,30 +96,79 @@ if (!exists("modeling_results")) { ## Compute the gene set enrichment results asd_sfari_enrichment <- gene_set_enrichment( - gene_list = asd_sfari_geneList, + gene_list = asd_safari_geneList, modeling_results = modeling_results, model_type = "enrichment" ) ## Visualize the gene set enrichment results -## with a custom color palette + +## Default plot +gene_set_enrichment_plot( + enrichment = asd_sfari_enrichment +) + +## Use a custom green color palette & use shorter gene set names (x-axis labels) +gene_set_enrichment_plot( + asd_sfari_enrichment, + xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)), + mypal = c("white",RColorBrewer::brewer.pal(9, "BuGn")) +) + +## Add bar plot annotations for SetSize of model genes in the gene_lists +gene_set_enrichment_plot( + asd_sfari_enrichment, + xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)), + plot_SetSize_bar = TRUE +) + +## Add stacked bar plot annotations showing SetSize and difference from the +## length of the input gene_list gene_set_enrichment_plot( asd_sfari_enrichment, xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)), - mypal = c( - "white", - grDevices::colorRampPalette( - RColorBrewer::brewer.pal(9, "BuGn") - )(50) - ) + plot_SetSize_bar = TRUE, + gene_list_length = lapply(asd_safari_geneList, length) ) layer_gene_count <- get_gene_enrichment_count(model_results = modeling_results) +## add bar plot annotations for number of enriched genes from layers +if (!exists("sce_layer")) sce_layer <- fetch_data(type = "sce_layer") +sig_genes <- sig_genes_extract( + modeling_results = modeling_results, + model = "enrichment", + sce_layer = sce_layer, + n = nrow(sce_layer) +) + +sig_genes <- sig_genes[sig_genes$fdr < 0.1,] +n_sig_model <- as.list(table(sig_genes$test)) + +## add barplot with n significant genes from modeling +gene_set_enrichment_plot( + asd_sfari_enrichment, + xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)), + plot_SetSize_bar = TRUE, + model_sig_length = n_sig_model +) + +## add color annotaions +gene_set_enrichment_plot( + asd_sfari_enrichment, + xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)), + plot_SetSize_bar = TRUE, + model_colors = libd_layer_colors +) + +## add barplot with n significant genes from modeling filled with model color gene_set_enrichment_plot( asd_sfari_enrichment, - gene_count_col = sfari_gene_count, - gene_count_row = layer_gene_count + xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)), + plot_SetSize_bar = TRUE, + model_sig_length = n_sig_model, + model_colors = libd_layer_colors ) + } \seealso{ layer_matrix_plot diff --git a/man/get_colors.Rd b/man/get_colors.Rd index ec9a4673..9373c33c 100644 --- a/man/get_colors.Rd +++ b/man/get_colors.Rd @@ -39,4 +39,16 @@ get_colors(clusters = sce_layer$kmeans_k7) ## Example where Polychrome::palette36.colors() gets used get_colors(clusters = letters[seq_len(13)]) + +## What happens if you have a logical variable with NAs? +set.seed(20240712) +log_var <- sample(c(TRUE, FALSE, NA), + 1000, + replace = TRUE, + prob = c(0.3, 0.15, 0.55) +) +log_var_sorted <- sort_clusters(log_var) +## A color does get assigned to 'NA', but will be overwritten by +## 'na_color' passed to `vis_clus_p()` and related functions. +get_colors(colors = NULL, clusters = log_var_sorted) } diff --git a/man/img_edit.Rd b/man/img_edit.Rd index bec5908e..a2392bce 100644 --- a/man/img_edit.Rd +++ b/man/img_edit.Rd @@ -26,11 +26,11 @@ img_edit( ) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} \item{sampleid}{A \code{character(1)} specifying which sample to plot from \code{colData(spe)$sample_id} (formerly \code{colData(spe)$sample_name}).} @@ -108,7 +108,7 @@ if (enough_ram()) { } \seealso{ Other Image editing functions: -\code{\link{img_update_all}()}, -\code{\link{img_update}()} +\code{\link{img_update}()}, +\code{\link{img_update_all}()} } \concept{Image editing functions} diff --git a/man/img_update.Rd b/man/img_update.Rd index 3d0c1315..b3b1ecdb 100644 --- a/man/img_update.Rd +++ b/man/img_update.Rd @@ -14,11 +14,11 @@ img_update( ) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} \item{sampleid}{A \code{character(1)} specifying which sample to plot from \code{colData(spe)$sample_id} (formerly \code{colData(spe)$sample_name}).} diff --git a/man/img_update_all.Rd b/man/img_update_all.Rd index 2040e55b..a4d4f36c 100644 --- a/man/img_update_all.Rd +++ b/man/img_update_all.Rd @@ -13,11 +13,11 @@ img_update_all( ) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} \item{image_id}{A \code{character(1)} with the name of the image ID you want to use in the background.} diff --git a/man/layer_boxplot.Rd b/man/layer_boxplot.Rd index ea3a0cc2..6b09350b 100644 --- a/man/layer_boxplot.Rd +++ b/man/layer_boxplot.Rd @@ -153,7 +153,7 @@ https://github.com/LieberInstitute/HumanPilot/blob/master/Analysis/Layer_Guesses } \seealso{ Other Layer modeling functions: -\code{\link{sig_genes_extract_all}()}, -\code{\link{sig_genes_extract}()} +\code{\link{sig_genes_extract}()}, +\code{\link{sig_genes_extract_all}()} } \concept{Layer modeling functions} diff --git a/man/layer_stat_cor.Rd b/man/layer_stat_cor.Rd index 3801a1d4..8ccf2c0f 100644 --- a/man/layer_stat_cor.Rd +++ b/man/layer_stat_cor.Rd @@ -13,20 +13,26 @@ layer_stat_cor( ) } \arguments{ -\item{stats}{A data.frame where the row names are Ensembl gene IDs, the -column names are labels for clusters of cells or cell types, and where +\item{stats}{A query \code{data.frame} where the row names are ENSEMBL gene IDs, +the column names are labels for clusters of cells or cell types, and where each cell contains the given statistic for that gene and cell type. These statistics should be computed similarly to the modeling results from the data we provide. For example, like the \code{enrichment} t-statistics that are derived from comparing one layer against the rest. The \code{stats} will be -matched and then correlated with our statistics.} +matched and then correlated with the reference statistics. + +If using the output of \code{registration_wrapper()} then use \verb{$enrichment} to +access the results from \code{registration_stats_enrichment()}. This function will +automatically extract the statistics and assign the ENSEMBL gene IDs to the +row names of the query matrix.} \item{modeling_results}{Defaults to the output of \code{fetch_data(type = 'modeling_results')}. This is a list of tables with the columns \verb{f_stat_*} or \verb{t_stat_*} as well as \verb{p_value_*} and \verb{fdr_*} plus \code{ensembl}. The column name is used to extract the statistic results, the p-values, and the FDR adjusted p-values. Then the \code{ensembl} column is used -for matching in some cases. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +for matching in some cases. See \code{\link[=fetch_data]{fetch_data()}} for more details. Typically +this is the set of reference statistics used in \code{layer_stat_cor()}.} \item{model_type}{A named element of the \code{modeling_results} list. By default that is either \code{enrichment} for the model that tests one human brain layer @@ -45,9 +51,9 @@ into \code{layerB-layerA}.} genes. The default is \code{NULL} in which case no filtering is done.} } \value{ -A correlation matrix between \code{stats} and our statistics using only -the Ensembl gene IDs present in both tables. The columns are sorted using -a hierarchical cluster. +A correlation matrix between the query \code{stats} and the reference +statistics using only the ENSEMBL gene IDs present in both tables. +The columns are sorted using hierarchical clustering. } \description{ Layer modeling correlation of statistics diff --git a/man/layer_stat_cor_plot.Rd b/man/layer_stat_cor_plot.Rd index 73496b45..835ceb4e 100644 --- a/man/layer_stat_cor_plot.Rd +++ b/man/layer_stat_cor_plot.Rd @@ -2,98 +2,128 @@ % Please edit documentation in R/layer_stat_cor_plot.R \name{layer_stat_cor_plot} \alias{layer_stat_cor_plot} -\title{Visualize the layer modeling correlation of statistics} +\title{Visualize the correlation of layer modeling t-statistics with ComplexHeatmap} \usage{ layer_stat_cor_plot( cor_stats_layer, - max = 0.81, - min = -max, - layerHeights = NULL, - cex = 1.2 + color_max = max(cor_stats_layer), + color_min = min(cor_stats_layer), + color_scale = c("#762A83", "#F7F7F7", "#1B7837"), + query_colors = NULL, + reference_colors = NULL, + annotation = NULL, + ... ) } \arguments{ \item{cor_stats_layer}{The output of \code{\link[=layer_stat_cor]{layer_stat_cor()}}.} -\item{max}{A \code{numeric(1)} specifying the highest correlation value for the -color scale (should be between 0 and 1).} +\item{color_max}{A \code{numeric(1)} specifying the highest correlation value for +the color scale (should be between 0 and 1).} -\item{min}{A \code{numeric(1)} specifying the lowest correlation value for the -color scale (should be between 0 and -1).} +\item{color_min}{A \code{numeric(1)} specifying the lowest correlation value for +the color scale (should be between 0 and -1).} -\item{layerHeights}{A \code{numeric()} vector of length equal to -\code{ncol(cor_stats_layer) + 1} that starts at 0 specifying where -to plot the y-axis breaks which can be used for re-creating the length of -each brain layer. Gets passed to \code{\link[=layer_matrix_plot]{layer_matrix_plot()}}.} +\item{color_scale}{A \code{character(3)} vector specifying the color scale for the +fill of the heatmap. The first value is used for \code{color_min}, the second one +for zero, and the third for \code{color_max}.} -\item{cex}{Passed to \code{\link[=layer_matrix_plot]{layer_matrix_plot()}}.} +\item{query_colors}{named \code{character} vector of colors, Adds colors to query +row annotations.} + +\item{reference_colors}{named \code{character} vector of colors, Adds colors to +reference column annotations.} + +\item{annotation}{annotation data.frame output of +\code{\link[=annotate_registered_clusters]{annotate_registered_clusters()}}, adds 'X' for good confidence annotations, +'*' for poor confidence.} + +\item{...}{Additional parameters passed to +\code{\link[ComplexHeatmap:Heatmap]{ComplexHeatmap::Heatmap()}} such as \code{cluster_rows} +and \code{cluster_columns}.} } \value{ -A heatmap for the correlation matrix between statistics. +(\link[ComplexHeatmap:Heatmap-class]{Heatmap-class}) plot of t-stat +correlations } \description{ -This function makes a heatmap from the \code{\link[=layer_stat_cor]{layer_stat_cor()}} correlation matrix -between a given set of cell cluster/type statistics derived from scRNA-seq -or snRNA-seq data (among other types) and the layer statistics from the -Human DLPFC Visium data (when using the default arguments). +This function makes a ComplexHeatmap from the correlation matrix +between a reference and query modeling statistics from \code{\link[=layer_stat_cor]{layer_stat_cor()}}. +For example, between the query statistics from a set of cell cluster/types +derived from scRNA-seq or snRNA-seq data (among other types) and the +reference layer statistics from the Human DLPFC Visium data (when using the +default arguments). } \details{ -Check -https://github.com/LieberInstitute/HumanPilot/blob/master/Analysis/Layer_Guesses/dlpfc_snRNAseq_annotation.R -for a full analysis from which this family of functions is derived from. +Includes functionality to add color annotations, +(helpful to match to colors in Visium spot plots), and annotations from +\code{\link[=annotate_registered_clusters]{annotate_registered_clusters()}}. } \examples{ - ## Obtain the necessary data +## reference human pilot modeling results if (!exists("modeling_results")) { modeling_results <- fetch_data(type = "modeling_results") } +## query spatialDLPFC modeling results +query_modeling_results <- fetch_data( + type = "spatialDLPFC_Visium_modeling_results" +) + ## Compute the correlations cor_stats_layer <- layer_stat_cor( - tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer, + stats = query_modeling_results$enrichment, modeling_results, model_type = "enrichment" ) ## Visualize the correlation matrix -layer_stat_cor_plot(cor_stats_layer, max = max(cor_stats_layer)) -## Annotate then re-plot -rownames(cor_stats_layer) <- paste0( - rownames(cor_stats_layer), - " - ", - annotate_registered_clusters(cor_stats_layer)$layer_label +## Default plot with no annotations and defaults for ComplexHeatmap() +layer_stat_cor_plot(cor_stats_layer) + +## add colors +## add libd_layer_colors to reference Human Pilot layers +layer_stat_cor_plot(cor_stats_layer, reference_colors = libd_layer_colors) + +## obtain colors for the query clusters +cluster_colors <- get_colors(clusters = rownames(cor_stats_layer)) +layer_stat_cor_plot(cor_stats_layer, + query_colors = cluster_colors, + reference_colors = libd_layer_colors ) -layer_stat_cor_plot(cor_stats_layer, max = max(cor_stats_layer)) -## Restrict the range of colors further -layer_stat_cor_plot(cor_stats_layer, max = 0.25) +## Apply additional ComplexHeatmap param +layer_stat_cor_plot(cor_stats_layer, + cluster_rows = FALSE, + cluster_columns = FALSE +) -## Repeat with just the top 10 layer marker genes -layer_stat_cor_plot(layer_stat_cor( - tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer, - modeling_results, - model_type = "enrichment", - top_n = 10 -), max = 0.25) +## Add annotation +annotation_df <- annotate_registered_clusters( + cor_stats_layer, + confidence_threshold = .55 +) +layer_stat_cor_plot(cor_stats_layer, annotation = annotation_df) + +## All together +layer_stat_cor_plot( + cor_stats_layer, + query_colors = cluster_colors, + reference_colors = libd_layer_colors, + annotation = annotation_df, + cluster_rows = FALSE, + cluster_columns = FALSE +) -## Now with the "pairwise" modeling results and also top_n = 10 -layer_stat_cor_plot(layer_stat_cor( - tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer, - modeling_results, - model_type = "pairwise", - top_n = 10 -), max = 0.25) } \seealso{ -layer_matrix_plot annotate_registered_clusters - Other Layer correlation functions: \code{\link{annotate_registered_clusters}()}, \code{\link{layer_stat_cor}()} } \author{ -Andrew E Jaffe, Leonardo Collado-Torres +Louise Huuki-Myers } \concept{Layer correlation functions} diff --git a/man/locate_images.Rd b/man/locate_images.Rd index 6041b6c8..58448fb8 100644 --- a/man/locate_images.Rd +++ b/man/locate_images.Rd @@ -7,11 +7,11 @@ locate_images(spe, image_dir, image_pattern) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} \item{image_dir}{A \code{character(1)} specifying a path to a directory containing image files with the pattern \code{sampleID_pattern.png}.} diff --git a/man/multi_gene_pca.Rd b/man/multi_gene_pca.Rd new file mode 100644 index 00000000..c6845d8a --- /dev/null +++ b/man/multi_gene_pca.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/multi_gene_pca.R +\name{multi_gene_pca} +\alias{multi_gene_pca} +\title{Combine multiple continuous variables through PCA} +\usage{ +multi_gene_pca(cont_mat) +} +\arguments{ +\item{cont_mat}{A \code{matrix()} with spots as rows and 2 or more continuous +variables as columns.} +} +\value{ +A \code{numeric()} vector with one element per spot, summarizing the +multiple continuous variables. +} +\description{ +PCA is performed on \code{cont_mat}, the matrix of multiple continuous +features. The first PC is returned, representing the dominant spatial +signature of the feature set. Its direction is negated if necessary so that +the majority of coefficients across features are positive (when the features +are highly correlated, this encourages spots with higher values to +represent areas of higher expression of the features). +} +\seealso{ +Other functions for summarizing expression of multiple continuous variables simultaneously: +\code{\link{multi_gene_sparsity}()}, +\code{\link{multi_gene_z_score}()} +} +\author{ +Nicholas J. Eagles +} +\concept{functions for summarizing expression of multiple continuous variables simultaneously} +\keyword{internal} diff --git a/man/multi_gene_sparsity.Rd b/man/multi_gene_sparsity.Rd new file mode 100644 index 00000000..8a564aa9 --- /dev/null +++ b/man/multi_gene_sparsity.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/multi_gene_sparsity.R +\name{multi_gene_sparsity} +\alias{multi_gene_sparsity} +\title{Combine multiple continuous variables by proportion of positive values} +\usage{ +multi_gene_sparsity(cont_mat) +} +\arguments{ +\item{cont_mat}{A \code{matrix()} with spots as rows and 2 or more continuous +variables as columns.} +} +\value{ +A \code{numeric()} vector with one element per spot, summarizing the +multiple continuous variables. +} +\description{ +To summarize multiple features, the proportion of features with positive +values for each spot is computed. +} +\seealso{ +Other functions for summarizing expression of multiple continuous variables simultaneously: +\code{\link{multi_gene_pca}()}, +\code{\link{multi_gene_z_score}()} +} +\author{ +Nicholas J. Eagles +} +\concept{functions for summarizing expression of multiple continuous variables simultaneously} +\keyword{internal} diff --git a/man/multi_gene_z_score.Rd b/man/multi_gene_z_score.Rd new file mode 100644 index 00000000..1d4d12cb --- /dev/null +++ b/man/multi_gene_z_score.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/multi_gene_z_score.R +\name{multi_gene_z_score} +\alias{multi_gene_z_score} +\title{Combine multiple continuous variables by averaging Z scores} +\usage{ +multi_gene_z_score(cont_mat) +} +\arguments{ +\item{cont_mat}{A \code{matrix()} with spots as rows and 2 or more continuous +variables as columns.} +} +\value{ +A \code{numeric()} vector with one element per spot, summarizing the +multiple continuous variables. +} +\description{ +To summarize multiple features, each is normalized to represent a Z-score. +Scores are averaged to return a single vector. +} +\seealso{ +Other functions for summarizing expression of multiple continuous variables simultaneously: +\code{\link{multi_gene_pca}()}, +\code{\link{multi_gene_sparsity}()} +} +\author{ +Nicholas J. Eagles +} +\concept{functions for summarizing expression of multiple continuous variables simultaneously} +\keyword{internal} diff --git a/man/prep_stitched_data.Rd b/man/prep_stitched_data.Rd new file mode 100644 index 00000000..e87c0140 --- /dev/null +++ b/man/prep_stitched_data.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prep_stitched_data.R +\name{prep_stitched_data} +\alias{prep_stitched_data} +\title{Prepare stitched data for plotting} +\usage{ +prep_stitched_data(spe, point_size, image_id) +} +\arguments{ +\item{spe}{A \code{SpatialExperiment} built with +\code{visiumStitched::build_spe()}, containing a logical +\code{spe$exclude_overlapping} column specifying which spots to display in +plots} + +\item{point_size}{A \code{numeric(1)} specifying the size of the points. Defaults +to \code{1.25}. Some colors look better if you use \code{2} for instance.} + +\item{image_id}{A \code{character(1)} with the name of the image ID you want to +use in the background.} +} +\value{ +A list with names \code{spe} and \code{point_size} containing a +filtered, ready-to-plot \code{SpatialExperiment} and an appropriate spot size +(passed to \code{vis_gene()} or \code{vis_clus()}), respectively +} +\description{ +Given a \code{SpatialExperiment} built with \code{visiumStitched::build_spe()} +\url{http://research.libd.org/visiumStitched/reference/build_spe.html}, drop +excluded spots (specified by \code{spe$exclude_overlapping}) and compute an +appropriate spot size for plotting with \code{vis_gene()} or +\code{vis_clus()}, assuming the plot will be written to a PDF of default +dimensions (i.e. \code{width = 7} and \code{height = 7}). +} +\author{ +Nicholas J. Eagles +} +\keyword{internal} diff --git a/man/registration_pseudobulk.Rd b/man/registration_pseudobulk.Rd index 6df2d3a2..2d26041b 100644 --- a/man/registration_pseudobulk.Rd +++ b/man/registration_pseudobulk.Rd @@ -20,7 +20,9 @@ object or one that inherits its properties.} \item{var_registration}{A \code{character(1)} specifying the \code{colData(sce)} variable of interest against which will be used for computing the relevant -statistics.} +statistics. This should be a categorical variable, with all categories +syntaticly valid (could be used as an R variable, no special characters or +leading numbers), ex. 'L1.2', 'celltype2' not 'L1/2' or '2'.} \item{var_sample_id}{A \code{character(1)} specifying the \code{colData(sce)} variable with the sample ID.} diff --git a/man/registration_wrapper.Rd b/man/registration_wrapper.Rd index 92b38b4b..d7463870 100644 --- a/man/registration_wrapper.Rd +++ b/man/registration_wrapper.Rd @@ -23,7 +23,9 @@ object or one that inherits its properties.} \item{var_registration}{A \code{character(1)} specifying the \code{colData(sce)} variable of interest against which will be used for computing the relevant -statistics.} +statistics. This should be a categorical variable, with all categories +syntaticly valid (could be used as an R variable, no special characters or +leading numbers), ex. 'L1.2', 'celltype2' not 'L1/2' or '2'.} \item{var_sample_id}{A \code{character(1)} specifying the \code{colData(sce)} variable with the sample ID.} @@ -92,8 +94,19 @@ rowData(sce)$gene_name <- paste0("gene", seq_len(nrow(sce))) ## Compute all modeling results example_modeling_results <- registration_wrapper( sce, - "Cell_Cycle", "sample_id", c("age"), "ensembl", "gene_name", "wrapper" + var_registration = "Cell_Cycle", + var_sample_id = "sample_id", + covars = c("age"), + gene_ensembl = "ensembl", + gene_name = "gene_name", + suffix = "wrapper" ) + +## Explore the results from registration_wrapper() +class(example_modeling_results) +length(example_modeling_results) +names(example_modeling_results) +lapply(example_modeling_results, head) } \seealso{ Other spatial registration and statistical modeling functions: diff --git a/man/run_app.Rd b/man/run_app.Rd index ec7f0cb3..587d081e 100644 --- a/man/run_app.Rd +++ b/man/run_app.Rd @@ -23,6 +23,8 @@ run_app( spe_continuous_vars = c("cell_count", "sum_umi", "sum_gene", "expr_chrM", "expr_chrM_ratio"), default_cluster = "spatialLIBD", + auto_crop_default = TRUE, + is_stitched = FALSE, ... ) } @@ -44,7 +46,8 @@ layer-level (group-level) resolution. See \code{\link[=fetch_data]{fetch_data()} columns \verb{f_stat_*} or \verb{t_stat_*} as well as \verb{p_value_*} and \verb{fdr_*} plus \code{ensembl}. The column name is used to extract the statistic results, the p-values, and the FDR adjusted p-values. Then the \code{ensembl} column is used -for matching in some cases. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +for matching in some cases. See \code{\link[=fetch_data]{fetch_data()}} for more details. Typically +this is the set of reference statistics used in \code{layer_stat_cor()}.} \item{sig_genes}{The output of \code{\link[=sig_genes_extract_all]{sig_genes_extract_all()}} which is a table in long format with the modeling results. You can subset this if the object @@ -69,6 +72,18 @@ as genes. They will have to be present in \code{colData(sce)}.} (discrete) variable to use. It will have to be present in both \code{colData(spe)} and \code{colData(sce_layer)}.} +\item{auto_crop_default}{A \code{logical(1)} specifying the default value for +automatically cropping the images. Set this to \code{FALSE} if your images do not +follow the Visium grid size expectations, which are key for enabling +auto-cropping.} + +\item{is_stitched}{A \code{logical(1)} vector: If \code{TRUE}, expects a +\link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} built +with \code{visiumStitched::build_spe()}. +\url{http://research.libd.org/visiumStitched/reference/build_spe.html}; in +particular, expects a logical colData column \code{exclude_overlapping} +specifying which spots to exclude from the plot. Sets \code{auto_crop = FALSE}.} + \item{...}{Other arguments passed to the list of golem options for running the application.} } @@ -227,5 +242,27 @@ if (enough_ram(9e9)) { ## * https://github.com/LieberInstitute/spatialDLPFC/tree/main/code/deploy_app_k09_position_noWM ## * https://github.com/LieberInstitute/spatialDLPFC/tree/main/code/deploy_app_k16 ## * https://github.com/LieberInstitute/spatialDLPFC/tree/main/code/analysis_IF/03_spatialLIBD_app + + +## Example for an object with multiple capture areas stitched together with +## . +spe_stitched <- fetch_data("Visium_LS_spe") + +## Inspect this object +spe_stitched + +## Notice the use of "exclude_overlapping" +table(spe_stitched$exclude_overlapping, useNA = "ifany") + +## Run the app with this stitched data +run_app( + spe = spe_stitched, + sce_layer = NULL, modeling_results = NULL, sig_genes = NULL, + title = "visiumStitched example data", + spe_discrete_vars = c("capture_area", "scran_quick_cluster", "ManualAnnotation"), + spe_continuous_vars = c("sum_umi", "sum_gene", "expr_chrM", "expr_chrM_ratio"), + default_cluster = "scran_quick_cluster", + is_stitched = TRUE +) } } diff --git a/man/sig_genes_extract.Rd b/man/sig_genes_extract.Rd index ad838055..cd144460 100644 --- a/man/sig_genes_extract.Rd +++ b/man/sig_genes_extract.Rd @@ -20,7 +20,8 @@ sig_genes_extract( columns \verb{f_stat_*} or \verb{t_stat_*} as well as \verb{p_value_*} and \verb{fdr_*} plus \code{ensembl}. The column name is used to extract the statistic results, the p-values, and the FDR adjusted p-values. Then the \code{ensembl} column is used -for matching in some cases. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +for matching in some cases. See \code{\link[=fetch_data]{fetch_data()}} for more details. Typically +this is the set of reference statistics used in \code{layer_stat_cor()}.} \item{model_type}{A named element of the \code{modeling_results} list. By default that is either \code{enrichment} for the model that tests one human brain layer diff --git a/man/sig_genes_extract_all.Rd b/man/sig_genes_extract_all.Rd index c79edf28..d984b84a 100644 --- a/man/sig_genes_extract_all.Rd +++ b/man/sig_genes_extract_all.Rd @@ -18,7 +18,8 @@ sig_genes_extract_all( columns \verb{f_stat_*} or \verb{t_stat_*} as well as \verb{p_value_*} and \verb{fdr_*} plus \code{ensembl}. The column name is used to extract the statistic results, the p-values, and the FDR adjusted p-values. Then the \code{ensembl} column is used -for matching in some cases. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +for matching in some cases. See \code{\link[=fetch_data]{fetch_data()}} for more details. Typically +this is the set of reference statistics used in \code{layer_stat_cor()}.} \item{sce_layer}{Defaults to the output of \code{fetch_data(type = 'sce_layer')}. This is a diff --git a/man/sort_clusters.Rd b/man/sort_clusters.Rd index 93d05aa4..a2b911bc 100644 --- a/man/sort_clusters.Rd +++ b/man/sort_clusters.Rd @@ -13,13 +13,13 @@ sort_clusters(clusters, map_subset = NULL) which elements of \code{clusters} to use to determine the ranking of the clusters.} } \value{ -A factor of length equal to \code{clusters} where the levels are the new -ordered clusters and the names of the factor are the original values from -\code{clusters}. +A \code{factor()} version of \code{clusters} where the levels are ordered by +frequency. } \description{ -This function takes a vector with cluster labels and sorts it by frequency -such that the most frequent cluster is the first one and so on. +This function takes a vector with cluster labels, recasts it as a \code{factor()}, +and sorts the \code{factor()} levels by frequency such that the most frequent +cluster is the first level and so on. } \examples{ @@ -29,6 +29,43 @@ clus <- letters[unlist(lapply(4:1, function(x) rep(x, x)))] ## In this case, it's a character vector class(clus) -## Sort them and obtain a factor +## We see that we have 10 elements in this vector, which is +## an unnamed character vector +clus + +## letter 'd' is the most frequent +table(clus) + +## Sort them and obtain a factor. Notice that it's a named +## factor, and the names correspond to the original values +## in the character vector. sort_clusters(clus) + +## Since 'd' was the most frequent, it gets assigned to the first level +## in the factor variable. +table(sort_clusters(clus)) + +## If we skip the first 3 values of clus (which are all 'd'), we can +## change the most frequent cluster. And thus the ordering of the +## factor levels. +sort_clusters(clus, map_subset = seq_len(length(clus)) > 3) + +## Let's try with a factor variable +clus_factor <- factor(clus) +## sort_clusters() returns an identical result in this case +stopifnot(identical(sort_clusters(clus), sort_clusters(clus_factor))) + +## What happens if you have a logical variable with NAs? +set.seed(20240712) +log_var <- sample(c(TRUE, FALSE, NA), + 1000, + replace = TRUE, + prob = c(0.3, 0.15, 0.55) +) +## Here, the NAs are the most frequent group. +table(log_var, useNA = "ifany") + +## The NAs are not used for sorting. Since we have more 'TRUE' than 'FALSE' +## then, 'TRUE' becomes the first level. +table(sort_clusters(log_var), useNA = "ifany") } diff --git a/man/tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer.Rd b/man/tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer.Rd index 11d2c849..2996d31a 100644 --- a/man/tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer.Rd +++ b/man/tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer.Rd @@ -19,8 +19,12 @@ and tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer } \description{ -Using the DLPFC snRNA-seq data from Matthew N Tran et al we computed -enrichment t-statistics for the cell clusters. This is a subset of them -used in examples such as in \code{\link[=layer_stat_cor_plot]{layer_stat_cor_plot()}}. +Using the DLPFC snRNA-seq data from Matthew N Tran et al +\url{https://doi.org/10.1016/j.neuron.2021.09.001} we computed +enrichment t-statistics for the cell clusters. The Tran et al data has been +subset to the top 100 DLPFC layer markers found in Maynard, Collado-Torres, +et al 2021. This data is used in examples such as in +\code{\link[=layer_stat_cor_plot]{layer_stat_cor_plot()}}. The Tran et al data is from the pre-print version +of that project. } \keyword{datasets} diff --git a/man/vis_clus.Rd b/man/vis_clus.Rd index 4bb0c369..34513c66 100644 --- a/man/vis_clus.Rd +++ b/man/vis_clus.Rd @@ -15,15 +15,17 @@ vis_clus( alpha = NA, point_size = 2, auto_crop = TRUE, + na_color = "#CCCCCC40", + is_stitched = FALSE, ... ) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} \item{sampleid}{A \code{character(1)} specifying which sample to plot from \code{colData(spe)$sample_id} (formerly \code{colData(spe)$sample_name}).} @@ -52,6 +54,19 @@ to \code{1.25}. Some colors look better if you use \code{2} for instance.} the image / plotting area, which is useful if the Visium capture area is not centered on the image and if the image is not a square.} +\item{na_color}{A \code{character(1)} specifying a color for the NA values. +If you set \code{alpha = NA} then it's best to set \code{na_color} to a color that has +alpha blending already, which will make non-NA values pop up more and the NA +values will show with a lighter color. This behavior is lost when \code{alpha} is +set to a non-\code{NA} value.} + +\item{is_stitched}{A \code{logical(1)} vector: If \code{TRUE}, expects a +\link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} built +with \code{visiumStitched::build_spe()}. +\url{http://research.libd.org/visiumStitched/reference/build_spe.html}; in +particular, expects a logical colData column \code{exclude_overlapping} +specifying which spots to exclude from the plot. Sets \code{auto_crop = FALSE}.} + \item{...}{Passed to \link[base:paste]{paste0()} for making the title of the plot following the \code{sampleid}.} } @@ -107,6 +122,19 @@ if (enough_ram()) { spatial = FALSE ) print(p3) + + ## With some NA values + spe$tmp <- spe$layer_guess_reordered + spe$tmp[spe$sample_id == "151673"][seq_len(500)] <- NA + p4 <- vis_clus( + spe = spe, + clustervar = "tmp", + sampleid = "151673", + colors = libd_layer_colors, + na_color = "white", + ... = " LIBD Layers" + ) + print(p4) } } \seealso{ diff --git a/man/vis_clus_p.Rd b/man/vis_clus_p.Rd index ba154839..b8202f1f 100644 --- a/man/vis_clus_p.Rd +++ b/man/vis_clus_p.Rd @@ -15,18 +15,19 @@ vis_clus_p( image_id = "lowres", alpha = NA, point_size = 2, - auto_crop = TRUE + auto_crop = TRUE, + na_color = "#CCCCCC40" ) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} -\item{d}{A data.frame with the sample-level information. This is typically -obtained using \code{cbind(colData(spe), spatialCoords(spe))}.} +\item{d}{A \code{data.frame()} with the sample-level information. This is +typically obtained using \code{cbind(colData(spe), spatialCoords(spe))}.} \item{clustervar}{A \code{character(1)} with the name of the \code{colData(spe)} column that has the cluster values.} @@ -56,6 +57,12 @@ to \code{1.25}. Some colors look better if you use \code{2} for instance.} \item{auto_crop}{A \code{logical(1)} indicating whether to automatically crop the image / plotting area, which is useful if the Visium capture area is not centered on the image and if the image is not a square.} + +\item{na_color}{A \code{character(1)} specifying a color for the NA values. +If you set \code{alpha = NA} then it's best to set \code{na_color} to a color that has +alpha blending already, which will make non-NA values pop up more and the NA +values will show with a lighter color. This behavior is lost when \code{alpha} is +set to a non-\code{NA} value.} } \value{ A \link[ggplot2:ggplot]{ggplot2} object. diff --git a/man/vis_gene.Rd b/man/vis_gene.Rd index 339c5296..bba4dc60 100644 --- a/man/vis_gene.Rd +++ b/man/vis_gene.Rd @@ -19,23 +19,27 @@ vis_gene( point_size = 2, auto_crop = TRUE, na_color = "#CCCCCC40", + multi_gene_method = c("z_score", "pca", "sparsity"), + is_stitched = FALSE, ... ) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} \item{sampleid}{A \code{character(1)} specifying which sample to plot from \code{colData(spe)$sample_id} (formerly \code{colData(spe)$sample_name}).} -\item{geneid}{A \code{character(1)} specifying the gene ID stored in -\code{rowData(spe)$gene_search} or a continuous variable stored in \code{colData(spe)} -to visualize. If \code{rowData(spe)$gene_search} is missing, then \code{rownames(spe)} -is used to search for the gene ID.} +\item{geneid}{A \code{character()} specifying the gene ID(s) stored in +\code{rowData(spe)$gene_search} or a continuous variable(s) stored in +\code{colData(spe)} to visualize. For each ID, if \code{rowData(spe)$gene_search} is +missing, then \code{rownames(spe)} is used to search for the gene ID. When a +vector of length > 1 is supplied, the continuous variables are combined +according to \code{multi_gene_method}, producing a single value for each spot.} \item{spatial}{A \code{logical(1)} indicating whether to include the histology layer from \code{\link[=geom_spatial]{geom_spatial()}}. If you plan to use @@ -77,6 +81,25 @@ alpha blending already, which will make non-NA values pop up more and the NA values will show with a lighter color. This behavior is lost when \code{alpha} is set to a non-\code{NA} value.} +\item{multi_gene_method}{A \code{character(1)}: either \code{"pca"}, \code{"sparsity"}, or +\code{"z_score"}. This parameter controls how multiple continuous variables are +combined for visualization, and only applies when \code{geneid} has length +great than 1. \code{z_score}: to summarize multiple continuous variables, each is +normalized to represent a Z-score. The multiple scores are then averaged. +\code{pca}: PCA dimension reduction is conducted on the matrix formed by the +continuous variables, and the first PC is then used and multiplied by -1 if +needed to have the majority of the values for PC1 to be positive. \code{sparsity}: +the proportion of continuous variables with positive values for each spot is +computed. For more details, check the multi gene vignette at +\url{https://research.libd.org/spatialLIBD/articles/multi_gene_plots.html}.} + +\item{is_stitched}{A \code{logical(1)} vector: If \code{TRUE}, expects a +\link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} built +with \code{visiumStitched::build_spe()}. +\url{http://research.libd.org/visiumStitched/reference/build_spe.html}; in +particular, expects a logical colData column \code{exclude_overlapping} +specifying which spots to exclude from the plot. Sets \code{auto_crop = FALSE}.} + \item{...}{Passed to \link[base:paste]{paste0()} for making the title of the plot following the \code{sampleid}.} } @@ -167,6 +190,42 @@ if (enough_ram()) { auto_crop = FALSE ) print(p5) + + # Define several markers for white matter + white_matter_genes <- c( + "ENSG00000197971", "ENSG00000131095", "ENSG00000123560", + "ENSG00000171885" + ) + + ## Plot all white matter markers at once using the Z-score combination + ## method + p6 <- vis_gene( + spe = spe, + sampleid = "151507", + geneid = white_matter_genes, + multi_gene_method = "z_score" + ) + print(p6) + + ## Plot all white matter markers at once using the sparsity combination + ## method + p7 <- vis_gene( + spe = spe, + sampleid = "151507", + geneid = white_matter_genes, + multi_gene_method = "sparsity" + ) + print(p7) + + ## Plot all white matter markers at once using the PCA combination + ## method + p8 <- vis_gene( + spe = spe, + sampleid = "151507", + geneid = white_matter_genes, + multi_gene_method = "pca" + ) + print(p8) } } \seealso{ diff --git a/man/vis_gene_p.Rd b/man/vis_gene_p.Rd index 1c65b9a4..ca9d6a29 100644 --- a/man/vis_gene_p.Rd +++ b/man/vis_gene_p.Rd @@ -13,12 +13,8 @@ vis_gene_p( viridis = TRUE, image_id = "lowres", alpha = NA, - cont_colors = if (viridis) { - viridisLite::viridis(21) - } else { - - c("aquamarine4", "springgreen", "goldenrod", "red") - }, + cont_colors = if (viridis) viridisLite::viridis(21) else c("aquamarine4", + "springgreen", "goldenrod", "red"), point_size = 2, auto_crop = TRUE, na_color = "#CCCCCC40", @@ -26,15 +22,15 @@ vis_gene_p( ) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} -\item{d}{A data.frame with the sample-level information. This is typically -obtained using \code{cbind(colData(spe), spatialCoords(spe))}. -The data.frame has to contain +\item{d}{A \code{data.frame()} with the sample-level information. This is +typically obtained using \code{cbind(colData(spe), spatialCoords(spe))}. +The \code{data.frame} has to contain a column with the continuous variable data to plot stored under \code{d$COUNT}.} \item{sampleid}{A \code{character(1)} specifying which sample to plot from @@ -84,7 +80,7 @@ A \link[ggplot2:ggplot]{ggplot2} object. This function visualizes the gene expression stored in \code{assays(spe)} or any continuous variable stored in \code{colData(spe)} for one given sample at the spot-level using (by default) the histology information on the background. -This is the function that does all the plotting behind \code{\link[=vis_gene]{vis_gene()}}. +This is the function that does all the plotting behind \code{\link[=vis_gene]{vis_gene()}} To visualize clusters (or any discrete variable) use \code{\link[=vis_clus_p]{vis_clus_p()}}. } \examples{ @@ -98,7 +94,6 @@ if (enough_ram()) { df <- as.data.frame(cbind(colData(spe_sub), SpatialExperiment::spatialCoords(spe_sub)), optional = TRUE) df$COUNT <- df$expr_chrM_ratio - ## Use the manual color palette by Lukas M Weber ## Don't plot the histology information p <- vis_gene_p( spe = spe_sub, diff --git a/man/vis_grid_clus.Rd b/man/vis_grid_clus.Rd index 52fd0be5..af943564 100644 --- a/man/vis_grid_clus.Rd +++ b/man/vis_grid_clus.Rd @@ -19,15 +19,17 @@ vis_grid_clus( sample_order = unique(spe$sample_id), point_size = 2, auto_crop = TRUE, + na_color = "#CCCCCC40", + is_stitched = FALSE, ... ) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} \item{clustervar}{A \code{character(1)} with the name of the \code{colData(spe)} column that has the cluster values.} @@ -69,6 +71,19 @@ to \code{1.25}. Some colors look better if you use \code{2} for instance.} the image / plotting area, which is useful if the Visium capture area is not centered on the image and if the image is not a square.} +\item{na_color}{A \code{character(1)} specifying a color for the NA values. +If you set \code{alpha = NA} then it's best to set \code{na_color} to a color that has +alpha blending already, which will make non-NA values pop up more and the NA +values will show with a lighter color. This behavior is lost when \code{alpha} is +set to a non-\code{NA} value.} + +\item{is_stitched}{A \code{logical(1)} vector: If \code{TRUE}, expects a +\link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} built +with \code{visiumStitched::build_spe()}. +\url{http://research.libd.org/visiumStitched/reference/build_spe.html}; in +particular, expects a logical colData column \code{exclude_overlapping} +specifying which spots to exclude from the plot. Sets \code{auto_crop = FALSE}.} + \item{...}{Passed to \link[base:paste]{paste0()} for making the title of the plot following the \code{sampleid}.} } @@ -110,7 +125,7 @@ if (enough_ram()) { \seealso{ Other Spatial cluster visualization functions: \code{\link{frame_limits}()}, -\code{\link{vis_clus_p}()}, -\code{\link{vis_clus}()} +\code{\link{vis_clus}()}, +\code{\link{vis_clus_p}()} } \concept{Spatial cluster visualization functions} diff --git a/man/vis_grid_gene.Rd b/man/vis_grid_gene.Rd index 257c0d05..40b7461c 100644 --- a/man/vis_grid_gene.Rd +++ b/man/vis_grid_gene.Rd @@ -23,20 +23,23 @@ vis_grid_gene( point_size = 2, auto_crop = TRUE, na_color = "#CCCCCC40", + is_stitched = FALSE, ... ) } \arguments{ -\item{spe}{Defaults to the output of -\code{fetch_data(type = 'spe')}. This is a +\item{spe}{A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} -object with the spot-level Visium data and information required for -visualizing the histology. See \code{\link[=fetch_data]{fetch_data()}} for more details.} +object. See \code{\link[=fetch_data]{fetch_data()}} for how to download some example objects or +\code{\link[=read10xVisiumWrapper]{read10xVisiumWrapper()}} to read in \code{spaceranger --count} output files and +build your own \code{spe} object.} -\item{geneid}{A \code{character(1)} specifying the gene ID stored in -\code{rowData(spe)$gene_search} or a continuous variable stored in \code{colData(spe)} -to visualize. If \code{rowData(spe)$gene_search} is missing, then \code{rownames(spe)} -is used to search for the gene ID.} +\item{geneid}{A \code{character()} specifying the gene ID(s) stored in +\code{rowData(spe)$gene_search} or a continuous variable(s) stored in +\code{colData(spe)} to visualize. For each ID, if \code{rowData(spe)$gene_search} is +missing, then \code{rownames(spe)} is used to search for the gene ID. When a +vector of length > 1 is supplied, the continuous variables are combined +according to \code{multi_gene_method}, producing a single value for each spot.} \item{pdf_file}{A \code{character(1)} specifying the path for the resulting PDF.} @@ -91,6 +94,13 @@ alpha blending already, which will make non-NA values pop up more and the NA values will show with a lighter color. This behavior is lost when \code{alpha} is set to a non-\code{NA} value.} +\item{is_stitched}{A \code{logical(1)} vector: If \code{TRUE}, expects a +\link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} built +with \code{visiumStitched::build_spe()}. +\url{http://research.libd.org/visiumStitched/reference/build_spe.html}; in +particular, expects a logical colData column \code{exclude_overlapping} +specifying which spots to exclude from the plot. Sets \code{auto_crop = FALSE}.} + \item{...}{Passed to \link[base:paste]{paste0()} for making the title of the plot following the \code{sampleid}.} } @@ -129,7 +139,7 @@ if (enough_ram()) { } \seealso{ Other Spatial gene visualization functions: -\code{\link{vis_gene_p}()}, -\code{\link{vis_gene}()} +\code{\link{vis_gene}()}, +\code{\link{vis_gene_p}()} } \concept{Spatial gene visualization functions} diff --git a/tests/testthat/test-add_qc_metrics.R b/tests/testthat/test-add_qc_metrics.R new file mode 100644 index 00000000..3d3ee568 --- /dev/null +++ b/tests/testthat/test-add_qc_metrics.R @@ -0,0 +1,15 @@ +test_that( + "add_qc_metrics returns modified spe", + { + if (!exists("spe")) spe <- fetch_data("spe") + + # run metrics spe + spe_qc <- add_qc_metrics(spe, overwrite = TRUE) + expect_equal(ncol(spe), ncol(spe_qc)) ## same number of spots + expect_equal(ncol(colData(spe)) + 7, ncol(colData(spe_qc))) ## add 7 QC cols to colData + # [1] "scran_discard" "scran_low_lib_size" "scran_low_n_features" + # [4] "scran_high_subsets_Mito_percent" "edge_spot" "edge_distance" + # [7] "scran_low_lib_size_edge" + rm(spe_qc) + } +) diff --git a/tests/testthat/test-gene_set_enrichment.R b/tests/testthat/test-gene_set_enrichment.R index 015d7b6f..e0e1a0f2 100644 --- a/tests/testthat/test-gene_set_enrichment.R +++ b/tests/testthat/test-gene_set_enrichment.R @@ -1,51 +1,59 @@ - asd_sfari <- utils::read.csv( - system.file( - "extdata", - "SFARI-Gene_genes_01-03-2020release_02-04-2020export.csv", - package = "spatialLIBD" - ), - as.is = TRUE - ) + system.file( + "extdata", + "SFARI-Gene_genes_01-03-2020release_02-04-2020export.csv", + package = "spatialLIBD" + ), + as.is = TRUE +) - ## Format them appropriately - asd_sfari_geneList <- list( - Gene_SFARI_all = asd_sfari$ensembl.id, - Gene_SFARI_high = asd_sfari$ensembl.id[asd_sfari$gene.score < 3], - Gene_SFARI_syndromic = asd_sfari$ensembl.id[asd_sfari$syndromic == 1] - ) +## Format them appropriately +asd_sfari_geneList <- list( + Gene_SFARI_all = asd_sfari$ensembl.id, + Gene_SFARI_high = asd_sfari$ensembl.id[asd_sfari$gene.score < 3], + Gene_SFARI_syndromic = asd_sfari$ensembl.id[asd_sfari$syndromic == 1] +) - ## Obtain the necessary data - if (!exists("modeling_results")) { - modeling_results <- fetch_data(type = "modeling_results") - } +## Obtain the necessary data +if (!exists("modeling_results")) { + modeling_results <- fetch_data(type = "modeling_results") +} - ## Compute the gene set enrichment results - asd_sfari_enrichment <- gene_set_enrichment( - gene_list = asd_sfari_geneList, - modeling_results = modeling_results, - model_type = "enrichment" - ) +## Compute the gene set enrichment results +asd_sfari_enrichment <- gene_set_enrichment( + gene_list = asd_sfari_geneList, + modeling_results = modeling_results, + model_type = "enrichment" +) test_that("result for each gene list & model test", { - expect_equal(nrow(asd_sfari_enrichment), - length(asd_sfari_geneList)*length(grep("fdr",colnames(modeling_results$enrichment)))) + expect_equal( + nrow(asd_sfari_enrichment), + length(asd_sfari_geneList) * length(grep( + "fdr", colnames(modeling_results$enrichment) + )) + ) }) ## check behavior for OR < 1 results -WM_enriched <- modeling_results$enrichment$fdr_WM < 0.1 & modeling_results$enrichment$t_stat_WM > 0 +WM_enriched <- + modeling_results$enrichment$fdr_WM < 0.1 & + modeling_results$enrichment$t_stat_WM > 0 -safari_no_wm_enrich <- asd_sfari_geneList$Gene_SFARI_all[asd_sfari_geneList$Gene_SFARI_all %in% modeling_results$enrichment$ensembl[!WM_enriched]] +safari_no_wm_enrich <- + asd_sfari_geneList$Gene_SFARI_all[asd_sfari_geneList$Gene_SFARI_all %in% modeling_results$enrichment$ensembl[!WM_enriched]] -safari_edge_cases <- list(no_WM_enrich = safari_no_wm_enrich, - short = asd_sfari_geneList$Gene_SFARI_all[1:20]) +safari_edge_cases <- list( + no_WM_enrich = safari_no_wm_enrich, + short = asd_sfari_geneList$Gene_SFARI_all[1:20] +) edge_safari_enrichment <- gene_set_enrichment( - gene_list = safari_edge_cases["no_WM_enrich"], - modeling_results = modeling_results, - model_type = "enrichment" + gene_list = safari_edge_cases["no_WM_enrich"], + modeling_results = modeling_results, + model_type = "enrichment" ) ## with alternative = "two.sided" @@ -53,15 +61,15 @@ edge_safari_enrichment <- gene_set_enrichment( # 1 0.0000000 5.752600e-72 WM 0 638 no_WM_enrich enrichment 0.1 test_that("warn for small gene list", { - expect_warning(gene_set_enrichment( - gene_list = safari_edge_cases["short"], - modeling_results = modeling_results, - model_type = "enrichment") - ) + expect_warning( + gene_set_enrichment( + gene_list = safari_edge_cases["short"], + modeling_results = modeling_results, + model_type = "enrichment" + ) + ) }) -test_that("Not signficant for OR==0",{ - expect_true(all(edge_safari_enrichment$Pval[edge_safari_enrichment$OR == 0] > 0.05)) +test_that("Not signficant for OR==0", { + expect_true(all(edge_safari_enrichment$Pval[edge_safari_enrichment$OR == 0] > 0.05)) }) - - diff --git a/tests/testthat/test-multi_gene_pca.R b/tests/testthat/test-multi_gene_pca.R new file mode 100644 index 00000000..32c1d83d --- /dev/null +++ b/tests/testthat/test-multi_gene_pca.R @@ -0,0 +1,30 @@ +test_that( + "multi_gene_pca", + { + # With two good columns but 1 zero-variance column, the zero-variance + # column should be dropped with a warning + cont_mat <- matrix(c(1, 0, 3, 3, 2, -5), ncol = 3) + colnames(cont_mat) <- c("good1", "bad", "good2") + expect_warning( + multi_gene_pca(cont_mat), + "Dropping features\\(s\\) 'bad' which have NAs or no expression variation" + ) + + # With two good columns but 1 zero-variance column, the zero-variance + # column should be dropped with a warning + cont_mat <- matrix(c(1, NA, 3, 4, 2, -5), ncol = 3) + colnames(cont_mat) <- c("bad", "good1", "good2") + expect_warning( + multi_gene_pca(cont_mat), + "Dropping features\\(s\\) 'bad' which have NAs or no expression variation" + ) + + # With only one good column, an error should be thrown + cont_mat <- matrix(c(1, NA, 3, 4, 2, 2), ncol = 3) + colnames(cont_mat) <- c("bad1", "good", "bad2") + expect_error( + multi_gene_pca(cont_mat), + "After dropping features with NAs or no expression variation, less than 2 features were left" + ) + } +) diff --git a/tests/testthat/test-multi_gene_z_score.R b/tests/testthat/test-multi_gene_z_score.R new file mode 100644 index 00000000..27c8ae7d --- /dev/null +++ b/tests/testthat/test-multi_gene_z_score.R @@ -0,0 +1,44 @@ +test_that( + "multi_gene_z_score", + { + # With two good columns but 1 zero-variance column, the zero-variance + # column should be dropped with a warning + cont_mat <- matrix(c(1, 0, 3, 3, 2, -5), ncol = 3) + colnames(cont_mat) <- c("good1", "bad", "good2") + expect_warning( + multi_gene_z_score(cont_mat), + "Dropping features\\(s\\) 'bad' which have no expression variation" + ) + + # NAs should be correctly removed from columns (as long as 2 non-NAs remain + # in at least 1 column), and the result should have no NAs + cont_mat <- matrix(c(1, NA, 3, NA, 2, 0), ncol = 2) + colnames(cont_mat) <- c("good1", "good2") + expect_equal(any(is.na(multi_gene_z_score(cont_mat))), FALSE) + + # With only one good column, the result should simply be the + # Z-score-normalized good column. A warning should indicate which + # columns were dropped + cont_mat <- matrix(c(1, NA, 3, 4, 2, 2), ncol = 3) + colnames(cont_mat) <- c("bad1", "good", "bad2") + + temp <- c(3, 4) + expected_result <- (temp - mean(temp)) / sd(temp) + + expect_warning( + { + actual_result <- multi_gene_z_score(cont_mat) + }, + "Dropping features\\(s\\) 'bad1', 'bad2' which have no expression variation" + ) + expect_equal(actual_result, expected_result) + + # An error should be thrown if no columns have variation + cont_mat <- matrix(c(1, 1, 0, 0, 2, 2), ncol = 3) + colnames(cont_mat) <- c("bad1", "bad2", "bad3") + expect_error( + multi_gene_z_score(cont_mat), + "^After dropping features with no expression variation" + ) + } +) diff --git a/tests/testthat/test-prep_stitched_data.R b/tests/testthat/test-prep_stitched_data.R new file mode 100644 index 00000000..3866042d --- /dev/null +++ b/tests/testthat/test-prep_stitched_data.R @@ -0,0 +1,40 @@ +test_that( + "prep_stitched_data", + { + if (!exists("spe")) spe <- fetch_data("spe") + + # Missing exclude_overlapping + expect_error( + { + temp <- prep_stitched_data( + spe, + point_size = 2, image_id = "lowres" + ) + }, + "^Missing at least one of the following colData" + ) + + # Can't exclude all spots + spe$exclude_overlapping <- TRUE + expect_error( + { + temp <- prep_stitched_data( + spe, + point_size = 2, image_id = "lowres" + ) + }, + "^spe\\$exclude_overlapping must include some FALSE values to plot$" + ) + + # Output should be a list with the correct names, and the + # SpatialExperiment should have no excluded spots + spe$exclude_overlapping[1:100] <- FALSE + temp <- prep_stitched_data(spe, point_size = 2, image_id = "lowres") + expect_equal(class(temp), "list") + expect_equal(names(temp), c("spe", "point_size")) + expect_equal(all(temp$spe$exclude_overlapping), FALSE) + + # Note bad image_id is not tested, since this function is only used + # internally after checks for legitimate image_id are performed + } +) diff --git a/tests/testthat/test-registration_pseudobulk.R b/tests/testthat/test-registration_pseudobulk.R index 0b6deb6c..841dd12b 100644 --- a/tests/testthat/test-registration_pseudobulk.R +++ b/tests/testthat/test-registration_pseudobulk.R @@ -11,3 +11,45 @@ test_that("NA check works", { "var_registration" ) }) + + +#### Syntactic Variable Test #### +set.seed(20220907) ## Ensure reproducibility of example data +sce <- scuttle::mockSCE() +## Add some sample IDs +sce$sample_id <- sample(LETTERS[1:5], ncol(sce), replace = TRUE) + +## Add a sample-level covariate: age +ages <- rnorm(5, mean = 20, sd = 4) +names(ages) <- LETTERS[1:5] +sce$age <- ages[sce$sample_id] + +## add variable with one group +sce$batch <- "batch1" + +## non-syntactic inputs +sce$cluster_int <- sample(1:4, ncol(sce), replace = TRUE) +# sce$cluster_k <- paste0("k", sce$cluster_int) +sce$cluster_j <- paste0(sce$cluster_int, "j") +sce$cluster_l <- sample(c("L-1", "L2/3", "4L", "L5"), ncol(sce), replace = TRUE) + +test_that( + "warn for numeric var_registration", + expect_warning(registration_pseudobulk(sce, + var_registration = "cluster_int", + var_sample_id = "sample_id", + covars = c("age"), + min_ncells = NULL + )) +) + + +test_that( + "warn for non-syntactic var_registration", + expect_warning(registration_pseudobulk(sce, + var_registration = "cluster_l", + var_sample_id = "sample_id", + covars = c("age"), + min_ncells = NULL + )) +) diff --git a/tests/testthat/test-registration_wrapper.R b/tests/testthat/test-registration_wrapper.R new file mode 100644 index 00000000..16e3bdee --- /dev/null +++ b/tests/testthat/test-registration_wrapper.R @@ -0,0 +1,76 @@ +## Ensure reproducibility of example data +set.seed(20220907) + +## Generate example data +sce <- scuttle::mockSCE() + +## Add some sample IDs +sce$sample_id <- sample(LETTERS[1:5], ncol(sce), replace = TRUE) + +## Add a sample-level covariate: age +ages <- rnorm(5, mean = 20, sd = 4) +names(ages) <- LETTERS[1:5] +sce$age <- ages[sce$sample_id] + +## add variable with one group +sce$batch <- "batch1" + +## Add gene-level information +rowData(sce)$ensembl <- paste0("ENSG", seq_len(nrow(sce))) +rowData(sce)$gene_name <- paste0("gene", seq_len(nrow(sce))) + + +test_that( + "warning for k=2 variable", + example_modeling_results <- expect_warning( + registration_wrapper( + sce, + var_registration = "Treatment", + var_sample_id = "sample_id", + covars = c("age"), + gene_ensembl = "ensembl", + gene_name = "gene_name", + suffix = "wrapper" + ) + ) +) + +#### Syntactic Variable Test #### + +## catagorical var as int +sce$cluster_int <- sample(1:4, ncol(sce), replace = TRUE) +# sce$cluster_k <- paste0("k", sce$cluster_int) +sce$cluster_j <- paste0(sce$cluster_int, "j") +sce$cluster_l <- sample(c("L-1", "L2/3", "4L", "L5"), ncol(sce), replace = TRUE) + +table(sce$cluster_j) + +test_that( + "Numeric var_regisration throws warning", + expect_warning( + registration_wrapper( + sce, + var_registration = "cluster_int", + var_sample_id = "sample_id", + covars = c("age"), + gene_ensembl = "ensembl", + gene_name = "gene_name", + suffix = "wrapper" + ) + ) +) + +test_that( + "Non-Syntactic throws warning", + expect_warning( + registration_wrapper( + sce, + var_registration = "cluster_l", + var_sample_id = "sample_id", + covars = c("age"), + gene_ensembl = "ensembl", + gene_name = "gene_name", + suffix = "wrapper" + ) + ) +) diff --git a/tests/testthat/test-vis_clus.R b/tests/testthat/test-vis_clus.R new file mode 100644 index 00000000..ab011efb --- /dev/null +++ b/tests/testthat/test-vis_clus.R @@ -0,0 +1,17 @@ +test_that( + "vis_clus", + { + if (!exists("spe")) spe <- fetch_data("spe") + + # Bad spatialCoords + spe_temp <- spe + colnames(spatialCoords(spe_temp)) <- c("a", "b") + expect_error( + { + p <- vis_clus(spe_temp, clustervar = "sample_id") + }, + "^Abnormal spatial coordinates" + ) + rm(spe_temp) + } +) diff --git a/tests/testthat/test-vis_gene.R b/tests/testthat/test-vis_gene.R new file mode 100644 index 00000000..b4a171c1 --- /dev/null +++ b/tests/testthat/test-vis_gene.R @@ -0,0 +1,67 @@ +test_that( + "vis_gene", + { + if (!exists("spe")) spe <- fetch_data("spe") + + # Non-numeric column to plot + expect_error( + { + p <- vis_gene( + spe, + geneid = c("sum_umi", rownames(spe)[1], "layer_guess") + ) + }, + "'geneid' can not contain non-numeric colData columns\\." + ) + + # Bad sample ID + expect_error( + { + p <- vis_gene( + spe, + geneid = c("sum_umi", rownames(spe)[1]), + sampleid = "aaa" + ) + }, + "'spe\\$sample_id' must exist and contain the ID aaa" + ) + + # Bad assayname + expect_error( + { + p <- vis_gene( + spe, + geneid = c("sum_umi", rownames(spe)[1]), + assayname = "aaa" + ) + }, + "'aaa' is not an assay in 'spe'" + ) + + # Bad geneid + expect_error( + { + p <- vis_gene(spe, geneid = "aaa") + }, + "Could not find the 'geneid'\\(s\\) aaa" + ) + + # Trivially check success with legitimate input + expect_equal( + class(vis_gene(spe, geneid = c("sum_umi", rownames(spe)[1]))), + c("gg", "ggplot") + ) + + + # Bad spatialCoords + spe_temp <- spe + colnames(spatialCoords(spe_temp)) <- c("a", "b") + expect_error( + { + p <- vis_gene(spe_temp, geneid = "sum_umi") + }, + "^Abnormal spatial coordinates" + ) + rm(spe_temp) + } +) diff --git a/vignettes/guide_to_spatial_registration.Rmd b/vignettes/guide_to_spatial_registration.Rmd new file mode 100644 index 00000000..e1442a79 --- /dev/null +++ b/vignettes/guide_to_spatial_registration.Rmd @@ -0,0 +1,378 @@ +--- +title: "Guide to Spatial Registration" +author: + - name: Louise Huuki-Myers + affiliation: + - Lieber Institute for Brain Development + email: lahuuki@gmail.com +output: + BiocStyle::html_document: + self_contained: yes + toc: true + toc_float: true + toc_depth: 2 + code_folding: show +date: "`r doc_date()`" +package: "`r pkg_ver('spatialLIBD')`" +vignette: > + %\VignetteIndexEntry{Guide to Spatial Registration} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + crop = NULL ## Related to https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html +) +``` + + +```{r vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE} +## Track time spent on making the vignette +startTime <- Sys.time() + +## Bib setup +library("RefManageR") + +## Write bibliography information +bib <- c( + R = citation(), + BiocStyle = citation("BiocStyle")[1], + knitr = citation("knitr")[1], + RefManageR = citation("RefManageR")[1], + rmarkdown = citation("rmarkdown")[1], + sessioninfo = citation("sessioninfo")[1], + testthat = citation("testthat")[1], + spatialLIBD = citation("spatialLIBD")[1], + spatialLIBDpaper = citation("spatialLIBD")[2], + tran2021 = RefManageR::BibEntry( + bibtype = "Article", + key = "tran2021", + author = "Tran, Matthew N. and Maynard, Kristen R. and Spangler, Abby and Huuki, Louise A. and Montgomery, Kelsey D. and Sadashivaiah, Vijay and Tippani, Madhavi and Barry, Brianna K. and Hancock, Dana B. and Hicks, Stephanie C. and Kleinman, Joel E. and Hyde, Thomas M. and Collado-Torres, Leonardo and Jaffe, Andrew E. and Martinowich, Keri", + title = "Single-nucleus transcriptome analysis reveals cell-type-specific molecular signatures across reward circuitry in the human brain", + year = 2021, doi = "10.1016/j.neuron.2021.09.001", + journal = "Neuron" + ) +) +``` + + +# What is Spatial Registration? + +Spatial Registration is an analysis that compares the gene expression of groups +in a query RNA-seq data set (typically spatially resolved RNA-seq or single cell RNA-seq) to +groups in a reference spatially resolved RNA-seq data set (such annotated anatomical features). + +For spatial data, this can be helpful to compare manual annotations, +or annotating clusters. For scRNA-seq data it can check if +a cell type might be more concentrated in one area or anatomical feature of the +tissue. + +The spatial annotation process correlates the $t$-statistics from the gene enrichment +analysis between spatial features from the reference data set, with the $t$-statistics +from the gene enrichment of features in the query data set. Pairs with high +positive correlation show where similar patterns of gene expression are occurring +and what anatomical feature the new spatial feature or cell population may map to. + +## Overview of the Spatial Registration method + +1. Perform gene set enrichment analysis between spatial features (ex. anatomical +features, histological layers) on reference spatial data set. Or access existing statistics. + +2. Perform gene set enrichment analysis between features (ex. new +annotations, data-driven clusters) on new query data set. + +3. Correlate the $t$-statistics between the reference and query features. + +4. Annotate new spatial features with the most strongly associated reference feature. + +5. Plot correlation heat map to observe patterns between the two data sets. + +

+![Spatial Registration Overview](http://research.libd.org/spatialLIBD/reference/figures/spatial_registration.png){width=100%} +

+ + +# How to run Spatial Registration with `spatialLIBD` tools + +## Introduction. + +In this example we will utilize the human DLPFC 10x Genomics Visium dataset +from Maynard, Collado-Torres et al. `r Citep(bib[['spatialLIBDpaper']])` as the **reference**. +This data contains manually annotated features: the **six cortical layers + white matter** +present in the DLPFC. We will use the pre-calculated enrichment statistics for the +layers, which are available from `r Biocpkg("spatialLIBD")`. + +

+ +![Dotplot of sample from refernce DLPFC data, colored by annotated layers](http://research.libd.org/spatialLIBD/reference/figures/README-access_data-1.png){width=100%} +

+ + +The **query** dataset will be the DLPFC single nucleus RNA-seq (snRNA-seq) data from `r Citep(bib[['tran2021']])`. + +We will compare the gene expression in the cell type populations of the **query** +dataset to the annotated **layers** in the **reference**. + + + +## Important Notes + +### Required knowledge + +It may be helpful to review _Introduction to spatialLIBD_ vignette available through [GitHub](http://research.libd.org/spatialLIBD/articles/spatialLIBD.html) or [Bioconductor](https://bioconductor.org/packages/spatialLIBD) for more information about this data set and R package. + + +### Citing `spatialLIBD` + +We hope that `r Biocpkg("spatialLIBD")` will be useful for your research. Please use the following information to cite the package and the overall approach. Thank you! + +```{r "citation"} +## Citation info +citation("spatialLIBD") +``` + +## Setup + +### Install `spatialLIBD` + +```{r "install", eval = FALSE} +if (!requireNamespace("BiocManager", quietly = TRUE)) { + install.packages("BiocManager") +} + +BiocManager::install("spatialLIBD") + +## Check that you have a valid Bioconductor installation +BiocManager::valid() +``` + +### Load required packages + +```{r "start", message=FALSE} +library("spatialLIBD") +library("SingleCellExperiment") +``` + +## Download Data + +### Spatial Reference + +The reference data is easily accessed through `r Biocpkg("spatialLIBD")`. The modeling results +for the annotated layers is already calculated and can be accessed with the `fetch_data()` function. + +This data contains the results form three models (anova, enrichment, and pairwise), +we will use the **enrichment** results for spatial registration. The tables contain the +$t$-statistics, p-values, and gene ensembl ID and symbol. + +```{r "fetch_refrence"} +## get reference layer enrichment statistics +layer_modeling_results <- fetch_data(type = "modeling_results") + +layer_modeling_results$enrichment[1:5, 1:5] +``` + + +### Query Data: snRNA-seq + +For the query data set, we will use the public single nucleus RNA-seq (snRNA-seq) +data from `r Citep(bib[['tran2021']])` can be accessed on [github](https://github.com/LieberInstitute/10xPilot_snRNAseq-human#processed-data). + +This data is also from postmortem human brain DLPFC, and contains gene +expression data for 11k nuclei and 19 cell types. + +We will use `BiocFileCache()` to cache this data. It is stored as a `SingleCellExperiment` +object named `sce.dlpfc.tran`, and takes 1.01 GB of RAM memory to load. + +```{r "download_sce_data"} +# Download and save a local cache of the data available at: +# https://github.com/LieberInstitute/10xPilot_snRNAseq-human#processed-data +bfc <- BiocFileCache::BiocFileCache() +url <- paste0( + "https://libd-snrnaseq-pilot.s3.us-east-2.amazonaws.com/", + "SCE_DLPFC-n3_tran-etal.rda" +) +local_data <- BiocFileCache::bfcrpath(url, x = bfc) + +load(local_data, verbose = TRUE) +``` + + +DLPFC tissue consists of many cell types, some are quite rare and will not have enough data to complete the analysis + +```{r "check_cell_types"} +table(sce.dlpfc.tran$cellType) +``` + +The data will be pseudo-bulked over `donor` x `cellType`, it is recommended to drop +groups with < 10 nuclei (this is done automatically in the pseudobulk step). + +```{r "donor_x_cellType"} +table(sce.dlpfc.tran$donor, sce.dlpfc.tran$cellType) +``` + + +## Get Enrichment statistics for snRNA-seq data + +`spatialLIBD` contains many functions to compute `modeling_results` for the query sc/snRNA-seq or spatial data. + +**The process includes the following steps** + +1. `registration_pseudobulk()`: Pseudo-bulks data, filter low expressed genes, and normalize counts +2. `registration_mod()`: Defines the statistical model that will be used for computing the block correlation +3. `registration_block_cor()` : Computes the block correlation using the sample ID as the blocking factor, used as correlation in eBayes call +2. `registration_stats_enrichment()` : Computes the gene enrichment $t$-statistics (one group vs. All other groups) + +The function `registration_wrapper()` makes life easy by wrapping these functions together in to one step! + +```{r "run_registration_wrapper"} +## Perform the spatial registration +sce_modeling_results <- registration_wrapper( + sce = sce.dlpfc.tran, + var_registration = "cellType", + var_sample_id = "donor", + gene_ensembl = "gene_id", + gene_name = "gene_name" +) +``` + +## Extract Enrichment t-statistics + +```{r "extract_t_stats"} +## check out table on enrichment t-statistics +sce_modeling_results$enrichment[1:5, 1:5] +``` + + +## Correlate statsics with Layer Reference + +```{r "layer_stat_cor"} +cor_layer <- layer_stat_cor( + stats = sce_modeling_results$enrichment, + modeling_results = layer_modeling_results, + model_type = "enrichment", + top_n = 100 +) + +cor_layer +``` + +# Explore Results + +Now we can use these correlation values to learn about the cell types. + +## Create Heatmap of Correlations + +We can see from this heatmap what layers the different cell types are associated with. + +* Oligo with WM + +* Astro with Layer 1 + +* Excitatory neurons to different layers of the cortex + +* Weak associate with Inhibitory Neurons + + +```{r layer_cor_plot} +layer_stat_cor_plot(cor_layer) +``` + +## Annotate Cell Types by Top Correlation + +We can use `annotate_registered_clusters` to create annotation labels for the +cell types based on the correlation values. + +```{r "annotate"} +anno <- annotate_registered_clusters( + cor_stats_layer = cor_layer, + confidence_threshold = 0.25, + cutoff_merge_ratio = 0.25 +) + +anno +``` + +## Plot Annotated Cell Types + +Finally, we can update our heatmap with colors and annotations based on cluster +registration for the snRNA-seq clusters. + +```{r "plot_anno"} +layer_stat_cor_plot( + cor_layer, + query_colors = get_colors(clusters = rownames(cor_layer)), + reference_colors = libd_layer_colors, + annotation = anno, + cluster_rows = FALSE, + cluster_columns = FALSE +) +``` + + +# Reproducibility + +The `r Biocpkg("spatialLIBD")` package `r Citep(bib[["spatialLIBD"]])` was made possible thanks to: + +* R `r Citep(bib[["R"]])` +* `r Biocpkg("BiocStyle")` `r Citep(bib[["BiocStyle"]])` +* `r CRANpkg("knitr")` `r Citep(bib[["knitr"]])` +* `r CRANpkg("RefManageR")` `r Citep(bib[["RefManageR"]])` +* `r CRANpkg("rmarkdown")` `r Citep(bib[["rmarkdown"]])` +* `r CRANpkg("sessioninfo")` `r Citep(bib[["sessioninfo"]])` +* `r CRANpkg("testthat")` `r Citep(bib[["testthat"]])` + +This package was developed using `r BiocStyle::Biocpkg("biocthis")`. + + +Code for creating the vignette + +```{r createVignette, eval=FALSE} +## Create the vignette +library("rmarkdown") +system.time(render("guide_to_spatial_registration.Rmd", "BiocStyle::html_document")) + +## Extract the R code +library("knitr") +knit("guide_to_spatial_registration.Rmd", tangle = TRUE) +``` + +Date the vignette was generated. + +```{r reproduce1, echo=FALSE} +## Date the vignette was generated +Sys.time() +``` + +Wallclock time spent generating the vignette. + +```{r reproduce2, echo=FALSE} +## Processing time in seconds +totalTime <- diff(c(startTime, Sys.time())) +round(totalTime, digits = 3) +``` + +`R` session information. + +```{r reproduce3, echo=FALSE} +## Session info +library("sessioninfo") +options(width = 120) +session_info() +``` + + + +# Bibliography + +This vignette was generated using `r Biocpkg("BiocStyle")` `r Citep(bib[["BiocStyle"]])` +with `r CRANpkg("knitr")` `r Citep(bib[["knitr"]])` and `r CRANpkg("rmarkdown")` `r Citep(bib[["rmarkdown"]])` running behind the scenes. + +Citations made with `r CRANpkg("RefManageR")` `r Citep(bib[["RefManageR"]])`. + +```{r vignetteBiblio, results = "asis", echo = FALSE, warning = FALSE, message = FALSE} +## Print bibliography +PrintBibliography(bib, .opts = list(hyperlink = "to.doc", style = "html")) +``` diff --git a/vignettes/multi_gene_plots.Rmd b/vignettes/multi_gene_plots.Rmd new file mode 100644 index 00000000..41d3eac0 --- /dev/null +++ b/vignettes/multi_gene_plots.Rmd @@ -0,0 +1,350 @@ +--- +title: "Guide to Multi-Gene Plots" +author: + - name: Nicholas J. Eagles + affiliation: + - &libd Lieber Institute for Brain Development, Johns Hopkins Medical Campus + email: nickeagles77@gmail.com + - name: Leonardo Collado-Torres + affiliation: + - *libd + - &biostats Department of Biostatistics, Johns Hopkins Bloomberg School of Public Health + email: lcolladotor@gmail.com +output: + BiocStyle::html_document: + self_contained: yes + toc: true + toc_float: true + toc_depth: 2 + code_folding: show +date: "`r doc_date()`" +package: "`r pkg_ver('spatialLIBD')`" +vignette: > + %\VignetteIndexEntry{Guide to Multi-Gene Plots} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE} +## For links +library("BiocStyle") + +## Track time spent on making the vignette +startTime <- Sys.time() + +## Bib setup +library("RefManageR") + +## Write bibliography information +bib <- c( + R = citation(), + BiocStyle = citation("BiocStyle")[1], + knitr = citation("knitr")[3], + MatrixGenerics = citation("MatrixGenerics")[1], + RColorBrewer = citation("RColorBrewer")[1], + RefManageR = citation("RefManageR")[1], + rmarkdown = citation("rmarkdown")[1], + sessioninfo = citation("sessioninfo")[1], + SpatialExperiment = citation("SpatialExperiment")[1], + spatialLIBD = citation("spatialLIBD")[1], + HumanPilot = citation("spatialLIBD")[2], + spatialDLPFC = citation("spatialLIBD")[3], + tran2021 = RefManageR::BibEntry( + bibtype = "Article", + key = "tran2021", + author = "Tran, Matthew N. and Maynard, Kristen R. and Spangler, Abby and Huuki, Louise A. and Montgomery, Kelsey D. and Sadashivaiah, Vijay and Tippani, Madhavi and Barry, Brianna K. and Hancock, Dana B. and Hicks, Stephanie C. and Kleinman, Joel E. and Hyde, Thomas M. and Collado-Torres, Leonardo and Jaffe, Andrew E. and Martinowich, Keri", + title = "Single-nucleus transcriptome analysis reveals cell-type-specific molecular signatures across reward circuitry in the human brain", + year = 2021, doi = "10.1016/j.neuron.2021.09.001", + journal = "Neuron" + ) +) +``` + +One of the goals of `spatialLIBD` is to provide options for visualizing Visium data by 10x Genomics. In +particular, `vis_gene()` and `vis_clus()` allow plotting of individual continuous or +discrete quantities belonging to each Visium spot, in a spatially accurate manner and +optionally atop histology images. + +This vignette explores a more complex capability of `vis_gene()`: to visualize a summary +metric of several continuous variables simultaneously. We'll start with a basic one-gene +use case for `vis_gene()` before moving to more advanced cases. + +First, let's load some example data for us to work on. This data is a subset from a recent publication with Visium data from the dorsolateral prefrontal cortex (DLPFC) `r Citep(bib[['spatialDLPFC']])`. + +```{r "setup", message = FALSE, warning = FALSE} +library("spatialLIBD") +spe <- fetch_data(type = "spatialDLPFC_Visium_example_subset") +spe +``` + +Next, let's define several genes known to be markers for white matter `r Citep(bib[['tran2021']])`. + +```{r "white_matter_genes"} +white_matter_genes <- c("GFAP", "AQP4", "MBP", "PLP1") +white_matter_genes <- rowData(spe)$gene_search[ + rowData(spe)$gene_name %in% white_matter_genes +] + +## Our list of white matter genes +white_matter_genes +``` + +# Plotting One Gene + +A typical use of `vis_gene()` involves +plotting the spatial distribution of a single gene or continuous variable of interest. +For example, let's plot just the expression of *GFAP*. + +```{r "single_gene"} +vis_gene( + spe, + geneid = white_matter_genes[1], + point_size = 1.5 +) +``` + +We can see a little **V** shaped section with higher expression of this gene. This seems to mark the location of layer 1. The bottom right corner seems to mark the location of white matter. + +```{r "histology_only"} +plot(imgRaster(spe)) +``` + +This particular gene is known to have high expression in both layer 1 and white matter in the dorsolateral prefrontal cortex as can be seen below `r Citep(bib[['HumanPilot']])`. It's the 386th highest ranked white matter marker gene based on the enrichment test. + +```{r "GFAP_boxplot"} +modeling_results <- fetch_data(type = "modeling_results") +sce_layer <- fetch_data(type = "sce_layer") +sig_genes <- sig_genes_extract_all( + n = 400, + modeling_results = modeling_results, + sce_layer = sce_layer +) +i_gfap <- subset(sig_genes, gene == "GFAP" & + test == "WM")$top +i_gfap +set.seed(20200206) +layer_boxplot( + i = i_gfap, + sig_genes = sig_genes, + sce_layer = sce_layer +) +``` + +# Plotting Multiple Genes + +As of version 1.15.2, the `geneid` parameter to `vis_gene()` may also take a vector of genes or continuous +variables in `colData(spe)`. In this way, the expression of multiple continuous variables can be summarized +into a single value for each spot, displayed just as a single input for `geneid` would be. +`spatialLIBD` provides three methods for merging the information from multiple continuous +variables, which may be specified through the `multi_gene_method` parameter to `vis_gene()`. + +## Averaging Z-scores + +The default is `multi_gene_method = "z_score"`. Essentially, each continuous variable (could be a mix of genes with spot-level covariates) is +normalized to be a Z-score by centering and scaling. If a particular spot has a value of `1` for a particular continuous variable, +this would indicate that spot has expression one standard deviation above the mean expression +across all spots for that continuous variable. Next, for each spot, Z-scores are averaged across continuous variables. +Compared to simply averaging raw gene expression across genes, the `"z_score"` method +is insensitive to absolute expression levels (highly expressed genes don't dominate plots), +and instead focuses on how each gene varies spatially, weighting each gene equally. + +Let's plot all four white matter genes using this method. + +```{r "multi_gene_z"} +vis_gene( + spe, + geneid = white_matter_genes, + multi_gene_method = "z_score", + point_size = 1.5 +) +``` + +Now the bottom right corner where the white matter is located starts to pop up more, though the mixed layer 1 and white matter signal provided by *GFAP* is still noticeable (the **V** shape). + +## Summarizing with PCA + +Another option is `multi_gene_method = "pca"`. A matrix is formed, where genes or continuous +features are columns, and spots are rows. PCA is performed, and the first principal component +is plotted spatially. The idea is that the first PC captures the dominant spatial signature +of the feature set. Next, its direction is reversed if the majority of coefficients (from the +"rotation matrix") across features are negative. When the features are genes whose expression +is highly correlated (like our white-matter-gene example!), this optional reversal encourages +higher values in the plot to represent areas of higher expression of the features. For our case, +this leads to the intuitive result that "expression" is higher in white matter for white-matter +genes, which is not otherwise guaranteed (the "sign" of PCs is arbitrary)! + +```{r "multi_gene_pca"} +vis_gene( + spe, + geneid = white_matter_genes, + multi_gene_method = "pca", + point_size = 1.5 +) +``` + +## Plotting Sparsity of Expression + +This final option is `multi_gene_method = "sparsity"`. For each spot, the proportion of features +with positive expression is plotted. This method is typically only meaningful when features +are raw gene counts that are expected to be quite sparse (have zero counts) at certain regions +of the tissue and not others. It also performs better with a larger number of genes; with our +example of four white-matter genes, the proportion may only hold values of 0, 0.25, 0.5, 0.75, +and 1, which is not visually informative. + +The white-matter example is thus poor due to lack of sparsity and low number of genes as you can see below. + +```{r "multi_gene_sparsity"} +vis_gene( + spe, + geneid = white_matter_genes, + multi_gene_method = "sparsity", + point_size = 1.5 +) +``` + +# With more marker genes + +Below we can plot via `multi_gene_method = "z_score"` the top 25 or top 50 white matter marker genes identified via the enrichment test in a previous dataset `r Citep(bib[['HumanPilot']])`. + +```{r "multi_gene_z_score_top_enriched"} +vis_gene( + spe, + geneid = subset(sig_genes, test == "WM")$ensembl[seq_len(25)], + multi_gene_method = "z_score", + point_size = 1.5 +) + +vis_gene( + spe, + geneid = subset(sig_genes, test == "WM")$ensembl[seq_len(50)], + multi_gene_method = "z_score", + point_size = 1.5 +) +``` + +We can repeat this process for `multi_gene_method = "pca"`. + +```{r "multi_gene_pca_top_enriched"} +vis_gene( + spe, + geneid = subset(sig_genes, test == "WM")$ensembl[seq_len(25)], + multi_gene_method = "pca", + point_size = 1.5 +) + +vis_gene( + spe, + geneid = subset(sig_genes, test == "WM")$ensembl[seq_len(50)], + multi_gene_method = "pca", + point_size = 1.5 +) +``` + +And finally, lets look at the results of `multi_gene_method = "sparsity"`. + +```{r "multi_gene_sparsity_top_enriched"} +vis_gene( + spe, + geneid = subset(sig_genes, test == "WM")$ensembl[seq_len(25)], + multi_gene_method = "sparsity", + point_size = 1.5 +) + +vis_gene( + spe, + geneid = subset(sig_genes, test == "WM")$ensembl[seq_len(50)], + multi_gene_method = "sparsity", + point_size = 1.5 +) +``` + +In this case, it seems that for both the top 25 or top 50 marker genes, `z_score` and `pca` provided cleaner visualizations than `sparsity`. Give them a try on your own datasets! + +# Visualizing non-gene continuous variables + +So far, we have only visualized multiple genes. But these methods can be applied to several continuous variables stored in `colData(spe)` as shown below. + +```{r "colData_example"} +vis_gene( + spe, + geneid = c("sum_gene", "sum_umi"), + multi_gene_method = "z_score", + point_size = 1.5 +) +``` + +We can also combine continuous variables from `colData(spe)` along with actual genes. We can combine for example the expression of *GFAP*, which is a known astrocyte marker gene, with the spot deconvolution results for astrocytes computed using Tangram `r Citep(bib[['spatialDLPFC']])`. + +```{r "colData_plus_gene"} +vis_gene( + spe, + geneid = c("broad_tangram_astro"), + point_size = 1.5 +) +vis_gene( + spe, + geneid = c("broad_tangram_astro", white_matter_genes[1]), + multi_gene_method = "pca", + point_size = 1.5 +) +``` + +These tools enable you to further explore your data in new ways. Have fun using them! + + +# Reproducibility + +Code for creating the vignette + +```{r createVignette, eval=FALSE} +## Create the vignette +library("rmarkdown") +system.time(render("multi_gene_plots.Rmd")) + +## Extract the R code +library("knitr") +knit("multi_gene_plots.Rmd", tangle = TRUE) +``` + + +Date the vignette was generated. + +```{r reproduce1, echo=FALSE} +## Date the vignette was generated +Sys.time() +``` + +Wallclock time spent generating the vignette. + +```{r reproduce2, echo=FALSE} +## Processing time in seconds +totalTime <- diff(c(startTime, Sys.time())) +round(totalTime, digits = 3) +``` + +`R` session information. + +```{r reproduce3, echo=FALSE} +## Session info +library("sessioninfo") +options(width = 120) +session_info() +``` + +# Bibliography + +This vignette was generated using `r Biocpkg('BiocStyle')` `r Citep(bib[['BiocStyle']])`, `r CRANpkg('knitr')` `r Citep(bib[['knitr']])` and `r CRANpkg('rmarkdown')` `r Citep(bib[['rmarkdown']])` running behind the scenes. + +Citations made with `r CRANpkg('RefManageR')` `r Citep(bib[['RefManageR']])`. + +```{r vignetteBiblio, results = 'asis', echo = FALSE, warning = FALSE, message = FALSE} +## Print bibliography +PrintBibliography(bib, .opts = list(hyperlink = "to.doc", style = "html")) +``` diff --git a/vignettes/spatialLIBD.Rmd b/vignettes/spatialLIBD.Rmd index 4e86fa60..694d47d9 100644 --- a/vignettes/spatialLIBD.Rmd +++ b/vignettes/spatialLIBD.Rmd @@ -49,6 +49,8 @@ bib <- c( BiocFileCache = citation("BiocFileCache")[1], BiocGenerics = citation("BiocGenerics")[1], BiocStyle = citation("BiocStyle")[1], + circlize = citation("circlize")[1], + ComplexHeatmap = citation("ComplexHeatmap")[1], cowplot = citation("cowplot")[1], DT = citation("DT")[1], edgeR = citation("edgeR")[1], @@ -77,6 +79,8 @@ bib <- c( SpatialExperiment = citation("SpatialExperiment")[1], spatialLIBD = citation("spatialLIBD")[1], spatialLIBDpaper = citation("spatialLIBD")[2], + spatialDLPFC = citation("spatialLIBD")[3], + VisiumSPGAD = citation("spatialLIBD")[4], statmod = citation("statmod")[1], SummarizedExperiment = citation("SummarizedExperiment")[1], testthat = citation("testthat")[1], @@ -464,7 +468,7 @@ Once we have computed this correlation matrix, we can then visualize it using `l ```{r 'layer_stat_cor_plot', fig.wide = TRUE} ## Visualize the correlation matrix -layer_stat_cor_plot(cor_stats_layer, max = max(cor_stats_layer)) +layer_stat_cor_plot(cor_stats_layer) ``` In order to fully interpret the resulting heatmap you need to know what each of the cell clusters labels mean. In this case, the syntax is `xx (Y)` where `xx` is the cluster number and `Y` is: @@ -478,7 +482,7 @@ In order to fully interpret the resulting heatmap you need to know what each of You can find the version with the full names [here](https://github.com/LieberInstitute/HumanPilot/blob/master/Analysis/Layer_Guesses/pdf/snRNAseq_overlap_heatmap.pdf) if you are interested in it. -The `r CRANpkg('shiny')` application provided by `r Biocpkg('spatialLIBD')` `r Citep(bib[['spatialLIBD']])` allows users to upload CSV file with these t-statistics, view the correlation heatmaps, download them, and download the correlation matrix. An example CSV file is provided [here](https://github.com/LieberInstitute/spatialLIBD/blob/master/data-raw/tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer.csv). +The `r CRANpkg('shiny')` application provided by `r Biocpkg('spatialLIBD')` `r Citep(bib[['spatialLIBD']])` allows users to upload CSV file with these t-statistics, view the correlation heatmaps, download them, and download the correlation matrix. An example CSV file is provided [here](https://github.com/LieberInstitute/spatialLIBD/blob/devel/data-raw/tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer.csv). ## Gene set enrichment @@ -529,7 +533,7 @@ gene_set_enrichment_plot( ) ``` -The `r CRANpkg('shiny')` application provided by `r Biocpkg('spatialLIBD')` `r Citep(bib[['spatialLIBD']])` allows users to upload CSV file their gene lists, compute the enrichment statistics, visualize them, download the PDF, and download the enrichment table. An example CSV file is provided [here](https://github.com/LieberInstitute/spatialLIBD/blob/master/data-raw/asd_sfari_geneList.csv). +The `r CRANpkg('shiny')` application provided by `r Biocpkg('spatialLIBD')` `r Citep(bib[['spatialLIBD']])` allows users to upload CSV file their gene lists, compute the enrichment statistics, visualize them, download the PDF, and download the enrichment table. An example CSV file is provided [here](https://github.com/LieberInstitute/spatialLIBD/blob/devel/data-raw/asd_sfari_geneList.csv). # Re-shaping your data to our structure @@ -567,10 +571,41 @@ If you are interested in reshaping your data to fit our structure, we do not pro * `Layer_Notebook.R` available [here](https://github.com/LieberInstitute/HumanPilot/blob/master/Analysis/Layer_Notebook.R) reads in the Visium data and builds a list of `RangeSummarizedExperiment()` objects from `r Biocpkg('SummarizedExperiment')`, one per sample (image) that is eventually saved as `Human_DLPFC_Visium_processedData_rseList.rda`. * `convert_sce.R` available [here](https://github.com/LieberInstitute/HumanPilot/blob/master/Analysis/convert_sce.R) reads in `Human_DLPFC_Visium_processedData_rseList.rda` and builds an initial `sce` object with image data under `metadata(sce)$image` which is a single data.frame. Subsetting doesn't automatically subset the image, so you have to do it yourself when plotting as is done by `vis_clus_p()` and `vis_gene_p()`. Having the data from all images in a single object allows you to use the spot-level data from all images to compute clusters and do other similar analyses to the ones you would do with sc/snRNA-seq data. The script creates the `Human_DLPFC_Visium_processedData_sce.Rdata` file. * `sce_scran.R` available [here](https://github.com/LieberInstitute/HumanPilot/blob/master/Analysis/sce_scran.R) then uses `r Biocpkg('scran')` to read in `Human_DLPFC_Visium_processedData_sce.Rdata`, compute the highly variable genes (stored in our final `sce` object at `rowData(sce)$is_top_hvg`), perform dimensionality reduction (PCA, TSNE, UMAP) and identify clusters using the data from all images. The resulting data is then stored as `Human_DLPFC_Visium_processedData_sce_scran.Rdata` and is the main object used throughout our analysis code `r Citep(bib[['spatialLIBDpaper']])`. -* `make-data_spatialLIBD.R` available in the source version of `spatialLIBD` and [online here](https://github.com/LieberInstitute/spatialLIBD/blob/master/inst/scripts/make-data_spatialLIBD.R) is the script that reads in `Human_DLPFC_Visium_processedData_sce_scran.Rdata` as well as some other outputs from our analysis and combines them into the final `sce` and `sce_layer` objects provided by `r Biocpkg('spatialLIBD')` `r Citep(bib[['spatialLIBD']])`. This script simplifies some operations in order to simplify the code behind the `r CRANpkg('shiny')` application provided by `r Biocpkg('spatialLIBD')`. +* `make-data_spatialLIBD.R` available in the source version of `spatialLIBD` and [online here](https://github.com/LieberInstitute/spatialLIBD/blob/devel/inst/scripts/make-data_spatialLIBD.R) is the script that reads in `Human_DLPFC_Visium_processedData_sce_scran.Rdata` as well as some other outputs from our analysis and combines them into the final `sce` and `sce_layer` objects provided by `r Biocpkg('spatialLIBD')` `r Citep(bib[['spatialLIBD']])`. This script simplifies some operations in order to simplify the code behind the `r CRANpkg('shiny')` application provided by `r Biocpkg('spatialLIBD')`. -You don't necessarily need to do all of this to use the functions provided by `r Biocpkg('spatialLIBD')`. Note that external to the R objects, for the `r CRANpkg('shiny')` application provided by `r Biocpkg('spatialLIBD')` you will need to have the `tissue_lowres_image.png` image files in a directory structure by sample as shown [here](https://github.com/LieberInstitute/spatialLIBD/tree/master/inst/app/www/data) in order for the interactive visualizations made with `r CRANpkg('plotly')` to work. +You don't necessarily need to do all of this to use the functions provided by `r Biocpkg('spatialLIBD')`. Note that external to the R objects, for the `r CRANpkg('shiny')` application provided by `r Biocpkg('spatialLIBD')` you will need to have the `tissue_lowres_image.png` image files in a directory structure by sample as shown [here](https://github.com/LieberInstitute/spatialLIBD/tree/devel/inst/app/www/data) in order for the interactive visualizations made with `r CRANpkg('plotly')` to work. +# More spatially-resolved LIBD datasets + +Over time `spatialLIBD::fetch_data()` has been expanded to provide access to other datasets generated by our teams at the Lieber Institute for Brain Development ([LIBD](libd.org)) that have also been analyzed with `spatialLIBD`. + +## spatialDLPFC + +Through `spatialLIBD::fetch_data()` you can also download the data from the [_Integrated single cell and unsupervised spatial transcriptomic analysis defines molecular anatomy of the human dorsolateral prefrontal cortex_](https://www.biorxiv.org/content/10.1101/2023.02.15.528722v1) project, also known as `spatialDLPFC` `r Citep(bib[['spatialDLPFC']])`. See http://research.libd.org/spatialDLPFC/ for more information about this project. + +See the Twitter thread 🧵 below for a brief overview of the [`#spatialDLPFC`](https://twitter.com/search?q=%23spatialDLPFC&src=typed_query) project. + + + +## Visium_SPG_AD + +Through `spatialLIBD::fetch_data()` you can also download the data from the [_Influence of Alzheimer’s disease related neuropathology on local microenvironment gene expression in the human inferior temporal cortex _](https://www.biorxiv.org/content/10.1101/2023.04.TODO) project, also known as `Visium_SPG_AD` `r Citep(bib[['VisiumSPGAD']])`. See http://research.libd.org/Visium_SPG_AD/ for more information about this project. + +See the Twitter thread 🧵 below for a brief overview of the [`#Visium_SPG_AD`](https://twitter.com/search?q=%23Visium_SPG_AD&src=typed_query) project. + +TODO + +## LIBD data outside `spatialLIBD` + +Sometimes our collaborators have shared data through other venues. So not all LIBD spatially-resolved transcriptomics data from the [Keri Martinowich](https://www.libd.org/team/keri-martinowich-phd/), [Kristen Maynard](https://www.libd.org/team/kristen-maynard-phd/), and [Leonardo Collado-Torres](http://lcolladotor.github.io/) teams has been released through `spatialLIBD`. However, it is very much compatible with `spatialLIBD` and can be analyzed or visualized with `spatialLIBD` functions. + +### locus-c + +[_The gene expression landscape of the human locus coeruleus revealed by single-nucleus and spatially-resolved transcriptomics_](https://www.biorxiv.org/content/10.1101/2022.10.28.514241v1), also known as `locus-c`, is not available through `spatialLIBD`, but you might be interested in checking out the excellent `r Biocpkg("WeberDivechaLCdata")` package for more details. See https://github.com/lmweber/locus-c for more details about the `locus-c` project. + +See the Twitter thread 🧵 below for a brief overview of the `locus-c` project. + + # Reproducibility @@ -582,6 +617,8 @@ The `r Biocpkg('spatialLIBD')` package `r Citep(bib[['spatialLIBD']])` was made * `r Biocpkg('BiocFileCache')` `r Citep(bib[['BiocFileCache']])` * `r Biocpkg('BiocGenerics')` `r Citep(bib[['BiocGenerics']])` * `r Biocpkg('BiocStyle')` `r Citep(bib[['BiocStyle']])` +* `r CRANpkg('circlize')` `r Citep(bib[['circlize']])` +* `r Biocpkg('ComplexHeatmap')` `r Citep(bib[['ComplexHeatmap']])` * `r CRANpkg('cowplot')` `r Citep(bib[['cowplot']])` * `r CRANpkg('DT')` `r Citep(bib[['DT']])` * `r Biocpkg('edgeR')` `r Citep(bib[['edgeR']])`