# literate-programming-demo.R (65 lines, 52 loc, 2.85 KB)
# NOTE: the repository this file was taken from was archived by its owner on
# Sep 14, 2021 and is now read-only.
## ----one time setup, eval=FALSE------------------------------------------
## ### To install the bigrquery package. The currently released version 0.3.0 does not yet
## ### have the parameter to use Standard SQL instead of Legacy SQL, so we install from github.
## library(devtools)
## install_github('rstats-db/bigrquery')
## ----initialize, message=FALSE, warning=FALSE----------------------------
library(bigrquery)
library(ggplot2)
library(xtable)
## ----eval=FALSE----------------------------------------------------------
## ######################[ CHANGE ME ]##################################
## # This codelab assumes that the current working directory is where the Rmd file resides.
## setwd("/YOUR/PATH/TO/getting-started-bigquery/RMarkdown")
##
## # Set the Google Cloud Platform project id under which these queries will run.
## project <- "YOUR-PROJECT-ID"
## #####################################################################
## ------------------------------------------------------------------------
# By default this codelab runs upon the Illumina Platinum Genomes Variants.
# Change the table here if you wish to run these queries against a different table.
# DisplayAndDispatchQuery() substitutes this value for the "@THE_TABLE"
# placeholder found in each SQL file before the query is sent to BigQuery.
theTable <- "genomics-public-data.platinum_genomes.variants"
## ------------------------------------------------------------------------
# Read a SQL template, point it at `theTable`, echo it, and run it on BigQuery.
#
# Args:
#   queryUri: Path or URL of a SQL file. Every literal "@THE_TABLE"
#     placeholder in its text is replaced with the value of the global
#     `theTable`.
#
# Returns:
#   The value returned by query_exec() for the substituted SQL (the rest of
#   this script passes it to head(), ggplot() and xtable()).
#
# Relies on the globals `theTable` and `project` being defined before the call.
DisplayAndDispatchQuery <- function(queryUri) {
  # Read in the SQL from a file or URL (at most 1e6 characters are read).
  querySql <- readChar(queryUri, nchars = 1e6)
  # Replace EVERY occurrence of the placeholder, not only the first one, so
  # that templates referencing the table several times (joins, subqueries)
  # are fully resolved. fixed = TRUE keeps the pattern and the table name
  # literal instead of treating them as a regular expression.
  querySql <- gsub("@THE_TABLE", theTable, querySql, fixed = TRUE)
  # Display the updated SQL.
  cat(querySql)
  # Dispatch the query to BigQuery for execution, using Standard SQL.
  query_exec(querySql, project, use_legacy_sql = FALSE)
}
## ----comment=NA----------------------------------------------------------
# Run the query stored in sample-variant-counts-for-brca1.sql.
result <- DisplayAndDispatchQuery("../sql/sample-variant-counts-for-brca1.sql")
## ----result, comment=NA--------------------------------------------------
# Inspect what came back: first rows, per-column summary, and structure.
head(result)
summary(result)
str(result)
## ----viz, fig.align="center", fig.width=10-------------------------------
# Horizontal bar chart: one bar per call_set_name, bar length = variant_count.
ggplot(result, aes(x = call_set_name, y = variant_count)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  ggtitle("Count of Variants Per Sample")
## ----comment=NA----------------------------------------------------------
# Run the query stored in variant-level-data-for-brca1.sql.
result <- DisplayAndDispatchQuery("../sql/variant-level-data-for-brca1.sql")
## ----echo=FALSE, message=FALSE, warning=FALSE, comment=NA, results="asis"----
# Render the first rows as an HTML table without row names. FALSE is spelled
# out because the shorthand `F` is an ordinary variable that can be reassigned.
print(xtable(head(result)), type = "html", include.rownames = FALSE)
## ----comment=NA----------------------------------------------------------
# Run the query stored in sample-level-data-for-brca1.sql.
result <- DisplayAndDispatchQuery("../sql/sample-level-data-for-brca1.sql")
## ----echo=FALSE, message=FALSE, warning=FALSE, comment=NA, results="asis"----
# Render the first rows as an HTML table without row names.
print(xtable(head(result)), type = "html", include.rownames = FALSE)
## ----provenance, comment=NA----------------------------------------------
# Record the R version and loaded packages for reproducibility.
sessionInfo()