title

author

date

output

Fig4BC_S4BC

N. Alcala

16/5/2023

html_document

keep_md
true

Code to produce Figs. 4B, C from Dayton et al. (Submitted)

load libraries

library(maftools)
library(tidyverse)
library(readxl)
library(ggpubr)

Create useful vectors

Colors of experiments

colors_org = c(LNET2="#aade87ff",LNET6="#5fd38dff",LNET13="#16502dff",LNET14="#6f917cff",
               LNET5="#e6a73cff",LNET10="#ff9955ff",LNET15="#ffd42aff", LNET16 = "#ff6600ff", LNET18= "#d0742fff", 
               LNET19="#2aff80ff", 
               LNET20 = "#f6e62bff", 
               LCNEC3="#ff8080ff",LCNEC4="#d35f5fff", LCNEC23 = "#ff5555ff", 
               LCNEC11="#ff5599ff",PANEC1="#8d5fd3ff",
               SINET7="#2ad4ffff",SINET8="#80b3ffff",SINET9="#5f8dd3ff",SINET12="#5fbcd3ff", SINET21="#0066ffff", SINET22="#2c5aa0ff")

List of samples:

sampleOrder = c("SINET7M",	"SINET7Mp2",
  "SINET8M",	"SINET8Mp2",
  "SINET9M",	"SINET9Mp1",
  "LNET2T","LNET2Tp12",
  "LNET6T",	"LNET6Tp1",
  "LNET5T",	"LNET5Tp4",
  "LNET10T",	"LNET10Tp4",
  "LCNEC3T","LCNEC3Tp17",
  "LCNEC4T",	"LCNEC4Tp7", "LCNEC4Tp24",	
  "PANEC1T","PANEC1Tp4","PANEC1Tp14")

sampleOrder.lp = c("SINET7M",	"SINET7Mp2",
  "LNET2T","LNET2Tp12",
  "LNET5T",	"LNET5Tp4")

sampleOrder.hp = c("SINET8M",	"SINET8Mp2",
  "SINET9M",	"SINET9Mp1",
  "LNET6T",	"LNET6Tp1",
  "LNET10T",	"LNET10Tp4",
  "LCNEC3T","LCNEC3Tp17",
  "LCNEC4T",	"LCNEC4Tp7", "LCNEC4Tp24",	
  "PANEC1T","PANEC1Tp4","PANEC1Tp14")

sampleOrderRNA.hp = c("SINET21M",	"SINET21Mp2",
                   "SINET22M",	"SINET22Mp2",
                   "SINET12M",	"SINET12Mp1.1","SINET12Mp1.3",
                   "LNET13T",	"LNET13Tp1",
                   "LNET14T",	"LNET14Tp1",
                "LNET15M",	"LNET15Mp2",
                "LNET16M",	"LNET16Mp1",
                "LCNEC11M","LCNEC11Mp3",
                "LCNEC23Mp3")

sampleOrderRNA.lp = c("LNET19T",	"LNET19Tp2",
                "LNET16T",	"LNET16Tp2",
                "LNET18Tp2",
                "LNET20M",	"LNET20Mp2")

ExpOrder = c("SINET7", "SINET8", "SINET9",
             "LNET2","LNET6",
             "LNET5","LNET10",
             "LCNEC3","LCNEC4","PANEC1")

# high purity only
ExpOrder.hp = c("SINET8", "SINET9",
                "LNET6","LNET10",
                "LCNEC3","LCNEC4","PANEC1")                

# same for RNAseq in high-purity samples 
ExpOrderRNA.hp= c("SINET21","SINET22","SINET12","LNET13","LNET14","LNET15","LNET16","LCNEC11","LCNEC23")

# mixed samples
ExpOrderRNA.lp= c("LNET19","LNET16","LNET18","LNET20")

Load data

Load list of damaging small variants from WGS from Table S4

smallmuts = read_xlsx("TableS4.xlsx",sheet = 1,skip=2)

Load TMBs from Table S4

TMB = read_xlsx("TableS4.xlsx",sheet = 2,skip=2) %>% mutate(TMB = as.numeric(TMB),Experiment=factor(Experiment,levels=ExpOrder), Sample = factor(Sample,levels=sampleOrder) )

Load list of driver genes from Table S4

drivers = read_xlsx("TableS4.xlsx",sheet = 3,skip=2) %>% mutate(Type=as.factor(Type))

Load list of damaging small variants from RNA-seq from Table S4

smallmutsRNA = read_xlsx("TableS4.xlsx",sheet = 4,skip=2)

Order driver genes found in our experiments

Order driver genes based on frequency in cohort, and label type as -1, 0, 1, respectively for SINET drivers, drivers in both SINET and LNEN, and drivers in LNEN

tabgenes_hp = smallmuts %>% filter(Hugo_Symbol%in%drivers$`Gene name`, Tumor_Sample_Barcode %in% sampleOrder.hp ) %>% 
  mutate(Exp=str_remove(Tumor_Sample_Barcode,"[MT]p*[0-9]*$")) %>% 
  group_by(Hugo_Symbol) %>% summarise(n=n(),nexp=length(unique(Exp)),Exp ) %>% left_join(drivers,by=c("Hugo_Symbol"="Gene name")) %>% 
  mutate(WhichType=(Type=="SINET")*(-1)+ (Type=="LNEN")*1) %>% 
  mutate(Exp=factor(Exp,levels =  ExpOrder.hp)) %>% 
  arrange(Exp,desc(nexp),desc(n))

## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `summarise()` has grouped output by 'Hugo_Symbol'. You can override using the
## `.groups` argument.

Oncoplot of WGS mutations (top panels of Figures 4B and S4B)

High-purity samples (Figure 4B)

vc_col = maftools:::get_vcColors(websafe = FALSE)
vc_col["Missense_Mutation"] = "#1f78b4ff"
vc_col["Frame_Shift_Del"] = "#782121ff"
vc_col["Frame_Shift_Ins"] = "#2ca089ff"

oncoplot(maf = MAF(nonSyn = smallmuts,syn = smallmuts[-(1:nrow(smallmuts)),]),
         showTumorSampleBarcodes = T, barcode_mar = 6, drawRowBar = F,drawColBar = F,  showTitle = F,
         leftBarData = tabgenes_hp[!duplicated(tabgenes_hp$Hugo_Symbol),c(1,9)], 
         sampleOrder = sampleOrder.hp,
         genes = unique(tabgenes_hp$Hugo_Symbol), 
         keepGeneOrder = TRUE,
         removeNonMutated = FALSE,
         colors = vc_col,
         borderCol= "white",
         bgCol = "#fef8e4ff")

## -Processing clinical data
## --Missing clinical data

Mixed samples (Figure S4B)

tabgenes_lp = smallmuts %>% filter(Hugo_Symbol%in%drivers$`Gene name`, !Tumor_Sample_Barcode %in% sampleOrder.hp ) %>% 
  mutate(Exp=str_remove(Tumor_Sample_Barcode,"[MT]p*[0-9]*$")) %>% 
  group_by(Hugo_Symbol) %>% summarise(n=n(),nexp=length(unique(Exp)),Exp ) %>% left_join(drivers,by=c("Hugo_Symbol"="Gene name")) %>% 
  mutate(WhichType=(Type=="SINET")*(-1)+ (Type=="LNEN")*1) %>% 
  mutate(Exp=factor(Exp,levels =  ExpOrder.hp)) %>% 
  arrange(Exp,desc(nexp),desc(n))

oncoplot(maf = MAF(nonSyn = smallmuts,syn = smallmuts[-(1:nrow(smallmuts)),]), 
         showTumorSampleBarcodes = T, barcode_mar = 6, drawRowBar = F,drawColBar = F,  showTitle = F,
         leftBarData = tabgenes_lp[!duplicated(tabgenes_lp$Hugo_Symbol),c(1,9)], 
         sampleOrder = sampleOrder.lp,
         genes = unique(tabgenes_lp$Hugo_Symbol), 
         keepGeneOrder = TRUE,
         removeNonMutated = FALSE,
         colors = vc_col,
         borderCol= "white",
         bgCol = "#fef8e4ff")

## -Processing clinical data
## --Missing clinical data

Tumor mutational burden (bottom panel of Figures 4B and S4B)

High-purity samples (Fig. 4B, bottom)

TMB_hp = TMB %>% filter(Sample%in% sampleOrder.hp) %>% mutate(Sample = droplevels(Sample)) %>% arrange(Sample)

Fig4Bb <- ggplot( TMB_hp , aes(x=as.numeric(Sample),y=TMB,col=Experiment)) + 
  geom_path( mapping = aes(col=Experiment),cex=1.5) + 
  geom_point(shape=21,size=1.5,stroke=1.5,fill="white") + 
  geom_point(data = TMB_hp %>% filter(!PDTO) , shape=16,size=1.5,stroke=1.5) +
  scale_y_log10(lim=c(0.1,50)) + scale_color_manual(values = colors_org[ExpOrder]) + theme_classic() + grids(axis = "y") +
  theme(axis.ticks.x = element_blank(),axis.text.x = element_blank(),axis.line.x = element_blank()) +
  xlab("") + ylab("TMB (nonsyn mutations/Mb)")+ guides(col=F)

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

Fig4Bb

ggsave("/data/lungNENomics/work/organoids/figures/Fig4Bb_raw_TMB.pdf",Fig4Bb,height = 2,width = 3.5)

Mixed samples (Fig. S4B, bottom)

TMB_lp = TMB %>% filter(Sample%in% sampleOrder.lp) %>% mutate(Sample = droplevels(Sample)) %>% arrange(Sample)

FigS4Bb <- ggplot( TMB_lp , aes(x=as.numeric(Sample),y=TMB,col=Experiment)) + 
  geom_path( mapping = aes(col=Experiment),cex=1.5) + 
  geom_point(shape=21,size=1.5,stroke=1.5,fill="white") + 
  geom_point(data = TMB_lp %>% filter(!PDTO) , shape=16,size=1.5,stroke=1.5) +
  scale_y_log10(lim=c(0.1,50)) + scale_color_manual(values = colors_org[ExpOrder]) + theme_classic() + grids(axis = "y") +
  theme(axis.ticks.x = element_blank(),axis.text.x = element_blank(),axis.line.x = element_blank()) +
  xlab("") + ylab("TMB (nonsyn mutations/Mb)")+ guides(col=F)

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

FigS4Bb

ggsave("/data/lungNENomics/work/organoids/figures/Fig4SBb_raw_TMB.pdf",FigS4Bb,height = 2,width = 2.5)

Oncoplot of RNA-seq mutations (Figures 4C and S4C)

High-purity samples (Figure 4C)

sampleOrder_onco = c(sampleOrder[17:22],sampleOrder[1:16])
levels_normalRNA = c("SINET21","SINET22","SINET7","SINET8","SINET12","LNET6","LNET13","LNET14","LNET19",
                  "LNET5","LNET10","LNET15","LNET16","LNET18","LNET20",
                  "LCNEC3","LCNEC4","LCNEC11","LCNEC23","PANEC1")

tabgenesRNA_hp = smallmutsRNA %>% filter(Hugo_Symbol%in%drivers$`Gene name`, Tumor_Sample_Barcode %in% sampleOrderRNA.hp ) %>% 
  mutate(Exp=str_remove(Tumor_Sample_Barcode,"[MT]p*[0-9]*$")) %>% 
  group_by(Hugo_Symbol) %>% summarise(n=n(),nexp=length(unique(Exp)),Exp ) %>% left_join(drivers,by=c("Hugo_Symbol"="Gene name")) %>% 
  mutate(WhichType=(Type=="SINET")*(-1)+ (Type=="LNEN")*1) %>% 
  mutate(Exp=factor(Exp,levels =  ExpOrderRNA.hp)) %>% 
  arrange(Exp,desc(nexp),desc(n))

## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `summarise()` has grouped output by 'Hugo_Symbol'. You can override using the
## `.groups` argument.

vc_col = maftools:::get_vcColors(websafe = FALSE)
vc_col["No_coverage"] = rgb(0.5,0.5,0.5)
vc_col["Missense_Mutation"] = "#1f78b4ff"
vc_col["Frame_Shift_Del"] = "#782121ff"
vc_col["Frame_Shift_Ins"] = "#2ca089ff"

oncoplot(maf = MAF(nonSyn = smallmutsRNA %>% filter(Tumor_Sample_Barcode %in% sampleOrderRNA.hp),syn = smallmutsRNA[-(1:nrow(smallmutsRNA)),]),
         additionalFeature = list(c("confidence","*"),c("confidence","**"),c("confidence","***")), additionalFeaturePch = c(1,4,8),
         showTumorSampleBarcodes = T, barcode_mar = 6, drawRowBar = F,drawColBar = F,  showTitle = F,
         leftBarData = tabgenesRNA_hp[!duplicated(tabgenesRNA_hp$Hugo_Symbol),c(1,9)], 
         sampleOrder = sampleOrderRNA.hp,
         genes = unique(tabgenesRNA_hp$Hugo_Symbol), 
         keepGeneOrder = TRUE,
         removeNonMutated = FALSE,
         colors = vc_col,
         borderCol= "white",
         bgCol = "#fef8e4ff")

## -Processing clinical data
## --Missing clinical data

## Warning in oncoplot(maf = MAF(nonSyn = smallmutsRNA %>%
## filter(Tumor_Sample_Barcode %in% : Provided colors for additional features are
## recycled

pdf("Fig4C_raw_17052023.pdf",h=7,w=6)
oncoplot(maf = MAF(nonSyn = smallmutsRNA %>% filter(Tumor_Sample_Barcode %in% sampleOrderRNA.hp),syn = smallmutsRNA[-(1:nrow(smallmutsRNA)),]),
         additionalFeature = list(c("confidence","*"),c("confidence","**"),c("confidence","***")), additionalFeaturePch = c(1,4,8),
         showTumorSampleBarcodes = T, barcode_mar = 6, drawRowBar = F,drawColBar = F,  showTitle = F,
         leftBarData = tabgenesRNA_hp[!duplicated(tabgenesRNA_hp$Hugo_Symbol),c(1,9)], 
         sampleOrder = sampleOrderRNA.hp,
         genes = unique(tabgenesRNA_hp$Hugo_Symbol), 
         keepGeneOrder = TRUE,
         removeNonMutated = FALSE,
         colors = vc_col,
         borderCol= "white",
         bgCol = "#fef8e4ff")

## -Processing clinical data
## --Missing clinical data

## Warning in oncoplot(maf = MAF(nonSyn = smallmutsRNA %>%
## filter(Tumor_Sample_Barcode %in% : Provided colors for additional features are
## recycled

dev.off()

## png 
##   2

Mixed samples (Figure S4C)

tabgenesRNA_lp = smallmutsRNA %>% filter(Hugo_Symbol%in%drivers$`Gene name`, Tumor_Sample_Barcode %in% sampleOrderRNA.lp ) %>% 
  mutate(Exp=str_remove(Tumor_Sample_Barcode,"[MT]p*[0-9]*$")) %>% 
  group_by(Hugo_Symbol) %>% summarise(n=n(),nexp=length(unique(Exp)),Exp ) %>% left_join(drivers,by=c("Hugo_Symbol"="Gene name")) %>% 
  mutate(WhichType=(Type=="SINET")*(-1)+ (Type=="LNEN")*1) %>% 
  mutate(Exp=factor(Exp,levels =  ExpOrderRNA.lp)) %>% 
  arrange(Exp,desc(nexp),desc(n))

## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `summarise()` has grouped output by 'Hugo_Symbol'. You can override using the
## `.groups` argument.

oncoplot(maf = MAF(nonSyn = smallmutsRNA %>% filter(Tumor_Sample_Barcode %in% sampleOrderRNA.lp),syn = smallmutsRNA[-(1:nrow(smallmutsRNA)),]),
         additionalFeature = list(c("confidence","*"),c("confidence","**"),c("confidence","***")), additionalFeaturePch = c(1,4,8),
         showTumorSampleBarcodes = T, barcode_mar = 6, drawRowBar = F,drawColBar = F,  showTitle = F,
         leftBarData = tabgenesRNA_lp[!duplicated(tabgenesRNA_lp$Hugo_Symbol),c(1,9)], 
         sampleOrder = sampleOrderRNA.lp,
         genes = unique(tabgenesRNA_lp$Hugo_Symbol), 
         keepGeneOrder = TRUE,
         removeNonMutated = FALSE,
         colors = vc_col,
         borderCol= "white",
         bgCol = "#fef8e4ff")

## -Processing clinical data
## --Missing clinical data

## Warning in oncoplot(maf = MAF(nonSyn = smallmutsRNA %>%
## filter(Tumor_Sample_Barcode %in% : Provided colors for additional features are
## recycled

pdf("FigS4C_raw_17052023.pdf",h=4,w=4)
oncoplot(maf = MAF(nonSyn = smallmutsRNA %>% filter(Tumor_Sample_Barcode %in% sampleOrderRNA.lp),syn = smallmutsRNA[-(1:nrow(smallmutsRNA)),]),
         additionalFeature = list(c("confidence","*"),c("confidence","**"),c("confidence","***")), additionalFeaturePch = c(1,4,8),
         showTumorSampleBarcodes = T, barcode_mar = 6, drawRowBar = F,drawColBar = F,  showTitle = F,
         leftBarData = tabgenesRNA_lp[!duplicated(tabgenesRNA_lp$Hugo_Symbol),c(1,9)], 
         sampleOrder = sampleOrderRNA.lp,
         genes = unique(tabgenesRNA_lp$Hugo_Symbol), 
         keepGeneOrder = TRUE,
         removeNonMutated = FALSE,
         colors = vc_col,
         borderCol= "white",
         bgCol = "#fef8e4ff")

## -Processing clinical data
## --Missing clinical data

## Warning in oncoplot(maf = MAF(nonSyn = smallmutsRNA %>%
## filter(Tumor_Sample_Barcode %in% : Provided colors for additional features are
## recycled

dev.off()

## png 
##   2

Session Info

sessionInfo()

## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.3.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggpubr_0.4.0     readxl_1.4.2     forcats_0.5.1    stringr_1.5.0   
##  [5] dplyr_1.1.1      purrr_1.0.1      readr_2.1.4      tidyr_1.3.0     
##  [9] tibble_3.2.1     ggplot2_3.3.6    tidyverse_1.3.1  maftools_2.10.05
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.5          sass_0.4.5          jsonlite_1.8.4     
##  [4] splines_4.1.2       carData_3.0-5       modelr_0.1.11      
##  [7] bslib_0.4.2         highr_0.10          cellranger_1.1.0   
## [10] yaml_2.3.7          pillar_1.9.0        backports_1.4.1    
## [13] lattice_0.20-45     glue_1.6.2          digest_0.6.31      
## [16] RColorBrewer_1.1-3  ggsignif_0.6.3      rvest_1.0.3        
## [19] colorspace_2.1-0    htmltools_0.5.5     Matrix_1.5-4       
## [22] pkgconfig_2.0.3     broom_1.0.4         haven_2.5.2        
## [25] scales_1.2.1        tzdb_0.3.0          timechange_0.2.0   
## [28] generics_0.1.3      farver_2.1.1        car_3.0-12         
## [31] cachem_1.0.7        withr_2.5.0         cli_3.6.1          
## [34] survival_3.5-5      magrittr_2.0.3.9000 crayon_1.5.2       
## [37] evaluate_0.15       fs_1.6.1            fansi_1.0.4        
## [40] rstatix_0.7.0       xml2_1.3.3          textshaping_0.3.6  
## [43] tools_4.1.2         data.table_1.14.2   hms_1.1.3          
## [46] lifecycle_1.0.3     munsell_0.5.0       reprex_2.0.2       
## [49] compiler_4.1.2      jquerylib_0.1.4     systemfonts_1.0.4  
## [52] rlang_1.1.0         grid_4.1.2          rstudioapi_0.14    
## [55] labeling_0.4.2      rmarkdown_2.21      gtable_0.3.3       
## [58] abind_1.4-5         DBI_1.1.3           R6_2.5.1           
## [61] lubridate_1.9.2     knitr_1.38          fastmap_1.1.1      
## [64] utf8_1.2.3          ragg_1.2.5          stringi_1.7.12     
## [67] vctrs_0.6.1         dbplyr_2.3.2        tidyselect_1.2.0   
## [70] xfun_0.38

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Fig4BC_S4BC.md

Fig4BC_S4BC.md

Code to produce Figs. 4B, C from Dayton et al. (Submitted)

load libraries

Create useful vectors

Load data

Order driver genes found in our experiments

Oncoplot of WGS mutations (top panels of Figures 4B and S4B)

High-purity samples (Figure 4B)

Mixed samples (Figure S4B)

Tumor mutational burden (bottom panel of Figures 4B and S4B)

High-purity samples (Fig. 4B, bottom)

Mixed samples (Fig. S4B, bottom)

Oncoplot of RNA-seq mutations (Figures 4C and S4C)

High-purity samples (Figure 4C)

Mixed samples (Figure S4C)

Session Info

Files

Fig4BC_S4BC.md

Latest commit

History

Fig4BC_S4BC.md

File metadata and controls

Code to produce Figs. 4B, C from Dayton et al. (Submitted)

load libraries

Create useful vectors

Load data

Order driver genes found in our experiments

Oncoplot of WGS mutations (top panels of Figures 4B and S4B)

High-purity samples (Figure 4B)

Mixed samples (Figure S4B)

Tumor mutational burden (bottom panel of Figures 4B and S4B)

High-purity samples (Fig. 4B, bottom)

Mixed samples (Fig. S4B, bottom)

Oncoplot of RNA-seq mutations (Figures 4C and S4C)

High-purity samples (Figure 4C)

Mixed samples (Figure S4C)

Session Info