Skip to content

Commit

Permalink
final polish and changelog
Browse files Browse the repository at this point in the history
  • Loading branch information
sigven committed May 31, 2024
1 parent 9af4298 commit 8cf42c7
Show file tree
Hide file tree
Showing 12 changed files with 209 additions and 158 deletions.
8 changes: 7 additions & 1 deletion R/main.R
Original file line number Diff line number Diff line change
Expand Up @@ -437,12 +437,18 @@ write_cpsr_output <- function(report,
}

if(NROW(report$content$snv_indel$callset$variant$bm) > 0){

bm_excel <- report$content$snv_indel$callset$variant$bm |>
dplyr::mutate(
BM_MOLECULAR_PROFILE = pcgrr::strip_html(
.data$BM_MOLECULAR_PROFILE))

workbook <- workbook |>
openxlsx2::wb_add_worksheet(sheet = "BIOMARKER_EVIDENCE") |>
openxlsx2::wb_add_data_table(
sheet = "BIOMARKER_EVIDENCE",
x = dplyr::select(
report$content$snv_indel$callset$variant$bm,
bm_excel,
dplyr::any_of(
cpsr::col_format_output[['xlsx_biomarker']])),
start_row = 1,
Expand Down
314 changes: 168 additions & 146 deletions data-raw/data-raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,15 +181,15 @@ col_format_output[['html_bm']] <-
'PROTEIN_CHANGE',
'CONSEQUENCE',
'BM_EVIDENCE_LEVEL',
'BM_MOLECULAR_PROFILE',
'BM_REFERENCE',
'GENOTYPE',
"DP_CONTROL",
'BM_CANCER_TYPE',
'BM_DISEASE_ONTOLOGY_ID',
'BM_PRIMARY_SITE',
'BM_CLINICAL_SIGNIFICANCE',
'BM_THERAPEUTIC_CONTEXT',
'BM_REFERENCE',
'BM_MOLECULAR_PROFILE_NAME',
'BM_RATING',
'BM_EVIDENCE_TYPE',
'BM_EVIDENCE_DIRECTION',
Expand Down Expand Up @@ -319,7 +319,7 @@ col_format_output[['xlsx_biomarker']] <-
"BM_THERAPEUTIC_CONTEXT",
"BM_CITATION",
"BM_RATING",
"BM_MOLECULAR_PROFILE_NAME",
"BM_MOLECULAR_PROFILE",
"BM_EVIDENCE_TYPE",
"BM_EVIDENCE_LEVEL",
"BM_EVIDENCE_DIRECTION",
Expand Down Expand Up @@ -373,150 +373,172 @@ acmg[["score2tier"]] <-

# Persist the `acmg` and `col_format_output` objects as package data,
# replacing any previously saved versions. `TRUE` is spelled out because
# the shorthand `T` is an ordinary variable that can be reassigned.
usethis::use_data(acmg, overwrite = TRUE)
usethis::use_data(col_format_output, overwrite = TRUE)

# Layout function for log4r appenders.
#
# Builds a single log line of the form
#   "<timestamp> - cpsr-report-generation - <level> - <message>\n"
# where <message> is the concatenation of everything passed through `...`.
#
# level: the log level label to embed in the line.
# ...:   message fragments, pasted together without a separator.
my_log4r_layout <- function(level, ...) {
  timestamp <- format(Sys.time())
  paste0(
    timestamp,
    " - cpsr-report-generation - ",
    level,
    " - ",
    ...,
    "\n",
    collapse = ""
  )
}

# Console logger with an INFO threshold; each message is rendered by
# my_log4r_layout.
log4r_logger <- log4r::logger(
  threshold = "INFO",
  appenders = log4r::console_appender(my_log4r_layout)
)

# Registered as a global option so that it gets passed on to all the
# log4r_* functions inside the pkg.
options(PCGRR_LOG4R_LOGGER = log4r_logger)

# Build the CPSR superpanel table once per genome build and collect the
# results in `panel_zero`, keyed by build name ('grch37' / 'grch38').
panel_zero <- list()
for(build in c('grch37','grch38')){
# Load the reference data for this build.
# NOTE(review): the directory is a hard-coded, machine-specific absolute
# path with a dated release folder; it must be edited to run elsewhere.
ref_data <- pcgrr::load_reference_data(
pcgr_db_assembly_dir =
file.path(
"/Users/sigven/project_data/data/data__pcgrdb/dev/pcgrdb",
"20240527/data",
build)
)
# Start from the `cpg` gene table, dropping rows whose CPG_SOURCE is
# exactly "ACMG_SF", and tag every row with the panel name and version.
panel_zero[[build]] <- ref_data$gene$cpg |>
dplyr::filter(CPG_SOURCE != "ACMG_SF") |>
dplyr::mutate(
PANEL_NAME = "CPSR superpanel of cancer predisposition genes",
PANEL_VERSION = "v2024_05") |>
# Attach gene cross-reference annotations; the inner join keeps only
# rows that match on both ENTREZGENE and ENSEMBL_GENE_ID.
dplyr::inner_join(
dplyr::select(ref_data$gene$gene_xref,
ENTREZGENE,
ENSEMBL_GENE_ID,
GENE_BIOTYPE,
GENENAME,
TSG,
TSG_SUPPORT,
ONCOGENE,
ONCOGENE_SUPPORT),
by = c("ENTREZGENE","ENSEMBL_GENE_ID")
) |>
# Spell out the TSG column names.
dplyr::rename(
TUMOR_SUPPRESSOR = TSG,
TUMOR_SUPPRESSOR_SUPPORT = TSG_SUPPORT
) |>
# Add per-gene ClinVar statistics, restricted to rows where CONFIDENCE is
# "min2goldstars"; the left join keeps genes that have no such statistics.
dplyr::left_join(
dplyr::select(
dplyr::filter(
ref_data$variant$clinvar_gene_stats,
.data$CONFIDENCE == "min2goldstars"),
c("ENTREZGENE",
"N_TRUNC_PATH",
"N_NONTRUNC_PATH",
"N_MISSENSE_PATH",
"N_MISSENSE_BENIGN",
"BENIGN_MISSENSE_FRAC",
"PATH_TRUNC_FRAC")
), by = "ENTREZGENE"
) |>
# Reorder columns: the listed ones first (any that are absent are skipped
# by any_of), followed by all remaining columns.
dplyr::select(
dplyr::any_of(
c("ENTREZGENE",
"SYMBOL",
"GENENAME",
"GENE_BIOTYPE",
"ENSEMBL_GENE_ID",
"TUMOR_SUPPRESSOR",
"TUMOR_SUPPRESSOR_SUPPORT",
"ONCOGENE",
"ONCOGENE_SUPPORT",
"CPG_SOURCE",
"CPG_MOD",
"CPG_MOI",
"CPG_PHENOTYPES",
"CPG_CANCER_CUI",
"CPG_SYNDROME_CUI")
),
dplyr::everything()
) |>
# Remove duplicate rows.
dplyr::distinct()
}
#
# Assemble an Excel workbook with one sheet per genome build, each holding
# the corresponding superpanel table, then save it to the pkgdown assets.
workbook <- openxlsx2::wb_workbook()

workbook <- openxlsx2::wb_add_worksheet(
  workbook,
  sheet = "CPSR_SUPERPANEL.GRCH37"
)
workbook <- openxlsx2::wb_add_worksheet(
  workbook,
  sheet = "CPSR_SUPERPANEL.GRCH38"
)

# GRCh37 table.
workbook <- openxlsx2::wb_add_data_table(
  workbook,
  sheet = "CPSR_SUPERPANEL.GRCH37",
  x = panel_zero[["grch37"]],
  start_row = 1,
  start_col = 1,
  col_names = TRUE,
  na.strings = "NA",
  table_style = "TableStyleMedium15"
)

# GRCh38 table (uses a different table style from the GRCh37 sheet).
workbook <- openxlsx2::wb_add_data_table(
  workbook,
  sheet = "CPSR_SUPERPANEL.GRCH38",
  x = panel_zero[["grch38"]],
  start_row = 1,
  start_col = 1,
  col_names = TRUE,
  na.strings = "NA",
  table_style = "TableStyleMedium16"
)

# Overwrite any existing file at the target path.
openxlsx2::wb_save(
  workbook,
  file = "pkgdown/assets/cpsr_superpanel_2024_05.xlsx",
  overwrite = TRUE
)

# my_log4r_layout <- function(level, ...) {
# paste0(format(Sys.time()), " - cpsr-report-generation - ",
# level, " - ", ..., "\n", collapse = "")
# }
#
# Display version of the GRCh38 superpanel: a reduced set of columns plus
# a GENE column holding an HTML link to the NCBI Gene page of each entry.
#
# Compared with the earlier form of this block, the column vector of the
# first select no longer lists "ENTREZGENE" twice and no longer ends in a
# dangling comma; the selected columns are unchanged.
panel_zero_display <- panel_zero$grch38 |>
  # Keep only the columns needed below (SYMBOL is used for the link text).
  dplyr::select(
    c("ENTREZGENE",
      "SYMBOL",
      "ENSEMBL_GENE_ID",
      "GENENAME",
      "CPG_PHENOTYPES",
      "CPG_MOI",
      "CPG_MOD",
      "CPG_SOURCE")
  ) |>
  # Turn the "&"-separated source list into a comma-separated one.
  dplyr::mutate(
    CPG_SOURCE = stringr::str_replace_all(
      .data$CPG_SOURCE, "&", ", "
    )
  ) |>
  # Strip the "ACMG_SF" label from the source list.
  # NOTE(review): because this runs after the "&" -> ", " substitution, a
  # multi-source entry that contained ACMG_SF may be left with a dangling
  # ", " separator - confirm whether that is intended.
  dplyr::mutate(
    CPG_SOURCE = stringr::str_replace_all(
      .data$CPG_SOURCE, "ACMG_SF", ""
    )
  ) |>
  # Link text is the gene symbol; the target is the NCBI Gene record for
  # the Entrez gene identifier, opened in a new tab.
  dplyr::mutate(
    GENE = paste0(
      "<a href='https://www.ncbi.nlm.nih.gov/gene/",
      .data$ENTREZGENE,
      "' target='_blank'>",
      .data$SYMBOL, "</a>"
    )
  ) |>
  # Final column order; SYMBOL is dropped since GENE already carries it.
  dplyr::select(
    c("GENE", "ENTREZGENE", "ENSEMBL_GENE_ID",
      "CPG_MOD", "CPG_MOI", "GENENAME",
      "CPG_SOURCE", "CPG_PHENOTYPES")
  )
# log4r_logger <-
# log4r::logger(
# threshold = "INFO", appenders = log4r::console_appender(my_log4r_layout))
#
# # this gets passed on to all the log4r_* functions inside the pkg
# options("PCGRR_LOG4R_LOGGER" = log4r_logger)
#
# panel_zero <- list()
# for(build in c('grch37','grch38')){
# ref_data <- pcgrr::load_reference_data(
# pcgr_db_assembly_dir =
# file.path(
# "/Users/sigven/project_data/data/data__pcgrdb/dev/pcgrdb",
# "20240530/data",
# build),
# genome_assembly = build
# )
#
# set1 <- ref_data$gene$cpg |>
# dplyr::filter(CPG_SOURCE != "ACMG_SF") |>
# dplyr::filter(!is.na(ENSEMBL_GENE_ID)) |>
# dplyr::inner_join(
# dplyr::select(ref_data$gene$gene_xref,
# ENTREZGENE,
# ENSEMBL_GENE_ID,
# GENE_BIOTYPE,
# GENENAME,
# TSG,
# TSG_SUPPORT,
# ONCOGENE,
# ONCOGENE_SUPPORT),
# by = c("ENTREZGENE","ENSEMBL_GENE_ID")
# )
#
# set2 <- ref_data$gene$cpg |>
# dplyr::filter(CPG_SOURCE != "ACMG_SF") |>
# dplyr::filter(is.na(ENSEMBL_GENE_ID)) |>
# dplyr::select(-c("ENSEMBL_GENE_ID")) |>
# dplyr::inner_join(
# dplyr::select(ref_data$gene$gene_xref,
# ENTREZGENE,
# ENSEMBL_GENE_ID,
# GENE_BIOTYPE,
# GENENAME,
# TSG,
# TSG_SUPPORT,
# ONCOGENE,
# ONCOGENE_SUPPORT),
# by = c("ENTREZGENE")
# )
#
# panel_zero[[build]] <- dplyr::bind_rows(set1, set2) |>
# dplyr::mutate(
# PANEL_NAME = "CPSR superpanel of cancer predisposition genes",
# PANEL_VERSION = "v2024_05") |>
# dplyr::rename(
# TUMOR_SUPPRESSOR = TSG,
# TUMOR_SUPPRESSOR_SUPPORT = TSG_SUPPORT
# ) |>
# dplyr::left_join(
# dplyr::select(
# dplyr::filter(
# ref_data$variant$clinvar_gene_stats,
# .data$CONFIDENCE == "min2goldstars"),
# c("ENTREZGENE",
# "N_TRUNC_PATH",
# "N_NONTRUNC_PATH",
# "N_MISSENSE_PATH",
# "N_MISSENSE_BENIGN",
# "BENIGN_MISSENSE_FRAC",
# "PATH_TRUNC_FRAC")
# ), by = "ENTREZGENE"
# ) |>
# dplyr::select(
# dplyr::any_of(
# c("ENTREZGENE",
# "SYMBOL",
# "GENENAME",
# "GENE_BIOTYPE",
# "ENSEMBL_GENE_ID",
# "TUMOR_SUPPRESSOR",
# "TUMOR_SUPPRESSOR_SUPPORT",
# "ONCOGENE",
# "ONCOGENE_SUPPORT",
# "CPG_SOURCE",
# "CPG_MOD",
# "CPG_MOI",
# "CPG_PHENOTYPES",
# "CPG_CANCER_CUI",
# "CPG_SYNDROME_CUI")
# ),
# dplyr::everything()
# ) |>
# dplyr::distinct()
# }
# #
# workbook <- openxlsx2::wb_workbook() |>
# openxlsx2::wb_add_worksheet(sheet = "CPSR_SUPERPANEL.GRCH37") |>
# openxlsx2::wb_add_worksheet(sheet = "CPSR_SUPERPANEL.GRCH38") |>
# openxlsx2::wb_add_data_table(
# sheet = "CPSR_SUPERPANEL.GRCH37",
# x = panel_zero[['grch37']],
# start_row = 1,
# start_col = 1,
# col_names = TRUE,
# na.strings = "NA",
# table_style = "TableStyleMedium15") |>
# openxlsx2::wb_add_data_table(
# sheet = "CPSR_SUPERPANEL.GRCH38",
# x = panel_zero[['grch38']],
# start_row = 1,
# start_col = 1,
# col_names = TRUE,
# na.strings = "NA",
# table_style = "TableStyleMedium16")
#
# openxlsx2::wb_save(
# wb = workbook,
# "pkgdown/assets/cpsr_superpanel_2024_05.xlsx",
# overwrite = TRUE)
#
# Write the display table as a tab-separated file with a header row,
# missing values written as "NA" and no quoting of fields. `TRUE` is
# spelled out because the shorthand `T` can be reassigned.
readr::write_tsv(
  panel_zero_display,
  file = "inst/extdata/panel_zero.tsv.gz",
  na = "NA",
  col_names = TRUE,
  quote = "none"
)
# #
# panel_zero_display <- panel_zero$grch38 |>
# dplyr::select(
# c("ENTREZGENE",
# "SYMBOL",
# "ENTREZGENE",
# "ENSEMBL_GENE_ID",
# "GENENAME",
# "CPG_PHENOTYPES",
# "CPG_MOI",
# "CPG_MOD",
# "CPG_SOURCE",
# )
# ) |>
# dplyr::mutate(
# CPG_SOURCE = stringr::str_replace_all(
# CPG_SOURCE, "&", ", "
# )) |>
# dplyr::mutate(
# CPG_SOURCE = stringr::str_replace_all(
# CPG_SOURCE, "ACMG_SF", ""
# )
# ) |>
# dplyr::mutate(
# GENE = paste0(
# "<a href='https://www.ncbi.nlm.nih.gov/gene/",
# .data$ENTREZGENE,
# "' target='_blank'>",
# .data$SYMBOL, "</a>"
# )
# ) |>
# dplyr::select(
# c("GENE","ENTREZGENE","ENSEMBL_GENE_ID",
# "CPG_MOD", "CPG_MOI", "GENENAME",
# "CPG_SOURCE", "CPG_PHENOTYPES")
# )
# #
# readr::write_tsv(
# panel_zero_display, file = "inst/extdata/panel_zero.tsv.gz",
# na = "NA", col_names = T,quote = "none"
# )

Binary file modified data/acmg.rda
Binary file not shown.
Binary file modified data/col_format_output.rda
Binary file not shown.
Binary file modified inst/extdata/panel_zero.tsv.gz
Binary file not shown.
4 changes: 2 additions & 2 deletions inst/templates/cpsr_report.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,14 @@ cat("</ul><br>")
#| eval: !expr as.logical(cps_report$content$snv_indel$eval)
```

<br><br>
<br>

```{r biomarkers_cpsr}
#| child: 'quarto/cpsr_biomarkers.qmd'
#| eval: !expr as.logical(cps_report$content$snv_indel$eval)
```

<br><br>
<br>

```{r secondary_findings_cpsr}
#| child: 'quarto/cpsr_secondary_findings.qmd'
Expand Down
Loading

0 comments on commit 8cf42c7

Please sign in to comment.