Skip to content

Commit

Permalink
filtering the example rse_tx for the vignette
Browse files Browse the repository at this point in the history
  • Loading branch information
gpertea committed Sep 22, 2024
1 parent 81723a2 commit 5b1f5d7
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 61 deletions.
5 changes: 5 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,8 @@
^\.github$
^data-raw$
^codecov\.yml$
^\.Rhistory$
^\.Rdata$
^\.httr-oauth$
^\.DS_Store$
^dcs04_data$
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: qsvaR
Title: Generate Quality Surrogate Variable Analysis for Degradation Correction
Version: 1.9.0
Date: 2024-05-03
Version: 1.9.1
Date: 2024-09-16
Authors@R:
c(
person("Joshua", "Stolz", email = "[email protected]",
Expand All @@ -25,7 +25,7 @@ biocViews: Software, WorkflowStep, Normalization, BiologicalQuestion,
DifferentialExpression, Sequencing, Coverage
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.0
RoxygenNote: 7.3.2
Suggests:
BiocFileCache,
BiocStyle,
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# Generated by roxygen2: do not edit by hand

export(DEqual)
export(check_tx_names)
export(getDegTx)
export(getPCs)
export(get_qsvs)
export(k_qsvs)
export(normalize_tx_names)
export(qSVA)
export(select_transcripts)
export(which_tx_names)
import(SummarizedExperiment)
import(ggplot2)
import(rlang)
Expand Down
23 changes: 12 additions & 11 deletions R/DEqual.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,33 +43,34 @@ DEqual <- function(DE) {
## Check input
# stopifnot("t" %in% colnames(DE))
# stopifnot(!is.null(rownames(DE)))

# Check if input is a dataframe
if (!is.data.frame(DE)) {
stop("The input to DEqual is not a dataframe.", call. = FALSE)
if (!is.data.frame(DE)) {
stop("The input to DEqual is not a dataframe.", call. = FALSE)
}

# Check if 't' is in the column names of DE
if (!("t" %in% colnames(DE))) {
stop("'t' is not a column in 'DE'.", call. = FALSE)
}

# Check if DE has non-null row names
if (is.null(rownames(DE))) {
stop("Row names of 'DE' are NULL.", call. = FALSE)
}

## Locate common transcripts
deg_tstats = qsvaR::degradation_tstats
rownames(deg_tstats) = check_tx_names(rownames(DE),rownames(qsvaR::degradation_tstats),'rownames(DE)','qsvaR::degradation_tstats')
common = intersect(rownames(deg_tstats), rownames(DE))

whichTx <- which_tx_names(rownames(DE),rownames(deg_tstats))
#rownames(deg_tstats) = check_tx_names(rownames(DE),rownames(qsvaR::degradation_tstats),'rownames(DE)','qsvaR::degradation_tstats')
#common = intersect(rownames(deg_tstats), rownames(DE))
common = qsvaR::normalize_tx_names(rownames(DE)[whichTx])
stopifnot(length(common) > 0)

rownames(deg_tstats) <- qsvaR::normalize_tx_names(rownames(deg_tstats))
## Create dataframe with common transcripts
common_data <- data.frame(
degradation_t = deg_tstats$t[match(common, rownames(deg_tstats))],
DE_t = DE$t[match(common, rownames(DE))]
degradation_t = deg_tstats[common, ]$t,
DE_t = DE[whichTx, ]$t
)
p <- ggplot(common_data, aes(x = DE_t, y = degradation_t)) +
xlab("DE t-statistic") +
Expand Down
4 changes: 2 additions & 2 deletions R/getDegTx.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ getDegTx <- function(rse_tx, type = c("cell_component", "standard", "top1500"),
stop(sprintf("'%s' is not in assayNames(rse_tx).", assayname), call. = FALSE)
}

# Check for validity and matching of tx names
wtx <- check_tx_names(rownames(rse_tx), sig_transcripts)
# Check for validity and matching of tx names and return the tx subset indexes in rse_tx
wtx <- which_tx_names(rownames(rse_tx), sig_transcripts)
if (length(wtx) < 10) {
stop("Not enough transcript names were found in the '",type, "' degradation model transcripts" )
}
Expand Down
47 changes: 34 additions & 13 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -1,32 +1,53 @@
#' Check validity of transcript vectors



#' Remove version number from Gencode/Ensembl transcript names
#'
#' This function is used to check if the tx1 and tx2 are GENCODE or ENSEMBL and print an error message if it's not and return a character vector of transcripts in tx2 that are in tx1.
#' This function removes the Gencode/ENSEMBL version number from each transcript ID, while preserving a `_PAR_Y` suffix if present.
#'
#' @param tx1 A `character()` vector of GENCODE or ENSEMBL transcripts.
#' @param tx2 A `character()` vector of GENCODE or ENSEMBL transcripts.
#' @param txnames A `character()` vector of GENCODE or ENSEMBL transcript IDs
#'
#' @param arg_name1 A `character(1)` vector of description of tx1
#' @param arg_name2 A `character(1)` vector of description of tx2
#'
#' @return A
#' `character()` vector of transcripts in `tx2` that are in `tx1`.
#' `character()` vector of transcript names without versioning
#'
#' @export
#'
#' @examples
#' ensIDs <- normalize_tx_names(rownames(rse_tx))

normalize_tx_names <- function(txnames) {
  # Group 1 keeps the "ENST<digits>" stem; group 2 keeps whatever follows the
  # ".<version>" part (for example a "_PAR_Y" suffix), so only the version
  # number itself is dropped. Names that do not match are returned unchanged.
  versioned_id <- "(ENST\\d+)\\.\\d+(.*)$"
  stem_and_suffix <- "\\1\\2"
  sub(
    pattern = versioned_id,
    replacement = stem_and_suffix,
    x = txnames,
    perl = TRUE
  )
}


#' Check transcript names and return the indexes of those found in a reference set
#'
#' This function checks that `txnames` are GENCODE or ENSEMBL transcript IDs (`ENST...`)
#' and returns an integer vector of the `txnames` indexes whose transcripts are in `sig_transcripts`.
#' Transcript version numbers are ignored when matching.
#'
#' @param txnames A `character()` vector of GENCODE or ENSEMBL transcript IDs.
#' @param sig_transcripts A `character()` vector of GENCODE or ENSEMBL transcript IDs.
#'
#'
#' @return A
#' `integer()` vector of indexes into `txnames` for the transcripts found in `sig_transcripts`.
#'
#' @export
#'
#' @examples
#' sig_tx <- select_transcripts("cell_component")
#' whichTx <- check_tx_names(rownames(rse_tx), sig_tx)
#' whichTx <- which_tx_names(rownames(rse_tx), sig_tx)

check_tx_names = function(txnames, sig_transcripts) {
# Functions for checking whether a vector of transcripts all match GENCODE
# or ENSEMBL naming conventions
which_tx_names <- function(txnames, sig_transcripts) {
  # Locate the elements of `txnames` that occur in `sig_transcripts`, comparing
  # IDs after normalize_tx_names() has been applied to both vectors.
  #
  # txnames:         transcript IDs to search; every element must start with
  #                  "ENST" followed by digits, otherwise an error is raised.
  # sig_transcripts: transcript IDs to look for (not validated here).
  # Returns the integer positions in `txnames` whose normalized ID is present
  # in the normalized `sig_transcripts` (integer(0) when nothing matches).

  ## Between releases 25 and 43, PAR genes and transcripts had the "_PAR_Y" suffix appended to their identifiers.
  ## Since release 44, these have their own IDs
  if (!all(grepl("^ENST\\d+", txnames))) {
    stop("The transcript names must be ENSEMBL or Gencode IDs (ENST...)")
  }

  ## Normalize each vector exactly once through the shared helper; the former
  ## inline sub() calls produced values that were immediately overwritten.
  r_tx <- normalize_tx_names(txnames)
  sig_tx <- normalize_tx_names(sig_transcripts)
  which(r_tx %in% sig_tx)
}

28 changes: 0 additions & 28 deletions man/check_tx_names.Rd

This file was deleted.

5 changes: 2 additions & 3 deletions man/getDegTx.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 5b1f5d7

Please sign in to comment.