Skip to content

Commit

Permalink
Merge pull request #36 from OHDSI/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
jreps authored Nov 28, 2023
2 parents 01d29b1 + ea3c228 commit bbadcb4
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 113 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: Characterization
Type: Package
Title: Characterizations of Cohorts
Version: 0.1.2
Version: 0.1.3
Date: 2023-09-03
Authors@R: c(
person("Jenna", "Reps", , "[email protected]", role = c("aut", "cre")),
Expand All @@ -16,7 +16,7 @@ Depends:
R (>= 4.0.0)
Imports:
Andromeda,
DatabaseConnector (>= 5.0.4),
DatabaseConnector (>= 6.3.1),
FeatureExtraction (>= 3.0.0),
SqlRender (>= 1.9.0),
ParallelLogger (>= 3.0.0),
Expand All @@ -39,7 +39,8 @@ Remotes:
ohdsi/FeatureExtraction,
ohdsi/Eunomia,
ohdsi/ResultModelManager,
ohdsi/ShinyAppBuilder
ohdsi/ShinyAppBuilder,
ohdsi/DatabaseConnector
NeedsCompilation: no
RoxygenNote: 7.2.3
Encoding: UTF-8
Expand Down
60 changes: 37 additions & 23 deletions R/Database.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ insertAndromedaToDatabase <- function(
data = data,
minCellCount = minCellCount,
minCellCountColumns = minCellCountColumns
)
)

DatabaseConnector::insertTable(
connection = connection,
Expand Down Expand Up @@ -255,11 +255,14 @@ createCharacterizationTables <- function(
#' function \code{connect} in the
#' \code{DatabaseConnector} package.
#' @param resultSchema The name of the database schema in which the result tables will be created.
#' @param targetDialect The database management system being used
#' @param targetDialect DEPRECATED: derived from \code{connectionDetails}.
#' @param tablePrefix The table prefix to apply to the characterization result tables
#' @param filePrefix The prefix to apply to the files
#' @param tempEmulationSchema The temp schema used when the database management system is Oracle
#' @param saveDirectory The directory to save the csv results
#' @param minMeanCovariateValue The minimum mean covariate value (i.e. the minimum proportion for
#' binary covariates) for a covariate to be included in the covariates table.
#' Covariates with a mean value below this threshold are removed to save space.
#'
#' @return
#' One csv file per result table, written into the saveDirectory
Expand All @@ -268,11 +271,12 @@ createCharacterizationTables <- function(
exportDatabaseToCsv <- function(
connectionDetails,
resultSchema,
targetDialect,
targetDialect = NULL,
tablePrefix = "c_",
filePrefix = NULL,
tempEmulationSchema = NULL,
saveDirectory
saveDirectory,
minMeanCovariateValue = 0.001
){

errorMessages <- checkmate::makeAssertCollection()
Expand All @@ -282,6 +286,9 @@ exportDatabaseToCsv <- function(
errorMessages = errorMessages
)
checkmate::reportAssertions(errorMessages)
if (!is.null(targetDialect)) {
warning("The targetDialect argument is deprecated")
}

if (is.null(filePrefix)) {
filePrefix = ''
Expand All @@ -303,37 +310,44 @@ exportDatabaseToCsv <- function(
)
}

# max number of rows extracted at a time
maxRowCount <- 1e6

# get the table names using the function in uploadToDatabase.R
tables <- getResultTables()

# extract result per table
for(table in tables){
sql <- "select * from @resultSchema.@appendtotable@tablename"
sql <- "select * from @resultSchema.@appendtotable@tablename;"
sql <- SqlRender::render(
sql = sql,
resultSchema = resultSchema,
appendtotable = tablePrefix,
tablename = table
)
sql <- SqlRender::translate(
sql = sql,
targetDialect = targetDialect,
tempEmulationSchema = tempEmulationSchema
)
result <- DatabaseConnector::querySql(
connection = connection,
sql = sql,
snakeCaseToCamelCase = F
)
result <- formatDouble(result)

# save the results as a csv
readr::write_csv(
x = result,
file = file.path(saveDirectory, paste0(tolower(filePrefix), table,'.csv'))
)
resultSet <- DatabaseConnector::dbSendQuery(connection, sql)
tryCatch({
first <- TRUE
while (first || !DatabaseConnector::dbHasCompleted(resultSet)) {
result <- DatabaseConnector::dbFetch(resultSet, n = maxRowCount)
if (table == "covariates" && minMeanCovariateValue > 0) {
result <- result %>%
dplyr::filter(.data$average_value >= minMeanCovariateValue)
}
result <- formatDouble(result)
# save the results as a csv
readr::write_csv(
x = result,
file = file.path(saveDirectory, paste0(tolower(filePrefix), table,'.csv')),
append = !first
)
first <- FALSE
}
},
finally = {
DatabaseConnector::dbClearResult(resultSet)
})
}

invisible(saveDirectory)
}

Expand Down
2 changes: 1 addition & 1 deletion R/RunCharacterization.R
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ runCharacterizationAnalyses <- function(
tablePrefix = tablePrefix,
minCellCount = minCellCount,
minCellCountColumns = list(
c('numEvents'),
c('numCases'),
c('dechallengeAttempt'),
c('dechallengeFail', 'dechallengeSuccess'),
c('rechallengeAttempt'),
Expand Down
6 changes: 0 additions & 6 deletions inst/sql/sql_server/DropAggregateCovariate.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@ DROP TABLE #target_with_outcome;
TRUNCATE TABLE #target_outcome_f;
DROP TABLE #target_outcome_f;

TRUNCATE TABLE #target_nooutcome;
DROP TABLE #target_nooutcome;

TRUNCATE TABLE #target_noout_f;
DROP TABLE #target_noout_f;

TRUNCATE TABLE #agg_cohorts;
DROP TABLE #agg_cohorts;

Expand Down
150 changes: 75 additions & 75 deletions inst/sql/sql_server/createTargetOutcomeCombinations.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ drop table if exists #targets_agg;
select * into #targets_agg
from
(select *,
row_number() over(partition by subject_id, cohort_definition_id, cohort_start_date order by cohort_start_date asc) as rn
row_number() over(partition by subject_id, cohort_definition_id order by cohort_start_date asc) as rn
from @target_database_schema.@target_table
where cohort_definition_id in
(@target_ids)
Expand Down Expand Up @@ -69,16 +69,16 @@ CROSS JOIN

union

select distinct
t.cohort_definition_id as target_cohort_id,
o.cohort_definition_id as outcome_cohort_id,
'TnOc' as cohort_type
from
(select distinct cohort_definition_id from #targets_agg) as t
CROSS JOIN
(select distinct cohort_definition_id from #outcomes_agg) as o
--select distinct
--t.cohort_definition_id as target_cohort_id,
--o.cohort_definition_id as outcome_cohort_id,
--'TnOc' as cohort_type
--from
--(select distinct cohort_definition_id from #targets_agg) as t
--CROSS JOIN
--(select distinct cohort_definition_id from #outcomes_agg) as o

union
--union

select distinct
t.cohort_definition_id as target_cohort_id,
Expand All @@ -102,16 +102,16 @@ CROSS JOIN

union

select distinct
t.cohort_definition_id as target_cohort_id,
o.cohort_definition_id as outcome_cohort_id,
'TnfirstOc' as cohort_type
from
(select distinct cohort_definition_id from #targets_agg) as t
CROSS JOIN
(select distinct cohort_definition_id from #outcomes_agg) as o
--select distinct
--t.cohort_definition_id as target_cohort_id,
--o.cohort_definition_id as outcome_cohort_id,
--'TnfirstOc' as cohort_type
--from
--(select distinct cohort_definition_id from #targets_agg) as t
--CROSS JOIN
--(select distinct cohort_definition_id from #outcomes_agg) as o

union
--union

select distinct
t.cohort_definition_id as target_cohort_id,
Expand Down Expand Up @@ -191,39 +191,39 @@ o.cohort_start_date >= dateadd(day, @tar_start, t.@tar_start_anchor);


-- 2) get all the people without the outcome in TAR
drop table if exists #target_nooutcome;
select
t.subject_id,
t.cohort_start_date,
t.cohort_end_date,
t.cohort_definition_id as target_cohort_id,
o.cohort_definition_id as outcome_cohort_id
into #target_nooutcome
from #targets_agg t
CROSS JOIN
( select distinct cohort_definition_id from #outcomes_agg) o
left outer join #target_with_outcome two
on t.cohort_definition_id = two.target_cohort_id
and t.subject_id = two.subject_id
and o.cohort_definition_id = two.outcome_cohort_id
where two.subject_id IS NULL;

drop table if exists #target_noout_f;
select
t.subject_id,
t.cohort_start_date,
t.cohort_end_date,
t.cohort_definition_id as target_cohort_id,
o.cohort_definition_id as outcome_cohort_id
into #target_noout_f
from #targets_agg t
CROSS JOIN
( select distinct cohort_definition_id from #outcomes_agg) o
left outer join #target_outcome_f two
on t.cohort_definition_id = two.target_cohort_id
and t.subject_id = two.subject_id
and o.cohort_definition_id = two.outcome_cohort_id
where two.subject_id IS NULL;
--drop table if exists #target_nooutcome;
--select
--t.subject_id,
--t.cohort_start_date,
--t.cohort_end_date,
--t.cohort_definition_id as target_cohort_id,
--o.cohort_definition_id as outcome_cohort_id
--into #target_nooutcome
--from #targets_agg t
--CROSS JOIN
--( select distinct cohort_definition_id from #outcomes_agg) o
--left outer join #target_with_outcome two
--on t.cohort_definition_id = two.target_cohort_id
--and t.subject_id = two.subject_id
--and o.cohort_definition_id = two.outcome_cohort_id
--where two.subject_id IS NULL;

--drop table if exists #target_noout_f;
--select
--t.subject_id,
--t.cohort_start_date,
--t.cohort_end_date,
--t.cohort_definition_id as target_cohort_id,
--o.cohort_definition_id as outcome_cohort_id
--into #target_noout_f
--from #targets_agg t
--CROSS JOIN
--( select distinct cohort_definition_id from #outcomes_agg) o
--left outer join #target_outcome_f two
--on t.cohort_definition_id = two.target_cohort_id
--and t.subject_id = two.subject_id
--and o.cohort_definition_id = two.outcome_cohort_id
--where two.subject_id IS NULL;

-- Final: select into #agg_cohorts

Expand Down Expand Up @@ -294,33 +294,33 @@ union

-- T without O

select
tnoc.subject_id,
tnoc.cohort_start_date,
tnoc.cohort_end_date,
cd.cohort_definition_id
from #target_nooutcome tnoc
INNER JOIN #cohort_details cd
on cd.target_cohort_id = tnoc.target_cohort_id
and cd.outcome_cohort_id = tnoc.outcome_cohort_id
and cd.cohort_type = 'TnOc'
--select
--tnoc.subject_id,
--tnoc.cohort_start_date,
--tnoc.cohort_end_date,
--cd.cohort_definition_id
--from #target_nooutcome tnoc
--INNER JOIN #cohort_details cd
--on cd.target_cohort_id = tnoc.target_cohort_id
--and cd.outcome_cohort_id = tnoc.outcome_cohort_id
--and cd.cohort_type = 'TnOc'

union
--union

-- T without first O

select
tnoc.subject_id,
tnoc.cohort_start_date,
tnoc.cohort_end_date,
cd.cohort_definition_id
from #target_noout_f tnoc
INNER JOIN #cohort_details cd
on cd.target_cohort_id = tnoc.target_cohort_id
and cd.outcome_cohort_id = tnoc.outcome_cohort_id
and cd.cohort_type = 'TnfirstOc'

union
--select
--tnoc.subject_id,
--tnoc.cohort_start_date,
--tnoc.cohort_end_date,
--cd.cohort_definition_id
--from #target_noout_f tnoc
--INNER JOIN #cohort_details cd
--on cd.target_cohort_id = tnoc.target_cohort_id
--and cd.outcome_cohort_id = tnoc.outcome_cohort_id
--and cd.cohort_type = 'TnfirstOc'

--union

-- Ts and Os

Expand Down
11 changes: 8 additions & 3 deletions man/exportDatabaseToCsv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit bbadcb4

Please sign in to comment.