diff --git a/mimic-iv/concepts_duckdb/demographics/icustay_hourly.sql b/mimic-iv/concepts_duckdb/demographics/icustay_hourly.sql index 94b7360d..7d8988be 100644 --- a/mimic-iv/concepts_duckdb/demographics/icustay_hourly.sql +++ b/mimic-iv/concepts_duckdb/demographics/icustay_hourly.sql @@ -19,4 +19,4 @@ SELECT TRY_CAST(hr_unnested AS BIGINT) AS hr, endtime + TRY_CAST(hr_unnested AS BIGINT) * INTERVAL '1' HOUR AS endtime FROM all_hours -CROSS JOIN UNNEST(all_hours.hrs) AS _t(hr_unnested) \ No newline at end of file +CROSS JOIN UNNEST(all_hours.hrs) AS _t0(hr_unnested) \ No newline at end of file diff --git a/mimic-iv/concepts_duckdb/medication/antibiotic.sql b/mimic-iv/concepts_duckdb/medication/antibiotic.sql index 5b85b720..7935522f 100644 --- a/mimic-iv/concepts_duckdb/medication/antibiotic.sql +++ b/mimic-iv/concepts_duckdb/medication/antibiotic.sql @@ -338,6 +338,8 @@ FROM mimiciv_hosp.prescriptions AS pr INNER JOIN abx ON pr.drug = abx.drug AND pr.route = abx.route LEFT JOIN mimiciv_icu.icustays AS ie - ON pr.hadm_id = ie.hadm_id AND pr.starttime >= ie.intime AND pr.starttime < ie.outtime + ON pr.hadm_id = ie.hadm_id + AND pr.starttime >= ie.intime + AND pr.starttime < ie.outtime WHERE abx.antibiotic = 1 \ No newline at end of file diff --git a/mimic-iv/concepts_duckdb/medication/vasoactive_agent.sql b/mimic-iv/concepts_duckdb/medication/vasoactive_agent.sql index 4e7e0c44..444f5659 100644 --- a/mimic-iv/concepts_duckdb/medication/vasoactive_agent.sql +++ b/mimic-iv/concepts_duckdb/medication/vasoactive_agent.sql @@ -90,18 +90,32 @@ SELECT mil.vaso_rate AS milrinone FROM tm_lag AS t LEFT JOIN mimiciv_derived.dobutamine AS dob - ON t.stay_id = dob.stay_id AND t.starttime >= dob.starttime AND t.endtime <= dob.endtime + ON t.stay_id = dob.stay_id + AND t.starttime >= dob.starttime + AND t.endtime <= dob.endtime LEFT JOIN mimiciv_derived.dopamine AS dop - ON t.stay_id = dop.stay_id AND t.starttime >= dop.starttime AND t.endtime <= dop.endtime + ON t.stay_id = dop.stay_id + AND t.starttime >= dop.starttime + AND t.endtime <= dop.endtime LEFT JOIN mimiciv_derived.epinephrine AS epi - ON t.stay_id = epi.stay_id AND t.starttime >= epi.starttime AND t.endtime <= epi.endtime + ON t.stay_id = epi.stay_id + AND t.starttime >= epi.starttime + AND t.endtime <= epi.endtime LEFT JOIN mimiciv_derived.norepinephrine AS nor - ON t.stay_id = nor.stay_id AND t.starttime >= nor.starttime AND t.endtime <= nor.endtime + ON t.stay_id = nor.stay_id + AND t.starttime >= nor.starttime + AND t.endtime <= nor.endtime LEFT JOIN mimiciv_derived.phenylephrine AS phe - ON t.stay_id = phe.stay_id AND t.starttime >= phe.starttime AND t.endtime <= phe.endtime + ON t.stay_id = phe.stay_id + AND t.starttime >= phe.starttime + AND t.endtime <= phe.endtime LEFT JOIN mimiciv_derived.vasopressin AS vas - ON t.stay_id = vas.stay_id AND t.starttime >= vas.starttime AND t.endtime <= vas.endtime + ON t.stay_id = vas.stay_id + AND t.starttime >= vas.starttime + AND t.endtime <= vas.endtime LEFT JOIN mimiciv_derived.milrinone AS mil - ON t.stay_id = mil.stay_id AND t.starttime >= mil.starttime AND t.endtime <= mil.endtime + ON t.stay_id = mil.stay_id + AND t.starttime >= mil.starttime + AND t.endtime <= mil.endtime WHERE NOT t.endtime IS NULL \ No newline at end of file diff --git a/mimic-iv/concepts_duckdb/organfailure/kdigo_uo.sql b/mimic-iv/concepts_duckdb/organfailure/kdigo_uo.sql index 7737fcb0..e59d0083 100644 --- a/mimic-iv/concepts_duckdb/organfailure/kdigo_uo.sql +++ b/mimic-iv/concepts_duckdb/organfailure/kdigo_uo.sql @@ -60,4 +60,6 @@ SELECT uo_tm_24hr FROM uo_stg2 AS ur LEFT JOIN mimiciv_derived.weight_durations AS wd - ON ur.stay_id = wd.stay_id AND ur.charttime >= wd.starttime AND ur.charttime < wd.endtime \ No newline at end of file + ON ur.stay_id = wd.stay_id + AND ur.charttime >= wd.starttime + AND ur.charttime < wd.endtime \ No newline at end of file diff --git a/mimic-iv/concepts_duckdb/score/apsiii.sql b/mimic-iv/concepts_duckdb/score/apsiii.sql index 6f56eefc..da48d277 100644 --- a/mimic-iv/concepts_duckdb/score/apsiii.sql +++ b/mimic-iv/concepts_duckdb/score/apsiii.sql @@ -8,7 +8,9 @@ WITH pa AS ( ROW_NUMBER() OVER (PARTITION BY ie.stay_id ORDER BY bg.po2 DESC) AS rn FROM mimiciv_derived.bg AS bg INNER JOIN mimiciv_icu.icustays AS ie - ON bg.hadm_id = ie.hadm_id AND bg.charttime >= ie.intime AND bg.charttime < ie.outtime + ON bg.hadm_id = ie.hadm_id + AND bg.charttime >= ie.intime + AND bg.charttime < ie.outtime LEFT JOIN mimiciv_derived.ventilation AS vd ON ie.stay_id = vd.stay_id AND bg.charttime >= vd.starttime @@ -27,7 +29,9 @@ WITH pa AS ( ROW_NUMBER() OVER (PARTITION BY ie.stay_id ORDER BY bg.aado2 DESC) AS rn FROM mimiciv_derived.bg AS bg INNER JOIN mimiciv_icu.icustays AS ie - ON bg.hadm_id = ie.hadm_id AND bg.charttime >= ie.intime AND bg.charttime < ie.outtime + ON bg.hadm_id = ie.hadm_id + AND bg.charttime >= ie.intime + AND bg.charttime < ie.outtime INNER JOIN mimiciv_derived.ventilation AS vd ON ie.stay_id = vd.stay_id AND bg.charttime >= vd.starttime @@ -62,7 +66,9 @@ WITH pa AS ( END AS acidbase_score FROM mimiciv_derived.bg AS bg INNER JOIN mimiciv_icu.icustays AS ie - ON bg.hadm_id = ie.hadm_id AND bg.charttime >= ie.intime AND bg.charttime < ie.outtime + ON bg.hadm_id = ie.hadm_id + AND bg.charttime >= ie.intime + AND bg.charttime < ie.outtime WHERE NOT ph IS NULL AND NOT pco2 IS NULL AND bg.specimen = 'ART.' ), acidbase_max AS ( @@ -608,7 +614,8 @@ WITH pa AS ( WHEN ABS(heart_rate_max - 75) = ABS(heart_rate_min - 75) AND smax.hr_score >= smin.hr_score THEN smax.hr_score - WHEN ABS(heart_rate_max - 75) = ABS(heart_rate_min - 75) AND smax.hr_score < smin.hr_score + WHEN ABS(heart_rate_max - 75) = ABS(heart_rate_min - 75) + AND smax.hr_score < smin.hr_score THEN smin.hr_score END AS hr_score, CASE diff --git a/mimic-iv/concepts_duckdb/score/lods.sql b/mimic-iv/concepts_duckdb/score/lods.sql index 4d218550..c3889ca5 100644 --- a/mimic-iv/concepts_duckdb/score/lods.sql +++ b/mimic-iv/concepts_duckdb/score/lods.sql @@ -35,14 +35,18 @@ WITH cpap AS ( CASE WHEN NOT cp.stay_id IS NULL THEN 1 ELSE 0 END AS cpap FROM mimiciv_derived.bg AS bg INNER JOIN mimiciv_icu.icustays AS ie - ON bg.hadm_id = ie.hadm_id AND bg.charttime >= ie.intime AND bg.charttime < ie.outtime + ON bg.hadm_id = ie.hadm_id + AND bg.charttime >= ie.intime + AND bg.charttime < ie.outtime LEFT JOIN mimiciv_derived.ventilation AS vd ON ie.stay_id = vd.stay_id AND bg.charttime >= vd.starttime AND bg.charttime <= vd.endtime AND vd.ventilation_status = 'InvasiveVent' LEFT JOIN cpap AS cp - ON ie.stay_id = cp.stay_id AND bg.charttime >= cp.starttime AND bg.charttime <= cp.endtime + ON ie.stay_id = cp.stay_id + AND bg.charttime >= cp.starttime + AND bg.charttime <= cp.endtime ), pafi2 AS ( SELECT stay_id, diff --git a/mimic-iv/concepts_duckdb/score/sapsii.sql b/mimic-iv/concepts_duckdb/score/sapsii.sql index 7bd5a9e8..307bc0b1 100644 --- a/mimic-iv/concepts_duckdb/score/sapsii.sql +++ b/mimic-iv/concepts_duckdb/score/sapsii.sql @@ -17,7 +17,9 @@ WITH co AS ( MAX(CASE WHEN REGEXP_MATCHES(LOWER(ce.value), '(cpap mask|bipap)') THEN 1 ELSE 0 END) AS cpap FROM co INNER JOIN mimiciv_icu.chartevents AS ce - ON co.stay_id = ce.stay_id AND ce.charttime > co.starttime AND ce.charttime <= co.endtime + ON co.stay_id = ce.stay_id + AND ce.charttime > co.starttime + AND ce.charttime <= co.endtime WHERE ce.itemid = 226732 AND REGEXP_MATCHES(LOWER(ce.value), '(cpap mask|bipap)') GROUP BY @@ -135,7 +137,9 @@ WITH co AS ( MIN(gcs.gcs) AS mingcs FROM co LEFT JOIN mimiciv_derived.gcs AS gcs - ON co.stay_id = gcs.stay_id AND co.starttime < gcs.charttime AND gcs.charttime <= co.endtime + ON co.stay_id = gcs.stay_id + AND co.starttime < gcs.charttime + AND gcs.charttime <= co.endtime GROUP BY co.stay_id ), vital AS ( @@ -160,7 +164,9 @@ WITH co AS ( SUM(uo.urineoutput) AS urineoutput FROM co LEFT JOIN mimiciv_derived.urine_output AS uo - ON co.stay_id = uo.stay_id AND co.starttime < uo.charttime AND co.endtime >= uo.charttime + ON co.stay_id = uo.stay_id + AND co.starttime < uo.charttime + AND co.endtime >= uo.charttime GROUP BY co.stay_id ), labs AS ( @@ -301,7 +307,10 @@ WITH co AS ( THEN 4 WHEN heartrate_min < 70 THEN 2 - WHEN heartrate_max >= 70 AND heartrate_max < 120 AND heartrate_min >= 70 AND heartrate_min < 120 + WHEN heartrate_max >= 70 + AND heartrate_max < 120 + AND heartrate_min >= 70 + AND heartrate_min < 120 THEN 0 END AS hr_score, CASE @@ -371,7 +380,10 @@ WITH co AS ( THEN 3 WHEN potassium_max >= 5.0 THEN 3 - WHEN potassium_max >= 3.0 AND potassium_max < 5.0 AND potassium_min >= 3.0 AND potassium_min < 5.0 + WHEN potassium_max >= 3.0 + AND potassium_max < 5.0 + AND potassium_min >= 3.0 + AND potassium_min < 5.0 THEN 0 END AS potassium_score, CASE diff --git a/mimic-iv/concepts_duckdb/score/sofa.sql b/mimic-iv/concepts_duckdb/score/sofa.sql index bc4f5725..102fad93 100644 --- a/mimic-iv/concepts_duckdb/score/sofa.sql +++ b/mimic-iv/concepts_duckdb/score/sofa.sql @@ -33,7 +33,9 @@ WITH co AS ( MIN(vs.mbp) AS meanbp_min FROM co LEFT JOIN mimiciv_derived.vitalsign AS vs - ON co.stay_id = vs.stay_id AND co.starttime < vs.charttime AND co.endtime >= vs.charttime + ON co.stay_id = vs.stay_id + AND co.starttime < vs.charttime + AND co.endtime >= vs.charttime GROUP BY co.stay_id, co.hr @@ -44,7 +46,9 @@ WITH co AS ( MIN(gcs.gcs) AS gcs_min FROM co LEFT JOIN mimiciv_derived.gcs AS gcs - ON co.stay_id = gcs.stay_id AND co.starttime < gcs.charttime AND co.endtime >= gcs.charttime + ON co.stay_id = gcs.stay_id + AND co.starttime < gcs.charttime + AND co.endtime >= gcs.charttime GROUP BY co.stay_id, co.hr @@ -55,7 +59,9 @@ WITH co AS ( MAX(enz.bilirubin_total) AS bilirubin_max FROM co LEFT JOIN mimiciv_derived.enzyme AS enz - ON co.hadm_id = enz.hadm_id AND co.starttime < enz.charttime AND co.endtime >= enz.charttime + ON co.hadm_id = enz.hadm_id + AND co.starttime < enz.charttime + AND co.endtime >= enz.charttime GROUP BY co.stay_id, co.hr @@ -79,7 +85,9 @@ WITH co AS ( MIN(cbc.platelet) AS platelet_min FROM co LEFT JOIN mimiciv_derived.complete_blood_count AS cbc - ON co.hadm_id = cbc.hadm_id AND co.starttime < cbc.charttime AND co.endtime >= cbc.charttime + ON co.hadm_id = cbc.hadm_id + AND co.starttime < cbc.charttime + AND co.endtime >= cbc.charttime GROUP BY co.stay_id, co.hr @@ -109,7 +117,9 @@ WITH co AS ( ) AS uo_24hr FROM co LEFT JOIN mimiciv_derived.urine_output_rate AS uo - ON co.stay_id = uo.stay_id AND co.starttime < uo.charttime AND co.endtime >= uo.charttime + ON co.stay_id = uo.stay_id + AND co.starttime < uo.charttime + AND co.endtime >= uo.charttime GROUP BY co.stay_id, co.hr @@ -123,13 +133,21 @@ WITH co AS ( MAX(dob.vaso_rate) AS rate_dobutamine FROM co LEFT JOIN mimiciv_derived.epinephrine AS epi - ON co.stay_id = epi.stay_id AND co.endtime > epi.starttime AND co.endtime <= epi.endtime + ON co.stay_id = epi.stay_id + AND co.endtime > epi.starttime + AND co.endtime <= epi.endtime LEFT JOIN mimiciv_derived.norepinephrine AS nor - ON co.stay_id = nor.stay_id AND co.endtime > nor.starttime AND co.endtime <= nor.endtime + ON co.stay_id = nor.stay_id + AND co.endtime > nor.starttime + AND co.endtime <= nor.endtime LEFT JOIN mimiciv_derived.dopamine AS dop - ON co.stay_id = dop.stay_id AND co.endtime > dop.starttime AND co.endtime <= dop.endtime + ON co.stay_id = dop.stay_id + AND co.endtime > dop.starttime + AND co.endtime <= dop.endtime LEFT JOIN mimiciv_derived.dobutamine AS dob - ON co.stay_id = dob.stay_id AND co.endtime > dob.starttime AND co.endtime <= dob.endtime + ON co.stay_id = dob.stay_id + AND co.endtime > dob.starttime + AND co.endtime <= dob.endtime WHERE NOT epi.stay_id IS NULL OR NOT nor.stay_id IS NULL diff --git a/mimic-iv/concepts_postgres/demographics/age.sql b/mimic-iv/concepts_postgres/demographics/age.sql index 4370bfe1..dd83aa6b 100644 --- a/mimic-iv/concepts_postgres/demographics/age.sql +++ b/mimic-iv/concepts_postgres/demographics/age.sql @@ -7,7 +7,7 @@ SELECT ad.admittime, pa.anchor_age, pa.anchor_year, /* calculate the age as anchor_age (60) plus difference between */ /* admit year and the anchor year. */ /* the noqa retains the extra long line so the */ /* convert to postgres bash script works */ - pa.anchor_age + EXTRACT(EPOCH FROM ad.admittime - TO_TIMESTAMP(TO_CHAR(pa.anchor_year, '0000') || TO_CHAR(1, '00') || TO_CHAR(1, '00') || TO_CHAR(0, '00') || TO_CHAR(0, '00') || TO_CHAR(0, '00'), 'yyyymmddHH24MISS')) / 31556908.8 AS age /* noqa: L016 */ + pa.anchor_age + EXTRACT(EPOCH FROM ad.admittime - MAKE_TIMESTAMP(pa.anchor_year, 1, 1, 0, 0, 0)) / 31556908.8 AS age /* noqa: L016 */ FROM mimiciv_hosp.admissions AS ad INNER JOIN mimiciv_hosp.patients AS pa ON ad.subject_id = pa.subject_id \ No newline at end of file diff --git a/mimic-iv/concepts_postgres/demographics/icustay_detail.sql b/mimic-iv/concepts_postgres/demographics/icustay_detail.sql index 378be879..342f1e0b 100644 --- a/mimic-iv/concepts_postgres/demographics/icustay_detail.sql +++ b/mimic-iv/concepts_postgres/demographics/icustay_detail.sql @@ -9,7 +9,7 @@ SELECT adm.admittime, adm.dischtime, EXTRACT(EPOCH FROM adm.dischtime - adm.admittime) / 86400.0 AS los_hospital, /* calculate the age as anchor_age (60) plus difference between */ /* admit year and the anchor year. */ /* the noqa retains the extra long line so the */ /* convert to postgres bash script works */ - pat.anchor_age + EXTRACT(EPOCH FROM adm.admittime - TO_TIMESTAMP(TO_CHAR(pat.anchor_year, '0000') || TO_CHAR(1, '00') || TO_CHAR(1, '00') || TO_CHAR(0, '00') || TO_CHAR(0, '00') || TO_CHAR(0, '00'), 'yyyymmddHH24MISS')) / 31556908.8 AS admission_age, /* noqa: L016 */ + pat.anchor_age + EXTRACT(EPOCH FROM adm.admittime - MAKE_TIMESTAMP(pat.anchor_year, 1, 1, 0, 0, 0)) / 31556908.8 AS admission_age, /* noqa: L016 */ adm.race, adm.hospital_expire_flag, DENSE_RANK() OVER (PARTITION BY adm.subject_id ORDER BY adm.admittime NULLS FIRST) AS hospstay_seq, diff --git a/mimic-iv/concepts_postgres/demographics/icustay_hourly.sql b/mimic-iv/concepts_postgres/demographics/icustay_hourly.sql index 0c553168..d9e911e0 100644 --- a/mimic-iv/concepts_postgres/demographics/icustay_hourly.sql +++ b/mimic-iv/concepts_postgres/demographics/icustay_hourly.sql @@ -9,7 +9,9 @@ WITH all_hours AS ( THEN it.intime_hr ELSE DATE_TRUNC('HOUR', it.intime_hr) + INTERVAL '1 HOUR' END AS endtime, /* create integers for each charttime in hours from admission */ /* so 0 is admission time, 1 is one hour after admission, etc, */ /* up to ICU disch */ /* we allow 24 hours before ICU admission (to grab labs before admit) */ - GENERATE_SERIES(-24, CAST(CEIL(EXTRACT(EPOCH FROM it.outtime_hr - it.intime_hr) / 3600.0) AS INT)) AS hrs /* noqa: L016 */ + ARRAY(SELECT + * + FROM GENERATE_SERIES(-24, CAST(CEIL(EXTRACT(EPOCH FROM it.outtime_hr - it.intime_hr) / 3600.0) AS INT))) AS hrs /* noqa: L016 */ FROM mimiciv_derived.icustay_times AS it ) SELECT @@ -17,4 +19,4 @@ SELECT CAST(hr_unnested AS BIGINT) AS hr, endtime + CAST(hr_unnested AS BIGINT) * INTERVAL '1 HOUR' AS endtime FROM all_hours -CROSS JOIN UNNEST(all_hours.hrs) AS _t(hr_unnested) \ No newline at end of file +CROSS JOIN UNNEST(all_hours.hrs) AS _t0(hr_unnested) \ No newline at end of file diff --git a/mimic-iv/concepts_postgres/firstday/first_day_urine_output.sql b/mimic-iv/concepts_postgres/firstday/first_day_urine_output.sql index 3fff4808..832d8ebd 100644 --- a/mimic-iv/concepts_postgres/firstday/first_day_urine_output.sql +++ b/mimic-iv/concepts_postgres/firstday/first_day_urine_output.sql @@ -9,7 +9,7 @@ FROM mimiciv_icu.icustays AS ie /* Join to the outputevents table to get urine output */ LEFT JOIN mimiciv_derived.urine_output AS uo ON ie.stay_id = uo.stay_id - AND uo.charttime >= ie.intime + AND /* ensure the data occurs during the first day */ uo.charttime >= ie.intime AND uo.charttime <= ie.intime + INTERVAL '1 DAY' GROUP BY ie.subject_id, diff --git a/mimic-iv/concepts_postgres/firstday/first_day_weight.sql b/mimic-iv/concepts_postgres/firstday/first_day_weight.sql index c41dbea6..9fabea8d 100644 --- a/mimic-iv/concepts_postgres/firstday/first_day_weight.sql +++ b/mimic-iv/concepts_postgres/firstday/first_day_weight.sql @@ -11,7 +11,8 @@ SELECT FROM mimiciv_icu.icustays AS ie /* admission weight */ LEFT JOIN mimiciv_derived.weight_durations AS ce - ON ie.stay_id = ce.stay_id AND ce.starttime <= ie.intime + INTERVAL '1 DAY' + ON ie.stay_id = ce.stay_id + AND /* we filter to weights documented during or before the 1st day */ ce.starttime <= ie.intime + INTERVAL '1 DAY' GROUP BY ie.subject_id, ie.stay_id \ No newline at end of file diff --git a/mimic-iv/concepts_postgres/measurement/bg.sql b/mimic-iv/concepts_postgres/measurement/bg.sql index 3964da60..1ab86d96 100644 --- a/mimic-iv/concepts_postgres/measurement/bg.sql +++ b/mimic-iv/concepts_postgres/measurement/bg.sql @@ -3,7 +3,7 @@ DROP TABLE IF EXISTS mimiciv_derived.bg; CREATE TABLE mimiciv_derived.bg AS /* The aim of this query is to pivot entries related to blood gases */ /* which were found in LABEVENTS */ WITH bg AS ( SELECT - MAX(subject_id) AS subject_id, + MAX(subject_id) AS subject_id, /* specimen_id only ever has 1 measurement for each itemid */ /* so, we may simply collapse rows using MAX() */ MAX(hadm_id) AS hadm_id, MAX(charttime) AS charttime, /* specimen_id *may* have different storetimes, so this */ /* is taking the latest */ MAX(storetime) AS storetime, @@ -57,7 +57,9 @@ WITH bg AS ( AVG(valuenum) AS spo2 FROM mimiciv_icu.chartevents WHERE - itemid = 220277 /* O2 saturation pulseoxymetry */ AND valuenum > 0 AND valuenum <= 100 + itemid = 220277 /* O2 saturation pulseoxymetry */ + AND valuenum > 0 + AND valuenum <= 100 GROUP BY subject_id, charttime @@ -78,7 +80,9 @@ WITH bg AS ( ) AS fio2_chartevents FROM mimiciv_icu.chartevents WHERE - itemid = 223835 /* Inspired O2 Fraction (FiO2) */ AND valuenum > 0 AND valuenum <= 100 + itemid = 223835 /* Inspired O2 Fraction (FiO2) */ + AND valuenum > 0 + AND valuenum <= 100 GROUP BY subject_id, charttime @@ -90,7 +94,7 @@ WITH bg AS ( FROM bg LEFT JOIN stg_spo2 AS s1 ON bg.subject_id = s1.subject_id - AND s1.charttime BETWEEN bg.charttime - INTERVAL '2 HOUR' AND bg.charttime + AND /* spo2 occurred at most 2 hours before this blood gas */ s1.charttime BETWEEN bg.charttime - INTERVAL '2 HOUR' AND bg.charttime WHERE NOT bg.po2 IS NULL ), stg3 AS ( @@ -101,7 +105,7 @@ WITH bg AS ( FROM stg2 AS bg LEFT JOIN stg_fio2 AS s2 ON bg.subject_id = s2.subject_id - AND s2.charttime >= bg.charttime - INTERVAL '4 HOUR' + AND /* fio2 occurred at most 4 hours before this blood gas */ s2.charttime >= bg.charttime - INTERVAL '4 HOUR' AND s2.charttime <= bg.charttime AND s2.fio2_chartevents > 0 /* only the row with the most recent SpO2 (if no SpO2 found lastRowSpO2 = 1) */ diff --git a/mimic-iv/concepts_postgres/measurement/blood_differential.sql b/mimic-iv/concepts_postgres/measurement/blood_differential.sql index 9369f703..73cd6ad9 100644 --- a/mimic-iv/concepts_postgres/measurement/blood_differential.sql +++ b/mimic-iv/concepts_postgres/measurement/blood_differential.sql @@ -56,7 +56,7 @@ WITH blood_diff AS ( MAX(CASE WHEN itemid = 51257 THEN valuenum ELSE NULL END) AS nrbc, /* utility flags which determine whether imputation is possible */ CASE WHEN MAX(CASE WHEN itemid IN (51300, 51301, 51755) THEN valuenum ELSE NULL END) > 0 - AND SUM( + AND /* and we have at least one percentage from the diff */ /* sometimes the entire diff is 0%, which looks like bad data */ SUM( CASE WHEN itemid IN (51146, 51200, 51244, 51245, 51254, 51256) THEN valuenum @@ -70,7 +70,7 @@ WITH blood_diff AS ( WHERE le.itemid IN (51146 /* basophils */, 52069 /* Absolute basophil count */, 51199 /* Eosinophil Count */, 51200 /* Eosinophils */, 52073 /* Absolute Eosinophil count */, 51244 /* Lymphocytes */, 51245 /* Lymphocytes, Percent */, 51133 /* Absolute Lymphocyte Count */, 52769 /* Absolute Lymphocyte Count */, 51253 /* Monocyte Count */, 51254 /* Monocytes */, 52074 /* Absolute Monocyte Count */, 51256 /* Neutrophils */, 52075 /* Absolute Neutrophil Count */, 51143 /* Atypical lymphocytes */, 51144 /* Bands (%) */, 51218 /* Granulocyte Count */, 52135 /* Immature granulocytes (%) */, 51251 /* Metamyelocytes */, 51257 /* Nucleated Red Cells */ /* wbc totals measured in K/uL */ /* 52220 (wbcp) is percentage */, 51300, 51301, 51755) /* below are point of care tests which are extremely infrequent */ /* and usually low quality */ /* 51697, -- Neutrophils (mmol/L) */ /* below itemid do not have data as of MIMIC-IV v1.0 */ /* 51536, -- Absolute Lymphocyte Count */ /* 51537, -- Absolute Neutrophil */ /* 51690, -- Lymphocytes */ /* 52151, -- NRBC */ AND NOT valuenum IS NULL - AND valuenum >= 0 + AND /* differential values cannot be negative */ valuenum >= 0 GROUP BY le.specimen_id ) diff --git a/mimic-iv/concepts_postgres/measurement/chemistry.sql b/mimic-iv/concepts_postgres/measurement/chemistry.sql index ce9b0e26..c9344d31 100644 --- a/mimic-iv/concepts_postgres/measurement/chemistry.sql +++ b/mimic-iv/concepts_postgres/measurement/chemistry.sql @@ -22,8 +22,8 @@ FROM mimiciv_hosp.labevents AS le WHERE le.itemid IN (50862 /* comment is: LABEL | CATEGORY | FLUID | NUMBER OF ROWS IN LABEVENTS */ /* ALBUMIN | CHEMISTRY | BLOOD | 146697 */, 50930 /* Globulin */, 50976 /* Total protein */ /* 52456, -- Anion gap, point of care test */, 50868 /* ANION GAP | CHEMISTRY | BLOOD | 769895 */, 50882 /* BICARBONATE | CHEMISTRY | BLOOD | 780733 */, 50893 /* Calcium */ /* 52502, Creatinine, point of care */, 50912 /* CREATININE | CHEMISTRY | BLOOD | 797476 */, 50902 /* CHLORIDE | CHEMISTRY | BLOOD | 795568 */, 50931 /* GLUCOSE | CHEMISTRY | BLOOD | 748981 */ /* 52525, Glucose, point of care */ /* 52566, -- Potassium, point of care */, 50971 /* POTASSIUM | CHEMISTRY | BLOOD | 845825 */ /* 52579, -- Sodium, point of care */, 50983 /* SODIUM | CHEMISTRY | BLOOD | 808489 */ /* 52603, Urea, point of care */, 51006 /* UREA NITROGEN | CHEMISTRY | BLOOD | 791925 */) AND NOT valuenum IS NULL - AND ( + AND /* lab values cannot be 0 and cannot be negative */ /* .. except anion gap. */ ( valuenum > 0 OR itemid = 50868 - ) /* lab values cannot be 0 and cannot be negative */ /* .. except anion gap. */ + ) GROUP BY le.specimen_id \ No newline at end of file diff --git a/mimic-iv/concepts_postgres/measurement/complete_blood_count.sql b/mimic-iv/concepts_postgres/measurement/complete_blood_count.sql index 5fa3db50..720b2bb1 100644 --- a/mimic-iv/concepts_postgres/measurement/complete_blood_count.sql +++ b/mimic-iv/concepts_postgres/measurement/complete_blood_count.sql @@ -20,6 +20,6 @@ FROM mimiciv_hosp.labevents AS le WHERE le.itemid IN (51221 /* hematocrit */, 51222 /* hemoglobin */, 51248 /* MCH */, 51249 /* MCHC */, 51250 /* MCV */, 51265 /* platelets */, 51279 /* RBC */, 51277 /* RDW */, 52159 /* RDW SD */, 51301 /* WBC */) AND NOT valuenum IS NULL - AND valuenum > 0 + AND /* lab values cannot be 0 and cannot be negative */ valuenum > 0 GROUP BY le.specimen_id \ No newline at end of file diff --git a/mimic-iv/concepts_postgres/measurement/enzyme.sql b/mimic-iv/concepts_postgres/measurement/enzyme.sql index 4d256d3f..57b591a1 100644 --- a/mimic-iv/concepts_postgres/measurement/enzyme.sql +++ b/mimic-iv/concepts_postgres/measurement/enzyme.sql @@ -21,6 +21,6 @@ FROM mimiciv_hosp.labevents AS le WHERE le.itemid IN (50861 /* Alanine transaminase (ALT) */, 50863 /* Alkaline phosphatase (ALP) */, 50878 /* Aspartate transaminase (AST) */, 50867 /* Amylase */, 50885 /* total bili */, 50884 /* indirect bili */, 50883 /* direct bili */, 50910 /* ck_cpk */, 50911 /* CK-MB */, 50927 /* Gamma Glutamyltransferase (GGT) */, 50954 /* ld_ldh */) AND NOT valuenum IS NULL - AND valuenum > 0 + AND /* lab values cannot be 0 and cannot be negative */ valuenum > 0 GROUP BY le.specimen_id \ No newline at end of file diff --git a/mimic-iv/concepts_postgres/measurement/height.sql b/mimic-iv/concepts_postgres/measurement/height.sql index 5902fdd1..ef1d5ee1 100644 --- a/mimic-iv/concepts_postgres/measurement/height.sql +++ b/mimic-iv/concepts_postgres/measurement/height.sql @@ -10,7 +10,7 @@ WITH ht_in AS ( c.valuenum AS height_orig FROM mimiciv_icu.chartevents AS c WHERE - NOT c.valuenum IS NULL /* Height (measured in inches) */ AND c.itemid = 226707 + NOT c.valuenum IS NULL AND /* Height (measured in inches) */ c.itemid = 226707 ), ht_cm AS ( SELECT c.subject_id, @@ -19,7 +19,7 @@ WITH ht_in AS ( ROUND(CAST(c.valuenum AS DECIMAL), 2) AS height FROM mimiciv_icu.chartevents AS c WHERE - NOT c.valuenum IS NULL /* Height cm */ AND c.itemid = 226730 + NOT c.valuenum IS NULL AND /* Height cm */ c.itemid = 226730 ), ht_stg0 AS ( SELECT COALESCE(h1.subject_id, h1.subject_id) AS subject_id, @@ -37,4 +37,4 @@ SELECT height FROM ht_stg0 WHERE - NOT height IS NULL /* filter out bad heights */ AND height > 120 AND height < 230 \ No newline at end of file + NOT height IS NULL AND /* filter out bad heights */ height > 120 AND height < 230 \ No newline at end of file diff --git a/mimic-iv/concepts_postgres/measurement/inflammation.sql b/mimic-iv/concepts_postgres/measurement/inflammation.sql index 30c2a0d6..4f7f0bc2 100644 --- a/mimic-iv/concepts_postgres/measurement/inflammation.sql +++ b/mimic-iv/concepts_postgres/measurement/inflammation.sql @@ -10,6 +10,6 @@ FROM mimiciv_hosp.labevents AS le WHERE le.itemid IN (50889 /* 51652 -- high sensitivity CRP */ /* crp */) AND NOT valuenum IS NULL - AND valuenum > 0 + AND /* lab values cannot be 0 and cannot be negative */ valuenum > 0 GROUP BY le.specimen_id \ No newline at end of file diff --git a/mimic-iv/concepts_postgres/medication/antibiotic.sql b/mimic-iv/concepts_postgres/medication/antibiotic.sql index 4c54327f..91d1b2d4 100644 --- a/mimic-iv/concepts_postgres/medication/antibiotic.sql +++ b/mimic-iv/concepts_postgres/medication/antibiotic.sql @@ -318,11 +318,11 @@ WITH abx AS ( FROM mimiciv_hosp.prescriptions /* excludes vials/syringe/normal saline, etc */ WHERE - NOT drug_type IN ('BASE') /* we exclude routes via the eye, ears, or topically */ - AND NOT route IN ('OU', 'OS', 'OD', 'AU', 'AS', 'AD', 'TP') + NOT drug_type IN ('BASE') + AND /* we exclude routes via the eye, ears, or topically */ NOT route IN ('OU', 'OS', 'OD', 'AU', 'AS', 'AD', 'TP') AND NOT LOWER(route) LIKE '%ear%' AND NOT LOWER(route) LIKE '%eye%' - AND NOT LOWER(drug) LIKE '%cream%' /* we exclude certain types of antibiotics: topical creams, */ /* gels, desens, etc */ + AND /* we exclude certain types of antibiotics: topical creams, */ /* gels, desens, etc */ NOT LOWER(drug) LIKE '%cream%' AND NOT LOWER(drug) LIKE '%desensitization%' AND NOT LOWER(drug) LIKE '%ophth oint%' AND NOT LOWER(drug) LIKE '%gel%' @@ -338,9 +338,12 @@ SELECT FROM mimiciv_hosp.prescriptions AS pr /* inner join to subselect to only antibiotic prescriptions */ INNER JOIN abx - ON pr.drug = abx.drug AND pr.route = abx.route + ON pr.drug = abx.drug + AND /* route is never NULL for antibiotics */ /* only ~4000 null rows in prescriptions total. */ pr.route = abx.route /* add in stay_id as we use this table for sepsis-3 */ LEFT JOIN mimiciv_icu.icustays AS ie - ON pr.hadm_id = ie.hadm_id AND pr.starttime >= ie.intime AND pr.starttime < ie.outtime + ON pr.hadm_id = ie.hadm_id + AND pr.starttime >= ie.intime + AND pr.starttime < ie.outtime WHERE abx.antibiotic = 1 \ No newline at end of file diff --git a/mimic-iv/concepts_postgres/medication/vasoactive_agent.sql b/mimic-iv/concepts_postgres/medication/vasoactive_agent.sql index dfc1bbce..063d04bb 100644 --- a/mimic-iv/concepts_postgres/medication/vasoactive_agent.sql +++ b/mimic-iv/concepts_postgres/medication/vasoactive_agent.sql @@ -94,19 +94,33 @@ SELECT /* isoproterenol is used in CCU/CVICU but not in metavision */ /* other drugs not included here but (rarely) used in the BIDMC: */ /* angiotensin II, methylene blue */ FROM tm_lag AS t LEFT JOIN mimiciv_derived.dobutamine AS dob - ON t.stay_id = dob.stay_id AND t.starttime >= dob.starttime AND t.endtime <= dob.endtime + ON t.stay_id = dob.stay_id + AND t.starttime >= dob.starttime + AND t.endtime <= dob.endtime LEFT JOIN mimiciv_derived.dopamine AS dop - ON t.stay_id = dop.stay_id AND t.starttime >= dop.starttime AND t.endtime <= dop.endtime + ON t.stay_id = dop.stay_id + AND t.starttime >= dop.starttime + AND t.endtime <= dop.endtime LEFT JOIN mimiciv_derived.epinephrine AS epi - ON t.stay_id = epi.stay_id AND t.starttime >= epi.starttime AND t.endtime <= epi.endtime + ON t.stay_id = epi.stay_id + AND t.starttime >= epi.starttime + AND t.endtime <= epi.endtime LEFT JOIN mimiciv_derived.norepinephrine AS nor - ON t.stay_id = nor.stay_id AND t.starttime >= nor.starttime AND t.endtime <= nor.endtime + ON t.stay_id = nor.stay_id + AND t.starttime >= nor.starttime + AND t.endtime <= nor.endtime LEFT JOIN mimiciv_derived.phenylephrine AS phe - ON t.stay_id = phe.stay_id AND t.starttime >= phe.starttime AND t.endtime <= phe.endtime + ON t.stay_id = phe.stay_id + AND t.starttime >= phe.starttime + AND t.endtime <= phe.endtime LEFT JOIN mimiciv_derived.vasopressin AS vas - ON t.stay_id = vas.stay_id AND t.starttime >= vas.starttime AND t.endtime <= vas.endtime + ON t.stay_id = vas.stay_id + AND t.starttime >= vas.starttime + AND t.endtime <= vas.endtime LEFT JOIN mimiciv_derived.milrinone AS mil - ON t.stay_id = mil.stay_id AND t.starttime >= mil.starttime AND t.endtime <= mil.endtime + ON t.stay_id = mil.stay_id + AND t.starttime >= mil.starttime + AND t.endtime <= mil.endtime /* remove the final row for each stay_id */ /* it will not have any infusions associated with it */ WHERE NOT t.endtime IS NULL \ No newline at end of file diff --git a/mimic-iv/concepts_postgres/organfailure/kdigo_stages.sql b/mimic-iv/concepts_postgres/organfailure/kdigo_stages.sql index 4f7947d0..b9b04f59 100644 --- a/mimic-iv/concepts_postgres/organfailure/kdigo_stages.sql +++ b/mimic-iv/concepts_postgres/organfailure/kdigo_stages.sql @@ -14,7 +14,7 @@ WITH cr_stg AS ( ) THEN 3 WHEN cr.creat >= 4 - AND ( + AND /* For patients reaching Stage 3 by SCr >4.0 mg/dl */ /* require that the patient first achieve ... */ /* an acute increase >= 0.3 within 48 hr */ /* *or* an increase of >= 1.5 times baseline */ ( cr.creat_low_past_48hr <= 3.7 OR cr.creat >= ( 1.5 * cr.creat_low_past_7day ) diff --git a/mimic-iv/concepts_postgres/organfailure/kdigo_uo.sql b/mimic-iv/concepts_postgres/organfailure/kdigo_uo.sql index 7114fdd8..4bc23334 100644 --- a/mimic-iv/concepts_postgres/organfailure/kdigo_uo.sql +++ b/mimic-iv/concepts_postgres/organfailure/kdigo_uo.sql @@ -69,4 +69,6 @@ SELECT uo_tm_24hr FROM uo_stg2 AS ur LEFT JOIN mimiciv_derived.weight_durations AS wd - ON ur.stay_id = wd.stay_id AND ur.charttime >= wd.starttime AND ur.charttime < wd.endtime \ No newline at end of file + ON ur.stay_id = wd.stay_id + AND ur.charttime >= wd.starttime + AND ur.charttime < wd.endtime \ No newline at end of file diff --git a/mimic-iv/concepts_postgres/score/apsiii.sql b/mimic-iv/concepts_postgres/score/apsiii.sql index 48412103..8b0dad09 100644 --- a/mimic-iv/concepts_postgres/score/apsiii.sql +++ b/mimic-iv/concepts_postgres/score/apsiii.sql @@ -9,7 +9,9 @@ WITH pa AS ( ROW_NUMBER() OVER (PARTITION BY ie.stay_id ORDER BY bg.po2 DESC NULLS LAST) AS rn FROM mimiciv_derived.bg AS bg INNER JOIN mimiciv_icu.icustays AS ie - ON bg.hadm_id = ie.hadm_id AND bg.charttime >= ie.intime AND bg.charttime < ie.outtime + ON bg.hadm_id = ie.hadm_id + AND bg.charttime >= ie.intime + AND bg.charttime < ie.outtime LEFT JOIN mimiciv_derived.ventilation AS vd ON ie.stay_id = vd.stay_id AND bg.charttime >= vd.starttime @@ -17,7 +19,7 @@ WITH pa AS ( AND vd.ventilation_status = 'InvasiveVent' WHERE vd.stay_id IS NULL /* patient is *not* ventilated */ - AND COALESCE(fio2, fio2_chartevents, 21) < 50 + AND /* and fio2 < 50, or if no fio2, assume room air */ COALESCE(fio2, fio2_chartevents, 21) < 50 AND NOT bg.po2 IS NULL AND bg.specimen = 'ART.' ), aa AS ( @@ -30,7 +32,9 @@ WITH pa AS ( /* row number indicating the highest AaDO2 */ FROM mimiciv_derived.bg AS bg INNER JOIN mimiciv_icu.icustays AS ie - ON bg.hadm_id = ie.hadm_id AND bg.charttime >= ie.intime AND bg.charttime < ie.outtime + ON bg.hadm_id = ie.hadm_id + AND bg.charttime >= ie.intime + AND bg.charttime < ie.outtime INNER JOIN mimiciv_derived.ventilation AS vd ON ie.stay_id = vd.stay_id AND bg.charttime >= vd.starttime @@ -65,7 +69,9 @@ WITH pa AS ( END AS acidbase_score FROM mimiciv_derived.bg AS bg INNER JOIN mimiciv_icu.icustays AS ie - ON bg.hadm_id = ie.hadm_id AND bg.charttime >= ie.intime AND bg.charttime < ie.outtime + ON bg.hadm_id = ie.hadm_id + AND bg.charttime >= ie.intime + AND bg.charttime < ie.outtime WHERE NOT ph IS NULL AND NOT pco2 IS NULL AND bg.specimen = 'ART.' ), acidbase_max AS ( @@ -80,7 +86,9 @@ WITH pa AS ( SELECT ie.stay_id, CASE - WHEN labs.creatinine_max >= 1.5 AND uo.urineoutput < 410 AND icd.ckd = 0 + WHEN labs.creatinine_max >= 1.5 + AND uo.urineoutput < 410 + AND /* acute renal failure is only coded if the patient */ /* is not on chronic dialysis */ /* we use ICD-9 coding of ESRD as a proxy for chronic dialysis */ icd.ckd = 0 THEN 1 ELSE 0 END AS arf @@ -613,7 +621,8 @@ WITH pa AS ( WHEN ABS(heart_rate_max - 75) = ABS(heart_rate_min - 75) AND smax.hr_score >= smin.hr_score THEN smax.hr_score - WHEN ABS(heart_rate_max - 75) = ABS(heart_rate_min - 75) AND smax.hr_score < smin.hr_score + WHEN ABS(heart_rate_max - 75) = ABS(heart_rate_min - 75) + AND smax.hr_score < smin.hr_score THEN smin.hr_score END AS hr_score, CASE diff --git a/mimic-iv/concepts_postgres/score/lods.sql b/mimic-iv/concepts_postgres/score/lods.sql index a38a16ff..d333cdab 100644 --- a/mimic-iv/concepts_postgres/score/lods.sql +++ b/mimic-iv/concepts_postgres/score/lods.sql @@ -37,14 +37,18 @@ WITH cpap AS ( CASE WHEN NOT cp.stay_id IS NULL THEN 1 ELSE 0 END AS cpap FROM mimiciv_derived.bg AS bg INNER JOIN mimiciv_icu.icustays AS ie - ON bg.hadm_id = ie.hadm_id AND bg.charttime >= ie.intime AND bg.charttime < ie.outtime + ON bg.hadm_id = ie.hadm_id + AND bg.charttime >= ie.intime + AND bg.charttime < ie.outtime LEFT JOIN mimiciv_derived.ventilation AS vd ON ie.stay_id = vd.stay_id AND bg.charttime >= vd.starttime AND bg.charttime <= vd.endtime AND vd.ventilation_status = 'InvasiveVent' LEFT JOIN cpap AS cp - ON ie.stay_id = cp.stay_id AND bg.charttime >= cp.starttime AND bg.charttime <= cp.endtime + ON ie.stay_id = cp.stay_id + AND bg.charttime >= cp.starttime + AND bg.charttime <= cp.endtime ), pafi2 AS ( /* get the minimum PaO2/FiO2 ratio *only for ventilated/cpap patients* */ SELECT diff --git a/mimic-iv/concepts_postgres/score/sapsii.sql b/mimic-iv/concepts_postgres/score/sapsii.sql index aa19ee06..92fc61b9 100644 --- a/mimic-iv/concepts_postgres/score/sapsii.sql +++ b/mimic-iv/concepts_postgres/score/sapsii.sql @@ -18,7 +18,9 @@ WITH co AS ( MAX(CASE WHEN LOWER(ce.value) ~ '(cpap mask|bipap)' THEN 1 ELSE 0 END) AS cpap FROM co INNER JOIN mimiciv_icu.chartevents AS ce - ON co.stay_id = ce.stay_id AND ce.charttime > co.starttime AND ce.charttime <= co.endtime + ON co.stay_id = ce.stay_id + AND ce.charttime > co.starttime + AND ce.charttime <= co.endtime WHERE ce.itemid = 226732 AND LOWER(ce.value) ~ '(cpap mask|bipap)' GROUP BY @@ -138,7 +140,9 @@ WITH co AS ( MIN(gcs.gcs) AS mingcs FROM co LEFT JOIN mimiciv_derived.gcs AS gcs - ON co.stay_id = gcs.stay_id AND co.starttime < gcs.charttime AND gcs.charttime <= co.endtime + ON co.stay_id = gcs.stay_id + AND co.starttime < gcs.charttime + AND gcs.charttime <= co.endtime GROUP BY co.stay_id ), vital AS ( @@ -163,7 +167,9 @@ WITH co AS ( SUM(uo.urineoutput) AS urineoutput FROM co LEFT JOIN mimiciv_derived.urine_output AS uo - ON co.stay_id = uo.stay_id AND co.starttime < uo.charttime AND co.endtime >= uo.charttime + ON co.stay_id = uo.stay_id + AND co.starttime < uo.charttime + AND co.endtime >= uo.charttime GROUP BY co.stay_id ), labs AS ( @@ -306,7 +312,10 @@ WITH co AS ( THEN 4 WHEN heartrate_min < 70 THEN 2 - WHEN heartrate_max >= 70 AND heartrate_max < 120 AND heartrate_min >= 70 AND heartrate_min < 120 + WHEN heartrate_max >= 70 + AND heartrate_max < 120 + AND heartrate_min >= 70 + AND heartrate_min < 120 THEN 0 END AS hr_score, CASE @@ -376,7 +385,10 @@ WITH co AS ( THEN 3 WHEN potassium_max >= 5.0 THEN 3 - WHEN potassium_max >= 3.0 AND potassium_max < 5.0 AND potassium_min >= 3.0 AND potassium_min < 5.0 + WHEN potassium_max >= 3.0 + AND potassium_max < 5.0 + AND potassium_min >= 3.0 + AND potassium_min < 5.0 THEN 0 END AS potassium_score, CASE diff --git a/mimic-iv/concepts_postgres/score/sofa.sql b/mimic-iv/concepts_postgres/score/sofa.sql index 3bcd891b..e680aa22 100644 --- a/mimic-iv/concepts_postgres/score/sofa.sql +++ b/mimic-iv/concepts_postgres/score/sofa.sql @@ -35,7 +35,9 @@ WITH co AS ( MIN(vs.mbp) AS meanbp_min FROM co LEFT JOIN mimiciv_derived.vitalsign AS vs - ON co.stay_id = vs.stay_id AND co.starttime < vs.charttime AND co.endtime >= vs.charttime + ON co.stay_id = vs.stay_id + AND co.starttime < vs.charttime + AND co.endtime >= vs.charttime GROUP BY co.stay_id, co.hr @@ -46,7 +48,9 @@ WITH co AS ( MIN(gcs.gcs) AS gcs_min FROM co LEFT JOIN mimiciv_derived.gcs AS gcs - ON co.stay_id = gcs.stay_id AND co.starttime < gcs.charttime AND co.endtime >= gcs.charttime + ON co.stay_id = gcs.stay_id + AND co.starttime < gcs.charttime + AND co.endtime >= gcs.charttime GROUP BY co.stay_id, co.hr @@ -57,7 +61,9 @@ WITH co AS ( MAX(enz.bilirubin_total) AS bilirubin_max FROM co LEFT JOIN mimiciv_derived.enzyme AS enz - ON co.hadm_id = enz.hadm_id AND co.starttime < enz.charttime AND co.endtime >= enz.charttime + ON co.hadm_id = enz.hadm_id + AND co.starttime < enz.charttime + AND co.endtime >= enz.charttime GROUP BY co.stay_id, co.hr @@ -81,7 +87,9 @@ WITH co AS ( MIN(cbc.platelet) AS platelet_min FROM co LEFT JOIN mimiciv_derived.complete_blood_count AS cbc - ON co.hadm_id = cbc.hadm_id AND co.starttime < cbc.charttime AND co.endtime >= cbc.charttime + ON co.hadm_id = cbc.hadm_id + AND co.starttime < cbc.charttime + AND co.endtime >= cbc.charttime GROUP BY co.stay_id, co.hr @@ -112,7 +120,9 @@ WITH co AS ( ) AS uo_24hr FROM co LEFT JOIN mimiciv_derived.urine_output_rate AS uo - ON co.stay_id = uo.stay_id AND co.starttime < uo.charttime AND co.endtime >= uo.charttime + ON co.stay_id = uo.stay_id + AND co.starttime < uo.charttime + AND co.endtime >= uo.charttime GROUP BY co.stay_id, co.hr @@ -126,13 +136,21 @@ WITH co AS ( MAX(dob.vaso_rate) AS rate_dobutamine FROM co LEFT JOIN mimiciv_derived.epinephrine AS epi - ON co.stay_id = epi.stay_id AND co.endtime > epi.starttime AND co.endtime <= epi.endtime + ON co.stay_id = epi.stay_id + AND co.endtime > epi.starttime + AND co.endtime <= epi.endtime LEFT JOIN mimiciv_derived.norepinephrine AS nor - ON co.stay_id = nor.stay_id AND co.endtime > nor.starttime AND co.endtime <= nor.endtime + ON co.stay_id = nor.stay_id + AND co.endtime > nor.starttime + AND co.endtime <= nor.endtime LEFT JOIN mimiciv_derived.dopamine AS dop - ON co.stay_id = dop.stay_id AND co.endtime > dop.starttime AND co.endtime <= dop.endtime + ON co.stay_id = dop.stay_id + AND co.endtime > dop.starttime + AND co.endtime <= dop.endtime LEFT JOIN mimiciv_derived.dobutamine AS dob - ON co.stay_id = dob.stay_id AND co.endtime > dob.starttime AND co.endtime <= dob.endtime + ON co.stay_id = dob.stay_id + AND co.endtime > dob.starttime + AND co.endtime <= dob.endtime WHERE NOT epi.stay_id IS NULL OR NOT nor.stay_id IS NULL diff --git a/pyproject.toml b/pyproject.toml index 8c887778..c6ca5529 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "sqlglot", + "sqlglot==25.13.0", "pandas", "numpy", ] diff --git a/src/mimic_utils/sqlglot/__init__.py b/src/mimic_utils/sqlglot_dialects/__init__.py similarity index 100% rename from src/mimic_utils/sqlglot/__init__.py rename to src/mimic_utils/sqlglot_dialects/__init__.py diff --git a/src/mimic_utils/sqlglot/bigquery.py b/src/mimic_utils/sqlglot_dialects/bigquery.py similarity index 100% rename from src/mimic_utils/sqlglot/bigquery.py rename to src/mimic_utils/sqlglot_dialects/bigquery.py diff --git a/src/mimic_utils/sqlglot/duckdb.py b/src/mimic_utils/sqlglot_dialects/duckdb.py similarity index 100% rename from src/mimic_utils/sqlglot/duckdb.py rename to src/mimic_utils/sqlglot_dialects/duckdb.py diff --git a/src/mimic_utils/sqlglot/postgres.py b/src/mimic_utils/sqlglot_dialects/postgres.py similarity index 94% rename from src/mimic_utils/sqlglot/postgres.py rename to src/mimic_utils/sqlglot_dialects/postgres.py index 8334f5ad..c142e3e0 100644 --- a/src/mimic_utils/sqlglot/postgres.py +++ b/src/mimic_utils/sqlglot_dialects/postgres.py @@ -1,6 +1,7 @@ import sqlglot import sqlglot.dialects.postgres -from sqlglot import Expression, exp, select +from sqlglot import Expression, exp +from sqlglot.expressions import array, select # DATETIME: allow passing either a DATE directly, or multiple arguments # there isn't a class for the Datetime function, so we have to create it ourself, @@ -94,18 +95,20 @@ def datetime_sql(self: Expression, expression: Expression): # https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions#generate_array # https://www.postgresql.org/docs/current/functions-srf.html def generate_array_sql(self: Expression, expression: Expression): - # first create a select statement which selects from generate_series - select_statement = select("*").from_( + # BigQuery's generate array returns an array data type, + # but PostgreSQL generate series returns a set of rows, + # so we wrap the output of generate series in an array + # constructor. + select_statement = array(select("*").from_( GenerateSeries( expressions=[ expression.expressions[0], expression.expressions[1], ], ) - ) + )) - # now convert the select statement to an array - return f"ARRAY({self.sql(select_statement)})" + return self.generate(select_statement) sqlglot.dialects.postgres.Postgres.Generator.TRANSFORMS[GenerateArray] = generate_array_sql # we need to prevent the wrapping of the table alias in brackets for UNNEST diff --git a/src/mimic_utils/transpile.py b/src/mimic_utils/transpile.py index 365e30a0..2d52a452 100644 --- a/src/mimic_utils/transpile.py +++ b/src/mimic_utils/transpile.py @@ -6,14 +6,14 @@ import sqlglot.dialects.bigquery import sqlglot.dialects.duckdb import sqlglot.dialects.postgres -from sqlglot import Expression, exp, select -from sqlglot.helper import seq_get +from sqlglot import exp +from sqlglot.expressions import to_identifier # Apply transformation monkey patches # these modules are imported for their side effects -from mimic_utils.sqlglot import postgres -from mimic_utils.sqlglot import bigquery -from mimic_utils.sqlglot import duckdb +from mimic_utils.sqlglot_dialects import postgres +from mimic_utils.sqlglot_dialects import bigquery +from mimic_utils.sqlglot_dialects import duckdb # sqlglot has a default convention that function names are upper-case _FUNCTION_MAPPING = { @@ -38,6 +38,16 @@ def transpile_query(query: str, source_dialect: str="bigquery", destination_dial for table in sql_parsed.find_all(exp.Table): if table.catalog == catalog_to_remove: table.args['catalog'] = None + # we remove quoting of the table identifiers, for consistency + # with previously generated code + table.args['this'] = to_identifier( + name=table.args['this'].this, + quoted=False, + ) + table.args['db'] = to_identifier( + name=table.args['db'].this, + quoted=False, + ) elif table.this.name.startswith(catalog_to_remove): table.args['this'].args['this'] = table.this.name.replace(catalog_to_remove + '.', '') # sqlglot wants to output the schema/table as a single quoted identifier @@ -47,6 +57,26 @@ def transpile_query(query: str, source_dialect: str="bigquery", destination_dial quoted=False ) + # HACK: sqlglot has a GenerateSeries transpilation in v25.13.0, + # which is inserted during the parse of BigQuery. However, it looks + # incorrect for postgres (at least), as it swaps GENERATE_ARRAY for GENERATE_SERIES. + # BigQuery's GENERATE_ARRAY outputs an array, but GENERATE_SERIES outputs exploded rows. + # We will manually replace the GENERATE_SERIES call with an anonymous function, so our + # custom transpile code can do the correct conversion for postgres. + if (source_dialect == 'bigquery') and (destination_dialect == 'postgres'): + for gs_function in sql_parsed.find_all(exp.GenerateSeries): + # rename to our anonymous generate array function, so the + # later loop will catch it + gs_function.replace( + exp.Anonymous( + this='GENERATE_ARRAY', + expressions=[ + gs_function.args['start'], + gs_function.args['end'] + ] + ) + ) + # BigQuery has a few functions which are not in sqlglot, so we have # created classes for them, and this loop replaces the anonymous functions # with the named functions