-- check_etl.sql
-- Forked from MIT-LCP/mimic-omop.
-- Open a transaction for the whole check script and declare the number of
-- pgTAP assertions that follow: six results_eq checks plus the final pass().
BEGIN;
SELECT plan(7);
-- 1. Check the global distribution of lab results.
-- For every lab item, compare the number of non-null numeric results and the
-- floored min / avg / max between the source table labevents and
-- omop.measurement (joined to omop.concept on the source concept id).
--
-- results_eq compares the two result sets row by row, so both queries must
-- emit the items in the same order. The source side sorts on itemid, while
-- the OMOP side groups on concept_code, which is matched against
-- itemid::VARCHAR (text). Sorting that text column directly gives a lexical
-- order that only agrees with the source order while every code has the same
-- number of digits, so the OMOP side sorts on concept_code::integer instead.
-- Assumes labevents.itemid is an integer column -- TODO confirm.
-- NOTE(review): the OMOP side additionally filters on
-- operator_concept_id = 4172703, with no counterpart on the source side --
-- confirm this is intended.
SELECT results_eq
(
'
SELECT
    count(1),
    cast(FLOOR(MIN(valuenum)) as numeric),
    cast(FLOOR(AVG(valuenum)) as numeric),
    cast(floor(MAX(valuenum)) as numeric)
FROM labevents lab
JOIN d_labitems USING (itemid)
where itemid IN
(
    SELECT itemid
    from labevents join d_labitems using (itemid)
    group by itemid order by count(itemid) desc
)
AND valuenum is not null
GROUP BY itemid
ORDER BY itemid, count(itemid) desc;
'
,
'
SELECT
    count(1),
    floor(MIN(value_as_number)),
    floor(AVG(value_as_number)),
    floor(MAX(value_as_number))
FROM omop.measurement
JOIN omop.concept on measurement_source_concept_id = concept_id
Where concept_code IN
(
    SELECT itemid::VARCHAR
    from labevents join d_labitems using (itemid)
    group by itemid order by count(itemid) desc
)
AND value_as_number is not null and operator_concept_id = 4172703
GROUP BY concept_code
ORDER BY concept_code::integer, count(measurement_source_concept_id) desc;
'
,
'MEASUREMENT -- check distribution of all labs match'
);
-- 2. Distribution of microorganisms.
-- Source side: de-duplicate microbiologyevents on
-- (hadm_id, spec_type_desc, org_name, coalesce(charttime, chartdate)),
-- keep the organism name, then count rows per organism.
-- OMOP side: count omop.measurement rows per value_source_value for
-- measurement_type_concept_id = 2000000007, excluding rows whose
-- value_as_concept_id is 9189.
-- NOTE(review): 2000000007 / 9189 are presumably the local "organism" type
-- and a "negative" result concept -- confirm against the concept tables.
-- Both result sets are sorted by count, then by name descending, because
-- results_eq compares row by row.
--
-- The inner SELECT must list org_name after the DISTINCT ON key list; the
-- previous form ("DISTINCT ON (...) AS org_name") had an alias without a
-- select expression and was rejected by the parser.
SELECT results_eq
(
'
SELECT org_name::TEXT, count(1)
FROM
(
    SELECT DISTINCT ON
        (hadm_id, spec_type_desc, org_name, coalesce(charttime, chartdate))
        org_name
    FROM microbiologyevents
    WHERE org_name IS NOT NULL
) tmp
GROUP BY org_name ORDER BY 2, 1 desc;
'
,
'
SELECT value_source_value::TEXT, count(1)
FROM omop.measurement
WHERE measurement_type_concept_id = 2000000007
AND value_as_concept_id IS DISTINCT FROM 9189
GROUP BY value_source_value ORDER BY 2, 1 desc;
'
,
'MEASUREMENT -- check microbiology organism distributions match'
);
-- Source-concept coverage: compares the constant 0 with the number of
-- omop.measurement rows whose measurement_source_concept_id is 0, i.e. the
-- check only succeeds when no such row exists.
-- NOTE(review): the description below is worded like the failure condition,
-- yet it is the label attached to the check when it passes -- consider
-- rewording it.
SELECT results_eq
(
'
select 0::integer;
'
,
'
SELECT count(1)::integer
FROM omop.measurement
WHERE measurement_source_concept_id = 0;
'
,
'MEASUREMENT -- there is source concept in measurement not described'
);
-- Primary-key uniqueness: groups omop.measurement by measurement_id, keeps
-- only the groups holding more than one row, and requires the number of such
-- groups to equal the constant 0.
SELECT results_eq
(
'
SELECT 0::INTEGER;
'
,
'
SELECT COUNT(1)::INTEGER
FROM
(
SELECT COUNT(1)::INTEGER
FROM omop.measurement
GROUP BY measurement_id
HAVING COUNT(1) > 1
) as t;
'
,
'MEASUREMENT -- check for duplicate primary keys'
);
-- Standard-concept check: expects zero omop.measurement rows whose non-zero
-- measurement_concept_id does not resolve to a concept with
-- standard_concept = 'S'.
--
-- IS DISTINCT FROM is used rather than != so that a NULL standard_concept is
-- counted as a violation. NULL arises both when the matched concept row has
-- no standard flag and when the LEFT JOIN finds no concept row at all; with
-- != those rows compared as UNKNOWN and were silently left out of the count.
SELECT results_eq
(
'
SELECT 0::INTEGER;
'
,
'
SELECT COUNT(1)::INTEGER
FROM omop.measurement
LEFT JOIN omop.concept
    ON measurement_concept_id = concept_id
WHERE measurement_concept_id != 0
AND standard_concept IS DISTINCT FROM ''S'';
'
,
'MEASUREMENT -- standard concept checker'
);
-- Row-count reconciliation per itemid.
-- First query (OMOP side): row counts per concept_code (cast to an integer
-- itemid) from omop.measurement, restricted to a fixed list of
-- measurement_type_concept_id values, and from omop.observation with
-- observation_type_concept_id = 581413.
-- Second query (source side): row counts per itemid from chartevents
-- (error is null or 0), labevents, and outputevents (iserror is null).
--
-- results_eq compares row by row, so the final ordering has to be total.
-- An itemid can appear more than once after the UNION (for instance once
-- from measurement and once from observation, with different counts), so
-- count is used as a tie-breaker after itemid on both sides.
-- NOTE(review): UNION (not UNION ALL) collapses identical (itemid, count)
-- pairs coming from different tables -- confirm that is intended.
SELECT results_eq
(
'
WITH omop_measure AS
(
    SELECT concept_code::integer as itemid, count(*)
    FROM omop.measurement
    JOIN omop.concept ON measurement_source_concept_id = concept_id
    WHERE measurement_type_concept_id IN (44818701, 44818702, 2000000003, 2000000009, 2000000010, 2000000011)
    group by 1 order by 1 asc
),
omop_observation AS
(
    SELECT concept_code::integer as itemid, count(*)
    FROM omop.observation
    JOIN omop.concept ON observation_source_concept_id = concept_id
    WHERE observation_type_concept_id = 581413
    group by 1 order by 1 asc
),
omop_result AS
(
    SELECT * from omop_measure
    UNION
    SELECT * from omop_observation
)
SELECT itemid, count
FROM omop_result
ORDER BY itemid asc, count asc;
'
,
'
WITH mimic_chartevents as
(
    SELECT itemid, count(*)
    from chartevents
    WHERE error is null or error = 0
    group by 1 order by 1 asc
),
mimic_labevents AS
(
    SELECT itemid, count(*)
    from labevents
    group by 1 order by 1 asc
),
mimic_output AS
(
    SELECT itemid, count(*)
    FROM outputevents
    WHERE iserror is null
    group by 1 order by 1 asc
),
mimic_result AS
(
    SELECT *
    from mimic_chartevents
    UNION
    SELECT *
    from mimic_labevents
    UNION
    SELECT *
    FROM mimic_output
)
SELECT itemid, count
FROM mimic_result
ORDER BY itemid asc, count asc;
'
,
'MEASUREMENT -- check row counts match'
);
-- Emit one unconditional passing assertion (it counts towards plan(7)),
-- call finish() to close out the pgTAP run, then roll back the transaction
-- opened by BEGIN at the top of the script.
SELECT pass( 'Measurement pass, w00t!' );
SELECT * from finish();
ROLLBACK;