-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathSQL-mimic-cdm.txt
374 lines (308 loc) · 16.4 KB
/
SQL-mimic-cdm.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
CREATE TABLE person
(
person_id INTEGER NOT NULL ,
gender_concept_id INTEGER NOT NULL ,
year_of_birth INTEGER NOT NULL ,
month_of_birth INTEGER NULL,
day_of_birth INTEGER NULL,
time_of_birth VARCHAR(10) NULL,
race_concept_id INTEGER NOT NULL,
ethnicity_concept_id INTEGER NOT NULL,
location_id INTEGER NULL,
provider_id INTEGER NULL,
care_site_id INTEGER NULL,
person_source_value VARCHAR(50) NULL,
gender_source_value VARCHAR(50) NULL,
gender_source_concept_id INTEGER NULL,
race_source_value VARCHAR(50) NULL,
race_source_concept_id INTEGER NULL,
ethnicity_source_value VARCHAR(50) NULL,
ethnicity_source_concept_id INTEGER NULL
)
;
INSERT into person(person_id, gender_concept_id, year_of_birth, month_of_birth,day_of_birth,race_concept_id, ethnicity_concept_id, gender_source_value)
SELECT subject_id,
CASE
when sex = 'M' then 8507
when sex = 'F' then 8532
else 8851
END as gender_concept_id,
extract (year from dob),extract (month from dob),extract (day from dob),
0,0, sex
FROM mimic2v26.d_patients;
------------------------------------ TABLE DEATH
CREATE TABLE death
(
person_id INTEGER NOT NULL ,
death_date DATE NOT NULL ,
death_type_concept_id INTEGER NOT NULL ,
cause_concept_id INTEGER NULL ,
cause_source_value VARCHAR(50) NULL,
cause_source_concept_id INTEGER NULL
)
;
INSERT into death(person_id, death_date, death_type_concept_id, cause_concept_id, cause_source_concept_id)
SELECT subject_id, dod, 0, 0, 0
FROM mimic2v26.d_patients
WHERE hospital_expire_flg = 'Y'
--------------------------------- TABLE visit_occurrence
CREATE TABLE visit_occurrence
(
visit_occurrence_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
visit_concept_id INTEGER NOT NULL ,
visit_start_date DATE NOT NULL ,
visit_start_time VARCHAR(10) NULL ,
visit_end_date DATE NOT NULL ,
visit_end_time VARCHAR(10) NULL ,
visit_type_concept_id INTEGER NULL ,
provider_id INTEGER NULL,
care_site_id INTEGER NULL,
visit_source_value VARCHAR(50) NULL,
visit_source_concept_id INTEGER NULL
)
;
-- I used icustay_days table. I may need to use admissions table as well. 9203 is for emergency visit
-- CONFUSION: icustay_detail and icustay_days have different number of records. which one to use?
-- CONFUSION: should I use icustay_id as the visit_occurrence_id or just a serial number?
INSERT INTO visit_occurrence (visit_occurrence_id, person_id, visit_concept_id, visit_start_date, visit_start_time, visit_end_date, visit_end_time, visit_type_concept_id, visit_source_concept_id)
SELECT icustay_id, subject_id, 9203, begintime, pg_catalog.time(begintime), endtime, pg_catalog.time(endtime), 0, 0
FROM mimic2v26.icustay_days;
-------------------------- Table condition_occurrence
CREATE TABLE condition_occurrence
(
-- condition_occurrence_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
condition_concept_id INTEGER NOT NULL ,
condition_start_date DATE NULL ,
condition_end_date DATE NULL ,
condition_type_concept_id INTEGER NOT NULL ,
stop_reason VARCHAR(20) NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
condition_source_value VARCHAR(50) NULL ,
condition_source_concept_id INTEGER NULL
)
;
-- not sure how to get condition_start_date and end_date.
-- don't see how to obtain visit_occurrence_id (icustay_id was used originally in visit_occurrence, but here we have hadm_id)
INSERT INTO condition_occurrence(person_id, condition_concept_id, condition_type_concept_id, condition_source_value, condition_source_concept_id)
SELECT subject_id, 0, 0, code, 0
FROM mimic2v26.icd9;
ALTER TABLE condition_occurrence ADD condition_occurrence_id SERIAL;
ALTER TABLE condition_occurrence ALTER condition_occurrence_id SET NOT NULL
------------------------------------- Table measurement
-- MEASUREMENT table (a large one)
CREATE TABLE measurement
(
--measurement_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
measurement_concept_id INTEGER NOT NULL ,
measurement_date DATE NOT NULL ,
measurement_time VARCHAR(10) NULL ,
measurement_type_concept_id INTEGER NOT NULL ,
operator_concept_id INTEGER NULL ,
value_as_number NUMERIC NULL ,
value_as_concept_id INTEGER NULL ,
unit_concept_id INTEGER NULL ,
range_low NUMERIC NULL ,
range_high NUMERIC NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
measurement_source_value VARCHAR(50) NULL ,
measurement_source_concept_id INTEGER NULL ,
unit_source_value VARCHAR(50) NULL ,
value_source_value VARCHAR(50) NULL
)
;
INSERT INTO measurement(person_id, measurement_concept_id, measurement_date, measurement_time, measurement_type_concept_id, value_as_number,visit_occurrence_id,measurement_source_value,
unit_source_value)
SELECT temp_join_table.subject_id, 0, temp_join_table.charttime, pg_catalog.time(temp_join_table.charttime), 0,
temp_join_table.valuenum, icustay_id,temp_join_table.LOINC_CODE,
temp_join_table.valueuom
FROM
(SELECT * FROM mimic2v26.d_labitems, mimic2v26.labevents
WHERE mimic2v26.d_labitems.itemid = mimic2v26.labevents.itemid) as
temp_join_table
ALTER TABLE measurement ADD measurement_id SERIAL;
ALTER TABLE measurement ALTER measurement_id SET NOT NULL
select * from measurement
limit 1000
-------------------- procedure_occurrence table
CREATE TABLE procedure_occurrence
(
--procedure_occurrence_id INTEGER NULL ,
person_id INTEGER NOT NULL ,
procedure_concept_id INTEGER NOT NULL ,
procedure_date DATE NOT NULL ,
procedure_type_concept_id INTEGER NOT NULL ,
modifier_concept_id INTEGER NULL ,
quantity INTEGER NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
procedure_source_value VARCHAR(50) NULL ,
procedure_source_concept_id INTEGER NULL ,
qualifier_source_value VARCHAR(50) NULL
)
;
INSERT INTO procedure_occurrence (person_id, procedure_date,
procedure_source_value)
SELECT tmp_join_table.subject_id, tmp_join_table.proc_dt,tmp_join_table.code
FROM
(SELECT * FROM mimic2v26.procedureevents, mimic2v26.d_codeditems
WHERE mimic2v26.procedureevents.itemid =
mimic2v26.d_codeditems.itemid) as tmp_join_table;
ALTER TABLE procedure_occurrence ADD procedure_occurrence_id SERIAL;
ALTER TABLE procedure_occurrence ALTER procedure_occurrence_id SET NOT NULL
--------------------- Table Drug_exposure
CREATE TABLE drug_exposure
(
--drug_exposure_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
drug_concept_id INTEGER NOT NULL ,
drug_exposure_start_date DATE NOT NULL ,
drug_exposure_end_date DATE NULL ,
drug_type_concept_id INTEGER NOT NULL ,
stop_reason VARCHAR(20) NULL ,
refills INTEGER NULL ,
quantity NUMERIC NULL ,
days_supply INTEGER NULL ,
sig TEXT NULL ,
route_concept_id INTEGER NULL ,
effective_drug_dose NUMERIC NULL ,
dose_unit_concept_id INTEGER NULL ,
lot_number VARCHAR(50) NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
drug_source_value VARCHAR(50) NULL ,
drug_source_concept_id INTEGER NULL ,
route_source_value VARCHAR(50) NULL ,
dose_unit_source_value VARCHAR(50) NULL
)
;
-- confusion: drug_source_value: is it itemid or the label?
INSERT INTO drug_exposure (person_id, drug_concept_id,
drug_exposure_start_date, drug_type_concept_id,effective_drug_dose,
provider_id, visit_occurrence_id, drug_source_value,
drug_source_concept_id, route_source_value, dose_unit_source_value)
SELECT tmp_join_table.subject_id, 0, tmp_join_table.charttime, 0,
tmp_join_table.dose, tmp_join_table.cgid, tmp_join_table.icustay_id,
tmp_join_table.label, 0, tmp_join_table.route, tmp_join_table.doseuom
FROM
(SELECT * FROM mimic2v26.medevents, mimic2v26.d_meditems
WHERE mimic2v26.medevents.itemid = mimic2v26.d_meditems.itemid) as
tmp_join_table
ALTER TABLE drug_exposure ADD drug_exposure_id SERIAL;
ALTER TABLE drug_exposure ALTER drug_exposure_id SET NOT NULL
------------------------- Table NOTE
-- big table
CREATE TABLE note
(
--note_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
note_date DATE NOT NULL ,
note_time VARCHAR(10) NULL ,
note_type_concept_id INTEGER NOT NULL ,
note_text TEXT NOT NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
note_source_value VARCHAR(50) NULL
)
;
-- Nurses typically write \nursing notes", a summary
--of events which occurred during their shift period. When the patient is dis-
--charged from the hospital, the responsible physician dictates a summary of the
--entire hospitalization period, known as the \discharge summary". These reports
--are recorded in noteevents table
-- CONFUSION: I mapped note_source_value from category; is it okay?
--Also, I mapped visit_occurrence_id from icustay_id.
-- but there are some notes for which there is no icustay_id (like
--discharge summary). should I use hadm_id instead? I used only
--icustay_id in the visit_occurrence table as well.
INSERT INTO note(person_id, note_date, note_time,
note_type_concept_id, note_text, provider_id, visit_occurrence_id,
note_source_value)
SELECT mimic2v26.noteevents.subject_id,
mimic2v26.noteevents.charttime,
pg_catalog.time(mimic2v26.noteevents.charttime), 0,
mimic2v26.noteevents.text, mimic2v26.noteevents.cgid,
mimic2v26.noteevents.icustay_id, mimic2v26.noteevents.category
FROM mimic2v26.noteevents
ALTER TABLE note ADD note_id SERIAL;
ALTER TABLE note ALTER note_id SET NOT NULL
----------------------- Table Provide
CREATE TABLE provider
(
provider_id INTEGER NOT NULL ,
provider_name VARCHAR(255) NULL ,
NPI VARCHAR(20) NULL ,
DEA VARCHAR(20) NULL ,
specialty_concept_id INTEGER NULL ,
care_site_id INTEGER NULL ,
year_of_birth INTEGER NULL ,
gender_concept_id INTEGER NULL ,
provider_source_value VARCHAR(50) NULL ,
specialty_source_value VARCHAR(50) NULL ,
specialty_source_concept_id INTEGER NULL ,
gender_source_value VARCHAR(50) NULL ,
gender_source_concept_id INTEGER NULL
)
;
-- provider_id and provider_source_value are same
INSERT INTO provider (provider_id, provider_source_value,
specialty_concept_id, specialty_source_value,
specialty_source_concept_id)
SELECT mimic2v26.d_caregivers.cgid, mimic2v26.d_caregivers.cgid, 0,
mimic2v26.d_caregivers.label, 0
FROM mimic2v26.d_caregivers
----------------------- Table specimen
CREATE TABLE specimen
(
-- specimen_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
specimen_concept_id INTEGER NOT NULL ,
specimen_type_concept_id INTEGER NOT NULL ,
specimen_date DATE NOT NULL ,
specimen_time VARCHAR(10) NULL ,
quantity NUMERIC NULL ,
unit_concept_id INTEGER NULL ,
anatomic_site_concept_id INTEGER NULL ,
disease_status_concept_id INTEGER NULL ,
specimen_source_id VARCHAR(50) NULL ,
specimen_source_value VARCHAR(50) NULL ,
unit_source_value VARCHAR(50) NULL ,
anatomic_site_source_value VARCHAR(50) NULL ,
disease_status_source_value VARCHAR(50) NULL
)
;
-- It seems to be a subset of measurement table
-- CONFUSION: specimen_source_id is the itemid or loinc_code? I have used loinc_code
-- How about specimen_source_value? I have used the name of the fluid.
INSERT INTO specimen(person_id, specimen_concept_id, specimen_type_concept_id,specimen_date,
specimen_time, unit_concept_id, anatomic_site_concept_id, disease_status_concept_id,
specimen_source_id, specimen_source_value, unit_source_value)
SELECT temp_join_table.subject_id, 0, 0, temp_join_table.charttime,
pg_catalog.time(temp_join_table.charttime), 0, 0, 0,
temp_join_table.loinc_code, temp_join_table.fluid, temp_join_table.valueuom
FROM
(SELECT * FROM mimic2v26.d_labitems, mimic2v26.labevents
WHERE mimic2v26.d_labitems.itemid = mimic2v26.labevents.itemid) as
temp_join_table
ALTER TABLE specimen ADD specimen_id SERIAL;
ALTER TABLE specimen ALTER specimen_id SET NOT NULL;
-------------------------------- Table site
CREATE TABLE care_site
(
-- care_site_id INTEGER NOT NULL ,
care_site_name VARCHAR(255) NULL ,
place_of_service_concept_id INTEGER NULL ,
location_id INTEGER NULL ,
care_site_source_value VARCHAR(50) NULL ,
place_of_service_source_value VARCHAR(50) NULL
)
;
INSERT INTO care_site(care_site_name, place_of_service_concept_id, care_site_source_value)
SELECT mimic2v26.d_careunits.label, 0, mimic2v26.d_careunits.cuid
FROM mimic2v26.d_careunits;
ALTER TABLE care_site ADD care_site_id SERIAL;
ALTER TABLE care_site ALTER care_site_id SET NOT NULL;