Skip to content

Commit

Permalink
NiFi: cohort export script added support for int timestamps.
Browse files Browse the repository at this point in the history
  • Loading branch information
vladd-bit committed Jun 28, 2024
1 parent 5ceb9b5 commit 9fbb683
Showing 1 changed file with 57 additions and 47 deletions.
104 changes: 57 additions & 47 deletions nifi/user-scripts/cogstack_cohort_generate_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import sys
import logging
from datetime import datetime
from datetime import datetime, timezone
import os
import traceback
import multiprocess
Expand Down Expand Up @@ -80,53 +80,63 @@

def _process_patient_records(patient_records: list):
    """Build per-patient lookup tables from a list of patient record dicts.

    Records that do not contain ``PATIENT_ID_FIELD_NAME`` are skipped.

    Args:
        patient_records: list of dict-like records. Each processed record
            must contain ``PATIENT_BIRTH_DATE_FIELD_NAME``; the ethnicity,
            gender, death-date and document-id fields are optional.

    Returns:
        A tuple of six dicts, in this order:
        ``(_ptt2sex, _ptt2eth, _ptt2dob, _ptt2age, _ptt2dod, _doc2ptt)``.
        The first five are keyed by patient id; ``_doc2ptt`` maps a document
        id to a patient id. Birth/death dates are stored as ints in
        ``YYYYMMDDHHMMSS`` form, with ``0`` meaning "no death date".

    Raises:
        KeyError: if a processed record has no birth-date field.
        ValueError: if a non-int date value does not match ``DATE_TIME_FORMAT``.
    """

    def _to_datetime(value):
        # int values are divided by 1000 before fromtimestamp(), i.e. they are
        # treated as epoch milliseconds (UTC); anything else is parsed as a
        # DATE_TIME_FORMAT string.
        if isinstance(value, int):
            return datetime.fromtimestamp(value / 1000, tz=timezone.utc)
        return datetime.strptime(str(value), DATE_TIME_FORMAT)

    _ptt2sex, _ptt2eth, _ptt2dob, _ptt2age, _ptt2dod, _doc2ptt = {}, {}, {}, {}, {}, {}

    # Loop-invariant: the annotation doc-id field name without its "meta." prefix.
    _derived_document_id_field_from_ann = ANNOTATION_DOCUMENT_ID_FIELD_NAME.removeprefix("meta.")

    for patient_record in patient_records:
        # Guard clause: nothing can be indexed without a patient id.
        if PATIENT_ID_FIELD_NAME not in patient_record:
            continue

        patient_id = patient_record[PATIENT_ID_FIELD_NAME]

        if PATIENT_ETHNICITY_FIELD_NAME in patient_record:
            _ethnicity = str(patient_record[PATIENT_ETHNICITY_FIELD_NAME]).lower().replace("-", " ").replace("_", " ")
        else:
            _ethnicity = "other"

        # Use the mapped label when one exists, otherwise keep the raw value.
        _ptt2eth[patient_id] = ethnicity_map.get(_ethnicity, _ethnicity).title()

        # based on: https://www.datadictionary.nhs.uk/attributes/person_gender_code.html
        _tmp_gender = str(patient_record[PATIENT_GENDER_FIELD_NAME]).lower() if PATIENT_GENDER_FIELD_NAME in patient_record else "Unknown"
        if _tmp_gender in ["male", "1", "m"]:
            _tmp_gender = "Male"
        elif _tmp_gender in ["female", "2", "f"]:
            _tmp_gender = "Female"
        else:
            _tmp_gender = "Unknown"

        _ptt2sex[patient_id] = _tmp_gender

        dob = _to_datetime(patient_record[PATIENT_BIRTH_DATE_FIELD_NAME])

        # Normalise the death date to either a datetime or None exactly once.
        # Previously a raw value of 0 skipped parsing but was still passed to
        # .strftime() below, raising AttributeError.
        raw_dod = patient_record[PATIENT_DEATH_DATE_FIELD_NAME] if PATIENT_DEATH_DATE_FIELD_NAME in patient_record else None
        dod = _to_datetime(raw_dod) if raw_dod not in [None, "null", 0] else None

        # NOTE(review): age is a plain year difference (month/day are ignored),
        # kept as-is so downstream results do not change.
        if dod is not None:
            patient_age = dod.year - dob.year
        else:
            patient_age = datetime.now().year - dob.year

        # convert to ints
        _ptt2dod[patient_id] = int(dod.strftime("%Y%m%d%H%M%S")) if dod is not None else 0
        _ptt2dob[patient_id] = int(dob.strftime("%Y%m%d%H%M%S"))
        _ptt2age[patient_id] = patient_age

        if DOCUMENT_ID_FIELD_NAME in patient_record:
            docid = patient_record[DOCUMENT_ID_FIELD_NAME]
        else:
            # NOTE(review): this stores the derived field *name*, not a value
            # read from the record - confirm this fallback is intended.
            docid = _derived_document_id_field_from_ann

        _doc2ptt[docid] = patient_id

    return _ptt2sex, _ptt2eth, _ptt2dob, _ptt2age, _ptt2dod, _doc2ptt

Expand Down

0 comments on commit 9fbb683

Please sign in to comment.