diff --git a/containers/trigger-code-reference/.gitignore b/containers/trigger-code-reference/.gitignore new file mode 100644 index 0000000000..2eea525d88 --- /dev/null +++ b/containers/trigger-code-reference/.gitignore @@ -0,0 +1 @@ +.env \ No newline at end of file diff --git a/containers/trigger-code-reference/app/main.py b/containers/trigger-code-reference/app/main.py index 6bcb33cc8d..0838a3c409 100644 --- a/containers/trigger-code-reference/app/main.py +++ b/containers/trigger-code-reference/app/main.py @@ -11,8 +11,8 @@ from app.utils import _stamp_resource_with_code_extension from app.utils import find_conditions from app.utils import get_clean_snomed_code -from app.utils import get_clinical_services_dict -from app.utils import get_clinical_services_list +from app.utils import get_concepts_dict +from app.utils import get_concepts_list from app.utils import read_json_from_assets RESOURCE_TO_SERVICE_TYPES = { @@ -78,8 +78,8 @@ async def stamp_condition_extensions( conditions = find_conditions(input.bundle) for cond in conditions: - cond_list = get_clinical_services_list([cond]) - cond_dict = get_clinical_services_dict(cond_list) + cond_list = get_concepts_list([cond]) + cond_dict = get_concepts_dict(cond_list) stamp_codes_to_service_codes[cond] = cond_dict bundle_entries = input.bundle.get("entry", []) @@ -135,7 +135,7 @@ async def stamp_condition_extensions( @app.get("/get-value-sets", status_code=200, responses=get_value_sets_response_examples) async def get_value_sets_for_condition( condition_code: Annotated[str, Query(examples=get_value_sets_request_examples)], - filter_clinical_services: Annotated[ + filter_concepts: Annotated[ str, Query(examples=get_value_sets_request_examples) ] = None, ) -> Response: @@ -145,9 +145,9 @@ async def get_value_sets_for_condition( :param condition_code: A query param supplied as a string representing a single SNOMED condition code. 
- :param filter_clinical_services: (Optional) A comma-separated string of - clinical service types (defined by the abbreviation codes above) to - keep. By default, all (currently) 6 clinical service types are + :param filter_concepts: (Optional) A comma-separated string of + value set types (defined by the abbreviation codes above) to + keep. By default, all (currently) 6 value set types are returned; use this parameter to return only types of interest. :return: An HTTP Response containing the value sets of the queried code. """ @@ -158,10 +158,8 @@ async def get_value_sets_for_condition( ) else: clean_snomed_code = get_clean_snomed_code(condition_code) - clinical_services_list = get_clinical_services_list(clean_snomed_code) - values = get_clinical_services_dict( - clinical_services_list, filter_clinical_services - ) + concepts_list = get_concepts_list(clean_snomed_code) + values = get_concepts_dict(concepts_list, filter_concepts) return values diff --git a/containers/trigger-code-reference/app/utils.py b/containers/trigger-code-reference/app/utils.py index 771e6d1994..44a836e615 100644 --- a/containers/trigger-code-reference/app/utils.py +++ b/containers/trigger-code-reference/app/utils.py @@ -46,93 +46,92 @@ def get_clean_snomed_code(snomed_code: Union[list, str, int, float]) -> list: return clean_snomed_code -def get_clinical_services_list(snomed_code: list) -> List[tuple]: +def get_concepts_list(snomed_code: list) -> List[tuple]: """ This will take a SNOMED code and runs a SQL query joins condition code, joins it to value sets, then uses the value set ids to get the - clinical service type, clinical service codes, and clinical service system - from the eRSD database grouped by clinical service type and system. + value set type, concept codes, and concept system + from the eRSD database grouped by value set type and system. 
:param snomed_code: SNOMED code to check - :return: A list of tuples with clinical service type, a delimited-string of + :return: A list of tuples with value set type, a delimited-string of the relevant codes and code systems as objects within. """ sql_query = """ SELECT - vs.clinical_service_type_id AS clinical_service_type, + vs.type AS valueset_type, GROUP_CONCAT(cs.code, '|') AS codes, cs.code_system AS system FROM conditions c - JOIN - value_sets vs ON c.value_set_id = vs.id - JOIN - clinical_services cs ON cs.value_set_id = vs.id + LEFT JOIN + condition_to_valueset cv ON c.id = cv.condition_id + LEFT JOIN + valuesets vs ON cv.valueset_id = vs.id + LEFT JOIN + valueset_to_concept vc ON vs.id = vc.valueset_id + LEFT JOIN + concepts cs ON vc.concept_id = cs.id WHERE c.id = ? GROUP BY - vs.clinical_service_type_id, cs.code_system + vs.type, cs.code_system """ - # Connect to the SQLite database, execute sql query, then close try: with sqlite3.connect("seed-scripts/ersd.db") as conn: cursor = conn.cursor() code = get_clean_snomed_code(snomed_code) cursor.execute(sql_query, code) - clinical_services_list = cursor.fetchall() + concept_list = cursor.fetchall() # We know it's not an actual error because we didn't get kicked to # except, so just return the lack of results - if not clinical_services_list: + if not concept_list: return [] - return clinical_services_list + return concept_list except sqlite3.Error as e: return {"error": f"An SQL error occurred: {str(e)}"} -def get_clinical_services_dict( - clinical_services_list: List[tuple], - filter_clinical_services: Union[str, list] = None, +def get_concepts_dict( + concept_list: List[tuple], + filter_concept_list: Union[str, list] = None, ) -> dict: """ This function parses a list of tuples containing data on clinical codes into a dictionary for use in the /get-value-sets API endpoint. 
- There is an optional parameter to return select clinical service type(s) + There is an optional parameter to return select value set type(s) specified as either a string or a list. - :param clinical_services_list: A list of tuples with clinical service type, + :param concept_list: A list of tuples with value set type, a delimited-string of relevant codes and code systems as objects within. - :param filter_clinical_services: (Optional) List of clinical service types - specified to keep. By default, all (currently) 6 clinical service types are + :param filter_concept_list: (Optional) List of value set types + specified to keep. By default, all (currently) 6 value set types are returned; use this parameter to return only types of interest. - :return: A nested dictionary with clinical service type as the key, a list + :return: A nested dictionary with value set type as the key, a list of the relevant codes and code systems as objects within. """ # Convert to the final structured format - clinical_service_dict = {} - for clinical_service_type, codes_string, system in clinical_services_list: - # If clinical_service_type is not yet in the dictionary, initialize - if clinical_service_type not in clinical_service_dict: - clinical_service_dict[clinical_service_type] = [] + concept_dict = {} + for concept_type, codes_string, system in concept_list: + # If concept_type is not yet in the dictionary, initialize + if concept_type not in concept_dict: + concept_dict[concept_type] = [] # Append a new entry with the codes and their system - clinical_service_dict[clinical_service_type].append( + concept_dict[concept_type].append( {"codes": codes_string.split("|"), "system": system} ) - # Optional: Remove clinical service types not in specified list if provided - if filter_clinical_services: - clinical_services = convert_inputs_to_list(filter_clinical_services) + # Optional: Remove value set types not in specified list if provided + if filter_concept_list: + concepts = 
convert_inputs_to_list(filter_concept_list) # Create a list of types to remove - remove_list = [ - type - for type in clinical_service_dict.keys() - if type not in clinical_services - ] + remove_list = [type for type in concept_dict.keys() if type not in concepts] # Remove the types for type in remove_list: - clinical_service_dict.pop(type, None) - return clinical_service_dict + concept_dict.pop(type, None) + return concept_dict def _find_codes_by_resource_type(resource: dict) -> List[str]: diff --git a/containers/trigger-code-reference/assets/sample_get_value_sets_requests.json b/containers/trigger-code-reference/assets/sample_get_value_sets_requests.json index ce3d89a858..21905f5be0 100644 --- a/containers/trigger-code-reference/assets/sample_get_value_sets_requests.json +++ b/containers/trigger-code-reference/assets/sample_get_value_sets_requests.json @@ -7,11 +7,11 @@ } }, "Filtered value set retrieval": { - "summary": "Retrieves and filteres COVID-related value sets using a clinical services filter", + "summary": "Retrieves and filters COVID-related value sets using a concept type filter", "description": "This example queries the SQLite database for value sets related to condition code 840539006; we're interested only in lab-related services for this code, so we filter out diagnostics.", "value": { "condition_code": "840539006", - "filter_clinical_services": "lrtc,ostc" + "filter_concepts": "lrtc,ostc" } } } diff --git a/containers/trigger-code-reference/dev-requirements.txt b/containers/trigger-code-reference/dev-requirements.txt index e08ab4c60a..5bc204ba8f 100644 --- a/containers/trigger-code-reference/dev-requirements.txt +++ b/containers/trigger-code-reference/dev-requirements.txt @@ -1,6 +1,6 @@ pytest httpx -testcontainers[compose]==3.7.1 +testcontainers[compose] mammoth==1.8.0 beautifulsoup4==4.12.3 lxml==5.2.2 diff --git a/containers/trigger-code-reference/seed-scripts/config/ersd.json b/containers/trigger-code-reference/seed-scripts/config/ersd.json 
index dad9297cc7..8475a1ba8f 100644 --- a/containers/trigger-code-reference/seed-scripts/config/ersd.json +++ b/containers/trigger-code-reference/seed-scripts/config/ersd.json @@ -1,5 +1,5 @@ { - "value_set_type": { + "valueset_types": { "fhir_path": "entry.resource.where(resourceType='ValueSet' and url.contains('http://ersd.aimsplatform.org/fhir/ValueSet/')", "data_type": "string", "nullable": false, @@ -9,19 +9,19 @@ "data_type": "string", "nullable": false }, - "clinical_service_type": { + "concept_type": { "fhir_path": "title", "data_type": "string", "nullable": false } } }, - "clinical_services": { + "concepts": { "fhir_path": "entry.resource.where(resourceType='ValueSet' and url.contains('http://cts.nlm.nih.gov/fhir/ValueSet/')", "data_type": "string", "nullable": false, "secondary_schema": { - "value_set_id": { + "valueset_id": { "fhir_path": "id", "data_type": "string", "nullable": false @@ -48,17 +48,17 @@ } } }, - "value_sets": { + "valuesets": { "fhir_path": "entry.resource.where(resourceType='ValueSet' and url.contains('http://ersd.aimsplatform.org/fhir/ValueSet/')", "data_type": "string", "nullable": false, "secondary_schema": { - "clinical_service_type_id": { + "concept_type_id": { "fhir_path": "id", "data_type": "string", "nullable": false }, - "version": { + "ersd_version": { "fhir_path": "version", "data_type": "string", "nullable": false diff --git a/containers/trigger-code-reference/seed-scripts/ersd.db b/containers/trigger-code-reference/seed-scripts/ersd.db index 6623b6e5b4..f0124960d4 100644 Binary files a/containers/trigger-code-reference/seed-scripts/ersd.db and b/containers/trigger-code-reference/seed-scripts/ersd.db differ diff --git a/containers/trigger-code-reference/seed-scripts/migrations/V02_01__create_tables_add_indexes.sql b/containers/trigger-code-reference/seed-scripts/migrations/V02_01__create_tables_add_indexes.sql new file mode 100644 index 0000000000..47e7b42f97 --- /dev/null +++ 
b/containers/trigger-code-reference/seed-scripts/migrations/V02_01__create_tables_add_indexes.sql @@ -0,0 +1,59 @@ +CREATE TABLE IF NOT EXISTS valuesets ( + id TEXT PRIMARY KEY, + oid TEXT, + version TEXT, + name TEXT, + author TEXT, + type TEXT +); + +CREATE TABLE IF NOT EXISTS conditions ( + id TEXT PRIMARY KEY, + system TEXT, + name TEXT, + version TEXT +); + +CREATE TABLE IF NOT EXISTS concepts ( + id TEXT PRIMARY KEY, + code TEXT, + code_system TEXT, + display TEXT, + version TEXT +); + +CREATE TABLE IF NOT EXISTS condition_to_valueset ( + id TEXT PRIMARY KEY, + condition_id TEXT, + valueset_id TEXT, + source TEXT, + FOREIGN KEY (condition_id) REFERENCES conditions(id), + FOREIGN KEY (valueset_id) REFERENCES valuesets(id) +); + +CREATE TABLE IF NOT EXISTS valueset_to_concept ( + id TEXT PRIMARY KEY, + valueset_id TEXT, + concept_id TEXT, + FOREIGN KEY (valueset_id) REFERENCES valuesets(id), + FOREIGN KEY (concept_id) REFERENCES concepts(id) +); + + +-- add indexes to increase performance +-- conditions +CREATE INDEX IF NOT EXISTS "idx_conditions_id" ON conditions(id); + +-- valuesets +CREATE INDEX IF NOT EXISTS "idx_valuesets_id" ON valuesets(id); + +-- concepts +CREATE INDEX IF NOT EXISTS "idx_concepts_id" ON concepts(id); + +-- valueset_to_concept indexes +CREATE INDEX IF NOT EXISTS "idx_valueset_to_concept_valueset_id" ON valueset_to_concept(valueset_id); +CREATE INDEX IF NOT EXISTS "idx_valueset_to_concept_concept_id" ON valueset_to_concept(concept_id); + +-- condition_to_valueset indexes +CREATE INDEX IF NOT EXISTS "idx_condition_to_valueset_condition_id" ON condition_to_valueset(condition_id); +CREATE INDEX IF NOT EXISTS "idx_condition_to_valueset_valueset_id" ON condition_to_valueset(valueset_id); diff --git a/containers/trigger-code-reference/seed-scripts/seed-ersd-database.py b/containers/trigger-code-reference/seed-scripts/seed-ersd-database.py index 836f044856..733de48805 100644 --- 
a/containers/trigger-code-reference/seed-scripts/seed-ersd-database.py +++ b/containers/trigger-code-reference/seed-scripts/seed-ersd-database.py @@ -5,6 +5,7 @@ import time from pathlib import Path from typing import List +from typing import Tuple import docker import requests @@ -12,11 +13,13 @@ from docker.errors import BuildError from docker.models.containers import Container from dotenv import load_dotenv +from requests.auth import HTTPBasicAuth load_dotenv() # eRSD constants, can be obtained at: https://ersd.aimsplatform.org/#/api-keys ERSD_API_KEY = os.getenv("ERSD_API_KEY") ERSD_URL = f"https://ersd.aimsplatform.org/api/ersd/v2specification?format=json&api-key={ERSD_API_KEY}" +UMLS_API_KEY = os.getenv("UMLS_API_KEY") # docker constants to start message-parser service dockerfile_path = Path(__file__).parent.parent.parent / "message-parser" @@ -39,6 +42,24 @@ def load_ersd(URL: str) -> dict: return data +def load_vsac_api(url) -> dict: + """ + Loads ValueSet or CodeSystem data from the VSAC FHIR API in a json format. + :param url: API url to hit + :return: FHIR compose data for valueset or codesystem in JSON format + :raises requests.HTTPError: if the API responds with a 4xx/5xx status + """ + response = requests.get( + url, + auth=HTTPBasicAuth("apikey", UMLS_API_KEY), + ) + if response.status_code != 200: + print("Failed to retrieve data:", response.status_code, response.text) + # Fail loudly here: `data` used to be unbound on this path (UnboundLocalError) + response.raise_for_status() + return response.json() + + def start_docker_service(dockerfile_path: str, tag: str, ports: dict) -> Container: """ Builds and runs a Docker container based on a Dockerfile.
@@ -53,7 +74,7 @@ def start_docker_service(dockerfile_path: str, tag: str, ports: dict) -> Contain container = None try: # connect and start - client.images.build(path=str(dockerfile_path), tag=tag) + client.images.build(path=str(dockerfile_path), tag=tag, platform="linux/amd64") container = client.containers.run(tag, ports=ports, detach=True) time.sleep(1.5) # TODO: find better way to make sure service waits except (BuildError, APIError) as e: @@ -91,7 +112,6 @@ def parse_ersd(ports: dict, data: dict) -> dict: :param data: eRSD json bundle :return: parsed message. """ - # load the ersd.json schema to message-parser first load_ersd_schema(ports) @@ -116,36 +136,41 @@ def parse_ersd(ports: dict, data: dict) -> dict: print(f"An error occurred: {e}") -def build_clinical_services_dict(data: dict) -> dict: +def build_valuesets_dict(data: dict) -> dict: """ - This is the only part of the parsed json bundle where the service type - and version are defined for each of the value sets, so this function makes - a dictionary that has each value_set_id as a key with its service type id + This is the only part of the parsed json bundle where the valueset type + is defined for each of the valuesets, so this function makes + a dictionary that has each valueset_id as a key with its valueset type id and version. + It also looks up the version from the VSAC FHIR as a value for the dict. + :param data: message-parser parsed eRSD json - :return: a dictionary of each value_set URL with its service type and + :return: a dictionary of each valueset URL with its valueset type and version. 
""" - clinical_services_dict = {} - for value_set in data.get("value_sets"): - clinical_service_type = value_set.get("clinical_service_type_id") - version = value_set.get("version") - compose_codes = value_set.get("compose_codes").split(",") + valuesets_dict = {} + for valueset in data.get("valuesets"): + concept_type = valueset.get("concept_type_id") + compose_codes = valueset.get("compose_codes").split(",") for compose_code in compose_codes: - value_set_id = compose_code.split("/")[-1] - clinical_services_dict[value_set_id] = { - "clinical_service_type": clinical_service_type, - "version": version, - } - return clinical_services_dict + valueset_id = compose_code.split("/")[-1] + if valueset_id not in valuesets_dict.keys(): + url = f"https://cts.nlm.nih.gov/fhir/ValueSet/{valueset_id}" + vsac_data = load_vsac_api(url) + version = vsac_data.get("version") + valuesets_dict[valueset_id] = { + "concept_type": concept_type, + "version": version, + } + return valuesets_dict def check_id_uniqueness(list_of_lists: List[List[str]]) -> bool: """This is a helper function that confirms that the first item in each list of the lists of lists is unique. This is needed to confirm that the assumptions we have about tables with a unique primary - key (value_sets, value_set_types, clinical_services) are all in fact + key (valuesets, valueset_types, concepts) are all in fact unique (i.e., the number of unique ids matches number of rows). If not, then the function will exit to prevent overwriting. @@ -156,59 +181,59 @@ def check_id_uniqueness(list_of_lists: List[List[str]]) -> bool: return len(unique_ids) == len(list_of_lists) -def build_value_set_type_table(data: dict) -> List[List[str]]: - """ - Loop through parsed json bundle in order to build a small table of - each of the (currently) 6 service types as defined by APHL with its id - and short description of the clinical service type. 
- - :param data: message-parser parsed eRSD json - :return: a list of lists of the id and type of each of the service types - to load to a database +def build_valuesets_table( + data: dict, + valuesets_dict: dict, +) -> Tuple[List[List[str]], List[List[str]]]: """ - value_set_type_list = [] - for value_set_type in data.get("value_set_type"): - id = value_set_type.get("id") - type = value_set_type.get("clinical_service_type") - value_set_type_list.append([id, type]) - if check_id_uniqueness(value_set_type_list): - return value_set_type_list - else: - return print("Non-unique IDs in value_set_type") - + Look through eRSD json to create valuesets table, where the primary key + is the valueset_id that contains the name and codes for each service. -def build_value_sets_table(data: dict, clinical_services_dict: dict) -> List[List[str]]: - """ - Look through eRSD json to create value sets table, where the primary key - is the value_set_id that contains the name and codes for each service. + It will create a junction table between valueset id, condition id, and log + version of the eRSD table. - It also uses the clinical services dictionary that will have the clinical - service type for each of the services as well as the value set version. + Valueset versions are read from valuesets_dict rather than fetched again, + which avoids duplicative API calls.
:param data: message-parser parsed eRSD json - :param clinical_services_dict: a dictionary of each value_set URL with its - service type as value - :return: list of lists of for each of the value set id, name, and code info + :param valuesets_dict: a dictionary of each valueset URL with its + service type and versions as values + :return: list of lists for each of the valueset id, name, and code info; + list of lists of each condition id, valueset id, source/version; + both lists are empty if the valueset ids are not unique """ - clinical_services = data.get("clinical_services") - value_sets_list = [] - for service in clinical_services: - value_set_id = service.get("value_set_id") - value_set_name = service.get("display") + concepts = data.get("concepts") + ersd_version = data.get("valuesets")[0].get("ersd_version") + valuesets_list = [] + junction_list = [] + for service in concepts: + valueset_id = service.get("valueset_id") + valueset_name = service.get("display") publisher = service.get("publisher") - service_info = clinical_services_dict.get(value_set_id) + service_info = valuesets_dict.get(valueset_id) + version = service_info.get("version") + id = f"{valueset_id}_{version}" result = [ - value_set_id, - service_info.get("version", ""), - value_set_name, + id, + valueset_id, + version, + valueset_name, publisher, - service_info.get("clinical_service_type"), + service_info.get("concept_type"), ] - value_sets_list.append(result) - if check_id_uniqueness(value_sets_list): - return value_sets_list + valuesets_list.append(result) + # create junction table between valueset ID and condition ID + concept_codes = ast.literal_eval(service.get("valueable_codes")) + if isinstance(concept_codes, dict): + concept_codes = [concept_codes] + for concept in concept_codes: + code = concept.get("coding")[0].get("code") + junction_list.append([code, id, f"eRSD_{ersd_version}"]) + if check_id_uniqueness(valuesets_list): + return valuesets_list, junction_list else: - return 
print("Non-unique IDs in
value_sets") + print("Non-unique IDs in valuesets") + return [], [] def build_conditions_table(data: dict) -> List[List[str]]: @@ -217,38 +242,62 @@ def build_conditions_table(data: dict) -> List[List[str]]: is a SNOMED condition code and has the name and system for each condition. :param data: message-parser parsed eRSD json - :return: list of lists of for each of the condition code, name, and system + :return: list of lists of for each of the condition code, name, system, + and version """ - clinical_services = data.get("clinical_services") + concepts = data.get("concepts") conditions_list = [] - for service in clinical_services: - value_set_id = service.get("value_set_id") + conditions_dict = {} + for service in concepts: valueable_codes = ast.literal_eval(service.get("valueable_codes")) if isinstance(valueable_codes, dict): # one item, need to list it valueable_codes = [valueable_codes] - # valueable codes to build value_set + # valueable codes to build conditions for valueable_code in valueable_codes: code_system = valueable_code.get("coding")[0].get("system") code = valueable_code.get("coding")[0].get("code") code_name = valueable_code.get("text") - result = [code, value_set_id, code_system, code_name] - conditions_list.append(result) - return conditions_list + # only run code once against API then append to list of lists + if code not in conditions_dict.keys(): + url = f"https://cts.nlm.nih.gov/fhir/CodeSystem/$lookup?system={code_system}&code={code}" + vsac_data = load_vsac_api(url) + version = [ + data.get("valueString").split("/")[-1] + for data in vsac_data.get("parameter") + if data.get("name") == "version" + ][0] + conditions_dict[code] = version + results = [code, code_system, code_name, version] + conditions_list.append(results) + if check_id_uniqueness(conditions_list): + return conditions_list + else: + print("Non-unique IDs in conditions") + return [] -def build_clinical_services_table(data: dict) -> List[List[str]]: +def build_concepts_table( + 
data: dict, + valuesets_dict: dict, +) -> Tuple[List[List[str]], List[List[str]]]: + """ - This builds the table for clinical services, which has a unique row for - each unique value_set_id-code combination. + This builds the table for concepts, which has a unique row for + each unique valueset_id-concept code combination. + + It also creates a junction table between the valueset_id and concept_id. :param data: message-parser parsed eRSD json - :return: list of lists of for each unique value_set_id-code-id, name, and - code info + :param valuesets_dict: a dictionary of each valueset URL with its + service type and versions as values + :return: list of lists for each unique valueset_id-code-id, name, and + code info; + list of lists for each valueset_id, concept_id """ - clinical_services = data.get("clinical_services") - clinical_services_list = [] - for service in clinical_services: - value_set_id = service.get("value_set_id") + concepts = data.get("concepts") + concepts_list = [] + junction_list = [] + for service in concepts: + valueset_id = service.get("valueset_id") compose_codes = ast.literal_eval(service.get("compose_codes")) if isinstance(compose_codes, dict): # one item, need to list it compose_codes = [compose_codes] @@ -258,13 +307,18 @@ def build_clinical_services_table(data: dict) -> List[List[str]]: for concept in compose_code.get("concept"): code = concept.get("code", "") display = concept.get("display", "") - id = f"{value_set_id}_{code}" - result = [id, value_set_id, code, system, display, version] - clinical_services_list.append(result) - if check_id_uniqueness(clinical_services_list): - return clinical_services_list + id = f"{valueset_id}_{code}" + result = [id, code, system, display, version] + concepts_list.append(result) + # create junction table between valueset and concept + valueset_version = valuesets_dict.get(valueset_id).get("version") + valueset_id_full = f"{valueset_id}_{valueset_version}" +
junction_list.append([valueset_id_full, id]) + if check_id_uniqueness(concepts_list): + return concepts_list, junction_list else: - return print("Non-unique IDs in clinical_services") + print("Non-unique IDs in concepts") + return [], [] def apply_migration(connection: sqlite3.Connection, migration_file: str): @@ -305,6 +359,7 @@ def load_table( connection: sqlite3.Connection, table_name: str, insert_rows: List[List[str]], + auto_increment_id: bool = False, ): """ Takes the sqlite3 connection to insert data created in other data steps @@ -312,7 +367,11 @@ def load_table( :param connection: sqlite3 connection :param table_name: name of the table :param insert_rows: list of lists of values to insert into table + :param auto_increment_id: boolean to determine whether table needs + sequential row number as primary id """ + if auto_increment_id: + insert_rows = [[i + 1] + row for i, row in enumerate(insert_rows)] cursor = connection.cursor() try: values = ", ".join("?" for _ in insert_rows[0]) @@ -334,44 +393,51 @@ def main(): 5. Create tables and add indexes. 6. Insert data into tables. """ - # 1. extract and transform eRSD data + # 1. Load eRSD data json from APHL API. ersd_data = load_ersd(ERSD_URL) - # 2. use message-parser to parse eRSD data, then spin down service + # 2. Post eRSD data json to message-parser to get parsed data. container = start_docker_service(dockerfile_path, tag, ports) parsed_data = parse_ersd(ports, ersd_data) if container: container.stop() container.remove() - # 3. used parsed data to create needed tables as list of lists - value_set_type_list = build_value_set_type_table(parsed_data) - clinical_services_dict = build_clinical_services_dict(parsed_data) - value_sets_list = build_value_sets_table(parsed_data, clinical_services_dict) + # 3. Use parsed data to create list of lists for each table.
+ valuesets_dict = build_valuesets_dict(parsed_data) + valuesets_list, condition_to_valueset_list = build_valuesets_table( + parsed_data, valuesets_dict + ) conditions_list = build_conditions_table(parsed_data) - clinical_services_list = build_clinical_services_table(parsed_data) + concepts_list, valueset_to_concept_list = build_concepts_table( + parsed_data, valuesets_dict + ) # Create mini-dict to loop through for sqlite queries table_dict = { - "value_set_type": value_set_type_list, - "value_sets": value_sets_list, + "valuesets": valuesets_list, "conditions": conditions_list, - "clinical_services": clinical_services_list, + "concepts": concepts_list, + "condition_to_valueset": condition_to_valueset_list, + "valueset_to_concept": valueset_to_concept_list, } with sqlite3.connect("seed-scripts/ersd.db") as conn: - # 4. Delete existing tables in eRSD database + # 4. Delete existing tables in sqlite database. for table_name in table_dict.keys(): delete_table(conn, table_name) - # 5. Create tables in eRSD database + # 5. Create tables and add indexes. apply_migration( - conn, "seed-scripts/migrations/V01_01__create_tables_add_indexes.sql" + conn, "seed-scripts/migrations/V02_01__create_tables_add_indexes.sql" ) - # 6. Insert data into the tables + # 6. Insert data into tables. 
for table_name, table_rows in table_dict.items(): - load_table(conn, table_name, table_rows) + if "_to_" in table_name: # use to add sequential row number + load_table(conn, table_name, table_rows, True) + else: + load_table(conn, table_name, table_rows, False) if __name__ == "__main__": diff --git a/containers/trigger-code-reference/tests/test_condition_endpoints.py b/containers/trigger-code-reference/tests/test_condition_endpoints.py index 56845636e8..2032988561 100644 --- a/containers/trigger-code-reference/tests/test_condition_endpoints.py +++ b/containers/trigger-code-reference/tests/test_condition_endpoints.py @@ -79,7 +79,7 @@ def test_get_value_sets_for_condition(mock_db): # Note: This function is defined in utils, but we mock it in the namespace # coming from main because that's where the endpoint is invoking it from -@patch("app.main.get_clinical_services_list") +@patch("app.main.get_concepts_list") def test_stamp_conditions_no_resources_to_stamp(patched_get_services_list): # We don't stamp patient resources, bundle should be a no-op message = json.load(open(Path(__file__).parent / "assets" / "sample_ecr.json")) @@ -103,7 +103,7 @@ def test_stamp_conditions_no_resources_to_stamp(patched_get_services_list): assert not found_matching_extension -@patch("app.main.get_clinical_services_list") +@patch("app.main.get_concepts_list") def test_stamp_condition_extensions(patched_get_services_list): # We'll just try stamping one of each resource type, no need # to see 47 observations diff --git a/containers/trigger-code-reference/tests/test_utils.py b/containers/trigger-code-reference/tests/test_utils.py index 85da545b42..b554ec251f 100644 --- a/containers/trigger-code-reference/tests/test_utils.py +++ b/containers/trigger-code-reference/tests/test_utils.py @@ -8,8 +8,8 @@ from app.utils import _stamp_resource_with_code_extension from app.utils import convert_inputs_to_list from app.utils import get_clean_snomed_code -from app.utils import get_clinical_services_dict 
-from app.utils import get_clinical_services_list +from app.utils import get_concepts_dict +from app.utils import get_concepts_list @pytest.fixture @@ -42,36 +42,36 @@ def test_get_clean_snomed_code_multiple(): # Test getting clinical code list of tuples with a valid SNOMED ID -def test_get_clinical_services_list_normal(mock_db): +def test_get_concepts_list_normal(mock_db): code = 276197005 expected_result = [ ("dxtc", "A36.3|A36", "http://hl7.org/fhir/sid/icd-10-cm"), ("sdtc", "772150003", "http://snomed.info/sct"), ] mock_db.fetchall.return_value = expected_result - result = get_clinical_services_list([code]) + result = get_concepts_list([code]) assert result == expected_result # Test with bad SNOMED code -def test_get_clinical_services_list_no_results(mock_db): +def test_get_concepts_list_no_results(mock_db): code = ["junk_id"] mock_db.fetchall.return_value = [] - result = get_clinical_services_list(code) + result = get_concepts_list(code) assert result == [] # Test SQL error messaging -def test_get_clinical_services_list_sql_error(mock_db): +def test_get_concepts_list_sql_error(mock_db): snomed_id = 276197005 mock_db.execute.side_effect = sqlite3.Error("SQL error") - result = get_clinical_services_list([snomed_id]) + result = get_concepts_list([snomed_id]) assert "error" in result assert "SQL error" in result["error"] # Test transforming clinical services list to nested dictionary -def test_get_clinical_services_dict_normal(): +def test_get_concepts_dict_normal(): clinical_services_list = [ ("dxtc", "A36.3|A36", "http://hl7.org/fhir/sid/icd-10-cm"), ("sdtc", "772150003", "http://snomed.info/sct"), @@ -82,12 +82,12 @@ def test_get_clinical_services_dict_normal(): ], "sdtc": [{"codes": ["772150003"], "system": "http://snomed.info/sct"}], } - result = get_clinical_services_dict(clinical_services_list) + result = get_concepts_dict(clinical_services_list) assert result == expected_result # Test clinical services dict limiting to just sdtc -def 
test_get_clinical_services_dict_filter_services(): +def test_get_concepts_dict_filter_services(): clinical_services_list = [ ("dxtc", "A36.3|A36", "http://hl7.org/fhir/sid/icd-10-cm"), ("sdtc", "772150003", "http://snomed.info/sct"), @@ -96,7 +96,7 @@ def test_get_clinical_services_dict_filter_services(): expected_result = { "sdtc": [{"codes": ["772150003"], "system": "http://snomed.info/sct"}], } - result = get_clinical_services_dict(clinical_services_list, filtered_services) + result = get_concepts_dict(clinical_services_list, filtered_services) assert result == expected_result