"""Generate a large annotation JSON file for testing the NiFi user scripts.

The script reads a JSON array of annotation records, appends ``add_records``
extra copies of the first record and writes the enlarged array to
``export_path``.

Both ``f_path`` and ``export_path`` are relative to the current working
directory (presumably the directory this script lives in -- confirm before
running it from elsewhere).

Provenance: commit ffea6e8765f63a989ba5e7a8cc94275af9c1440b by vladd-bit,
Wed, 24 Jul 2024 20:42:17 +0100 -- "NiFi scripts: added big ann file gen
util." (new file nifi/user-scripts/tests/generate_big_ann_file.py).

NOTE(review): the original revision set ``contents = None`` and then indexed
it, so it always failed with ``TypeError``; ``f_path`` was never read.
Loading the first file that matches ``f_path`` is the assumed intent --
confirm against how the fixture is meant to be produced.
"""

import glob
import itertools
import json

# Glob pattern for the source annotation file(s).
f_path = "../../../data/cogstack-cohort/medical_reports_anns_medcat_medmen__*.json"

# Number of extra copies of the first record appended to the loaded records.
add_records = 400000

# Destination of the generated file.
export_path = "../../../data/medical_reports_anns_medcat_medmen__test_big.json"


def chunk(input_list: list, num_slices: int):
    """Yield consecutive slices of ``input_list``.

    Despite its name, ``num_slices`` is the maximum *length* of each slice,
    not the number of slices produced; the last slice may be shorter.

    Args:
        input_list: The list to split.
        num_slices: Maximum number of items per yielded slice.

    Yields:
        Sub-lists of ``input_list`` in order.

    Raises:
        ValueError: If ``num_slices`` is 0 (raised by ``range``).
    """
    for start in range(0, len(input_list), num_slices):
        yield input_list[start:start + num_slices]


def load_annotations(path_pattern: str) -> list:
    """Load the JSON document from the first file matching ``path_pattern``.

    Matches are sorted so the choice is deterministic when several files
    match the pattern.

    Args:
        path_pattern: A path or glob pattern (``*``, ``?``, ``[]``).

    Returns:
        The decoded JSON content of the selected file.

    Raises:
        FileNotFoundError: If nothing matches ``path_pattern``.
        json.JSONDecodeError: If the selected file is not valid JSON.
    """
    matches = sorted(glob.glob(path_pattern))
    if not matches:
        raise FileNotFoundError(f"no annotation file matches {path_pattern!r}")
    with open(matches[0], encoding="utf-8") as source:
        return json.load(source)


def inflate_records(records: list, extra: int) -> list:
    """Return ``records`` followed by ``extra`` copies of its first element.

    The input list is not modified. The appended items are references to the
    same first element, which is sufficient because the result is only
    serialised.

    Args:
        records: Non-empty list of records.
        extra: How many copies of ``records[0]`` to append; values <= 0
            append nothing.

    Returns:
        A new list of length ``len(records) + max(extra, 0)``.

    Raises:
        ValueError: If ``records`` is not a non-empty list.
    """
    if not isinstance(records, list) or not records:
        raise ValueError("expected a non-empty JSON array of annotation records")
    return [*records, *itertools.repeat(records[0], extra)]


def main() -> None:
    """Build the enlarged annotation list and write it to ``export_path``."""
    contents = inflate_records(load_annotations(f_path), add_records)
    with open(export_path, mode="w", encoding="utf-8") as target:
        target.write(json.dumps(contents))


if __name__ == "__main__":
    main()