From 344b7380690826280944911e84708b7637d6efc0 Mon Sep 17 00:00:00 2001 From: Josua Krause Date: Sun, 24 May 2015 22:48:57 -0400 Subject: [PATCH 1/7] easier settings handling --- build_dictionary.py | 120 ++++++++++++++-------------------- feature_extraction/cohort.py | 8 ++- feature_extraction/extract.py | 8 ++- util.py | 11 +++- 4 files changed, 69 insertions(+), 78 deletions(-) diff --git a/build_dictionary.py b/build_dictionary.py index c766d1e..9f6dbb9 100755 --- a/build_dictionary.py +++ b/build_dictionary.py @@ -91,23 +91,10 @@ def createRootEntry(self, type): res["flags"] = flags return res - def init(self, settings): - global globalSymbolsFile - global icd9File - global pntFile - global ccs_diag_file - global ccs_proc_file - global productFile - global packageFile - globalSymbolsFile = settings['filename'] - icd9File = settings['icd9'] - pntFile = settings['pnt'] - ccs_diag_file = settings['ccs_diag'] - ccs_proc_file = settings['ccs_proc'] - productFile = settings['ndc_prod'] - packageFile = settings['ndc_package'] + def init(self, settings, settingsFile): for k in self._baseTypes.keys(): - self._codeTables[k] = self._baseTypes[k].init() + self._codeTables[k] = self._baseTypes[k].init(settings) + util.read_config(settings, settingsFile, False) dictionary = EntryCreator() @@ -124,10 +111,10 @@ def flags(self): return {} def addCodeType(self, code, codeType): self._codeTypes[code] = codeType - def init(self): + def init(self, settings): res = {} for code in self._codeTypes.keys(): - res[code] = self._codeTypes[code].init() + res[code] = self._codeTypes[code].init(settings) return res def create(self, symbols, type, id, code): candidate = None @@ -161,7 +148,7 @@ def create(self, symbols, type, id, code): return (candidate, code) class TypeCode(object): - def init(self): + def init(self, settings): raise NotImplementedError() def create(self, symbols, type, id): raise NotImplementedError @@ -183,9 +170,9 @@ def create(self, symbols, type, id): if len(id) == 2: return createUnknownEntry(symbols, type, id, pid, code=self.code) return toEntry(id, pid, id, "Provider Number: {0}".format(id)) - def init(self): + def init(self, settings): res = {} - file = util.get_file(pntFile, debugOutput) + file = get_file(settings, 'pnt', 'code/pnt/pnt.txt', debugOutput) if not os.path.isfile(file): return res with open(file, 'r') as pnt: @@ -213,7 +200,7 @@ class CmsPhysicianCode(TypeCode): def create(self, symbols, type, id): pid = "" return createUnknownEntry(symbols, type, id, pid, code=self.code) - def init(self): + def init(self, settings): return {} ### prescribed ### @@ -232,9 +219,9 @@ def create(self, symbols, type, id): l = symbols[id] return toEntry(id, pid, l["nonp"], l["nonp"]+" ["+l["desc"]+"] ("+l["prop"]+") "+l["subst"]+" - "+l["pharm"]+" - "+l["pType"], l["alias"] if "alias" in l else None) return createUnknownEntry(symbols, type, id, pid, code=self.code) - def init(self): + def init(self, settings): prescribeLookup = {} - fileA = util.get_file(productFile, debugOutput) + fileA = get_file(settings, 'ndc_prod', 'code/ndc/product.txt', debugOutput) if not os.path.isfile(fileA): return prescribeLookup uidLookup = {} @@ -287,7 +274,7 @@ def init(self): prescribeLookup[ndc] = obj prescribeLookup[normndc] = obj prescribeLookup[fullndc] = obj - fileB = util.get_file(packageFile, debugOutput) + fileB = get_file(settings, 'ndc_package', 'code/ndc/package.txt', debugOutput) if not os.path.isfile(fileB): return prescribeLookup with open(fileB, 'r') as paFile: @@ -356,8 +343,8 @@ def create(self, symbols, type, id): if id in symbols: return toEntry(id, pid, symbols[id], symbols[id]) return createUnknownEntry(symbols, type, id, pid, code=self.code) - def init(self): - return getGlobalSymbols() + def init(self, settings): + return getGlobalSymbols(settings) ### diagnosis ### @dictionary.baseType("diagnosis") @@ -380,10 +367,10 @@ def create(self, symbols, type, id): return toEntry(id, pid, symbols[prox_id], symbols[prox_id], id.replace(".", "") if "HIERARCHY" not in id else None) prox_id = prox_id[:-1] return createUnknownEntry(symbols, type, id, pid, code=self.code) - def init(self): - codes = getGlobalSymbols() - codes.update(getICD9()) - self._parents = readCCS(util.get_file(ccs_diag_file, debugOutput), codes) + def init(self, settings): + codes = getGlobalSymbols(settings) + codes.update(getICD9(settings)) + self._parents = readCCS(get_file(settings, 'ccs_diag', 'code/ccs/multi_diag.txt', debugOutput), codes) return codes ### procedure ### @@ -407,10 +394,10 @@ def create(self, symbols, type, id): return toEntry(id, pid, symbols[prox_id], symbols[prox_id], id.replace(".", "") if "HIERARCHY" not in id else None) prox_id = prox_id[:-1] return createUnknownEntry(symbols, type, id, pid) - def init(self): - codes = getGlobalSymbols() - codes.update(getICD9()) - self._parents = readCCS(util.get_file(ccs_proc_file, debugOutput), codes) + def init(self, settings): + codes = getGlobalSymbols(settings) + codes.update(getICD9(settings)) + self._parents = readCCS(get_file(settings, 'ccs_proc', 'code/ccs/multi_proc.txt', debugOutput), codes) return codes ### info ### @@ -426,7 +413,7 @@ class InfoInfoCode(TypeCode): def create(self, symbols, type, id): pid = "" return toEntry(id, pid, id, "Info: " + id) - def init(self): + def init(self, settings): return {} @@ -439,7 +426,7 @@ def name(self): return UNKNOWN def color(self): return "red" - def init(self): + def init(self, settings): raise NotImplementedError() def create(self, symbols, type, id, code): return createUnknownEntry(symbols, type, id, code=code) @@ -470,15 +457,15 @@ def toEntry(id, pid, name, desc, alias=None): globalICD9 = {} -def getICD9(): +def getICD9(settings): global globalICD9 if not len(globalICD9.keys()): - globalICD9 = initICD9() + globalICD9 = initICD9(settings) return globalICD9.copy() -def initICD9(): +def initICD9(settings): codes = {} - f = util.get_file(icd9File, debugOutput) + f = get_file(settings, 'icd9', 'code/icd9/ucod.txt', debugOutput) if not os.path.isfile(f): return codes with open(f, 'r') as file: @@ -535,15 +522,15 @@ def readCCS(ccsFile, codes): globalSymbols = {} -def getGlobalSymbols(): +def getGlobalSymbols(settings): global globalSymbols if not len(globalSymbols.keys()): - globalSymbols = initGlobalSymbols() + globalSymbols = initGlobalSymbols(settings) return globalSymbols.copy() -def initGlobalSymbols(): +def initGlobalSymbols(settings): codes_dict = {} - f = util.get_file(globalSymbolsFile, debugOutput) + f = get_file(settings, 'filename', 'code/code_names.txt', debugOutput) if not os.path.isfile(f): return codes_dict with open(f, 'r') as file: @@ -581,19 +568,18 @@ def enrichDict(file, mid): with util.OutWrapper(file) as out: print(json.dumps(dict, indent=2, sort_keys=True), file=out) -def init(settings): - dictionary.init(settings) +def init(settings, settingsFile): + dictionary.init(settings, settingsFile) ### argument API -icd9File = 'code/icd9/ucod.txt' -ccs_diag_file = 'code/ccs/multi_diag.txt' -ccs_proc_file = 'code/ccs/multi_proc.txt' -productFile = 'code/ndc/product.txt' -packageFile = 'code/ndc/package.txt' -pntFile = 'code/pnt/pnt.txt' -globalSymbolsFile = 'code/code_names.txt' -globalMid = '2507387001' +def get_file(settings, key, default, debugOutput): + if key in settings: + file = settings[key] + else: + file = default + settings[key] = file + return util.get_file(file, debugOutput) def usage(): print("{0}: [--debug] -p -c -o [-h|--help] [--lookup ]".format(sys.argv[0]), file=sys.stderr) @@ -605,21 +591,12 @@ def usage(): print("-h|--help: prints this help.", file=sys.stderr) sys.exit(1) -defaultSettings = { - 'filename': globalSymbolsFile, - 'ndc_prod': productFile, - 'ndc_package': packageFile, - 'icd9': icd9File, - 'pnt': pntFile, - 'ccs_diag': ccs_diag_file, - 'ccs_proc': ccs_proc_file, -} - def interpretArgs(): global debugOutput - settings = defaultSettings + settings = {} + settingsFile = None info = { - 'mid': globalMid, + 'mid': '-', 'output': '-' } lookupMode = False @@ -638,7 +615,8 @@ def interpretArgs(): if not args: print('-c requires argument', file=sys.stderr) usage() - util.read_config(settings, args.pop(0), debugOutput) + settingsFile = args.pop(0) + util.read_config(settings, settingsFile, debugOutput) elif val == '-o': if not args: print('-o requires argument', file=sys.stderr) @@ -652,11 +630,11 @@ def interpretArgs(): else: print('illegal argument '+val, file=sys.stderr) usage() - return (settings, info, lookupMode, args) + return (settings, settingsFile, info, lookupMode, args) if __name__ == '__main__': - (settings, info, lookupMode, rest) = interpretArgs() - dictionary.init(settings) + (settings, settingsFile, info, lookupMode, rest) = interpretArgs() + dictionary.init(settings, settingsFile) if lookupMode: dict = {} diff --git a/feature_extraction/cohort.py b/feature_extraction/cohort.py index 77bfa9b..ce2a1ef 100755 --- a/feature_extraction/cohort.py +++ b/feature_extraction/cohort.py @@ -510,7 +510,8 @@ def usage(): if __name__ == '__main__': output = '-' - settings = build_dictionary.defaultSettings + settingsFile = None + settings = {} settings['delim'] = ',' settings['quote'] = '"' query = "" @@ -553,7 +554,8 @@ def usage(): if not args or args[0] == '--': print('-c requires argument', file=sys.stderr) usage() - util.read_config(settings, args.pop(0), build_dictionary.debugOutput) + settingsFile = args.pop(0) + util.read_config(settings, settingsFile, build_dictionary.debugOutput) elif arg == '--debug': build_dictionary.debugOutput = True else: @@ -564,7 +566,7 @@ def usage(): print('query is required', file=sys.stderr) usage() - build_dictionary.init(settings) + build_dictionary.init(settings, settingsFile) allPaths = [] while args: diff --git a/feature_extraction/extract.py b/feature_extraction/extract.py index ee9eb5f..20a8236 100755 --- a/feature_extraction/extract.py +++ b/feature_extraction/extract.py @@ -233,7 +233,8 @@ def usage(): if __name__ == '__main__': output = '-' - settings = build_dictionary.defaultSettings + settingsFile = None + settings = {} settings['delim'] = ',' settings['quote'] = '"' whitelist = None @@ -295,7 +296,8 @@ def usage(): if not args or args[0] == '--': print('-c requires argument', file=sys.stderr) usage() - util.read_config(settings, args.pop(0), build_dictionary.debugOutput) + settingsFile = args.pop(0) + util.read_config(settings, settingsFile, build_dictionary.debugOutput) elif arg == '--debug': build_dictionary.debugOutput = True else: @@ -303,7 +305,7 @@ def usage(): usage() build_dictionary.reportMissingEntries = False - build_dictionary.init(settings) + build_dictionary.init(settings, settingsFile) allPaths = [] while args: diff --git a/util.py b/util.py index 3fd7510..98ced8a 100755 --- a/util.py +++ b/util.py @@ -98,6 +98,8 @@ def get_file(file, debugOutput=False): def read_config(settings, file, debugOutput=False): global _path_correction + if file is None: + return _path_correction = os.path.dirname(os.path.abspath(file)) config = {} if debugOutput: @@ -106,7 +108,14 @@ def read_config(settings, file, debugOutput=False): with open(file, 'r') as input: config = json.loads(input.read()) settings.update(config) - if set(settings.keys()) - set(config.keys()): + if not set(settings.keys()) - set(config.keys()): + return + same = True + for k in settings.keys(): + if settings[k] != config[k]: + same = False + break + if not same: with open(file, 'w') as output: print(json.dumps(settings, indent=2, sort_keys=True), file=output) From d0ad51e76176518574856d0bf190fcec30e9fcae Mon Sep 17 00:00:00 2001 From: Josua Krause Date: Mon, 25 May 2015 10:01:52 -0400 Subject: [PATCH 2/7] add loinc --- build_dictionary.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/build_dictionary.py b/build_dictionary.py index 9f6dbb9..66772b7 100755 --- a/build_dictionary.py +++ b/build_dictionary.py @@ -344,7 +344,17 @@ def create(self, symbols, type, id): return toEntry(id, pid, symbols[id], symbols[id]) return createUnknownEntry(symbols, type, id, pid, code=self.code) def init(self, settings): - return getGlobalSymbols(settings) + res = getGlobalSymbols(settings) + loinc_file = get_file(settings, 'loinc', '/m/CODES/loinc/loinc_file.all.headers', debugOutput) + if os.path.isfile(loinc_file): + with open(loinc_file, 'r') as file: + for line in file: + l = line.strip() + spl = l.split('#', 1) + if len(spl) < 2: + continue + res[spl[0]] = spl[1] + return res ### diagnosis ### @dictionary.baseType("diagnosis") From 4e48e459e1546595783cafe7005ef868b600eae5 Mon Sep 17 00:00:00 2001 From: Josua Krause Date: Mon, 25 May 2015 10:06:08 -0400 Subject: [PATCH 3/7] speciality for ibc --- build_dictionary.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/build_dictionary.py b/build_dictionary.py index 66772b7..3796490 100755 --- a/build_dictionary.py +++ b/build_dictionary.py @@ -203,6 +203,26 @@ def create(self, symbols, type, id): def init(self, settings): return {} +@dictionary.codeType("physician", "ibc") +class CmsPhysicianCode(TypeCode): + def create(self, symbols, type, id): + pid = "" + if id in symbols: + return toEntry(id, pid, symbols[id], symbols[id]) + return createUnknownEntry(symbols, type, id, pid, code=self.code) + def init(self, settings): + res = {} + spec_file = get_file(settings, 'ibc_speciality', '/m/CODES/specialty/specialty_headers.txt', debugOutput) + if os.path.isfile(spec_file): + with open(spec_file, 'r') as file: + for line in file: + l = line.strip() + spl = l.split('#', 1) + if len(spl) < 2: + continue + res[spl[0]] = spl[1] + return res + ### prescribed ### @dictionary.baseType("prescribed") class TypePrescribed(TypeBase): From 37f40c78bab7947ae9499c9511245fc8bcd5ddec Mon Sep 17 00:00:00 2001 From: Josua Krause Date: Mon, 25 May 2015 10:07:55 -0400 Subject: [PATCH 4/7] remove debugOutput from get_file calls --- build_dictionary.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/build_dictionary.py b/build_dictionary.py index 3796490..fe20deb 100755 --- a/build_dictionary.py +++ b/build_dictionary.py @@ -172,7 +172,7 @@ def create(self, symbols, type, id): return toEntry(id, pid, id, "Provider Number: {0}".format(id)) def init(self, settings): res = {} - file = get_file(settings, 'pnt', 'code/pnt/pnt.txt', debugOutput) + file = get_file(settings, 'pnt', 'code/pnt/pnt.txt') if not os.path.isfile(file): return res with open(file, 'r') as pnt: @@ -212,7 +212,7 @@ def create(self, symbols, type, id): return createUnknownEntry(symbols, type, id, pid, code=self.code) def init(self, settings): res = {} - spec_file = get_file(settings, 'ibc_speciality', '/m/CODES/specialty/specialty_headers.txt', debugOutput) + spec_file = get_file(settings, 'ibc_speciality', '/m/CODES/specialty/specialty_headers.txt') if os.path.isfile(spec_file): with open(spec_file, 'r') as file: for line in file: @@ -241,7 +241,7 @@ def create(self, symbols, type, id): return createUnknownEntry(symbols, type, id, pid, code=self.code) def init(self, settings): prescribeLookup = {} - fileA = get_file(settings, 'ndc_prod', 'code/ndc/product.txt', debugOutput) + fileA = get_file(settings, 'ndc_prod', 'code/ndc/product.txt') if not os.path.isfile(fileA): return prescribeLookup uidLookup = {} @@ -294,7 +294,7 @@ def init(self, settings): prescribeLookup[ndc] = obj prescribeLookup[normndc] = obj prescribeLookup[fullndc] = obj - fileB = get_file(settings, 'ndc_package', 'code/ndc/package.txt', debugOutput) + fileB = get_file(settings, 'ndc_package', 'code/ndc/package.txt') if not os.path.isfile(fileB): return prescribeLookup with open(fileB, 'r') as paFile: @@ -365,7 +365,7 @@ def create(self, symbols, type, id): return createUnknownEntry(symbols, type, id, pid, code=self.code) def init(self, settings): res = getGlobalSymbols(settings) - loinc_file = get_file(settings, 'loinc', '/m/CODES/loinc/loinc_file.all.headers', debugOutput) + loinc_file = get_file(settings, 'loinc', '/m/CODES/loinc/loinc_file.all.headers') if os.path.isfile(loinc_file): with open(loinc_file, 'r') as file: for line in file: @@ -400,7 +400,7 @@ def create(self, symbols, type, id): def init(self, settings): codes = getGlobalSymbols(settings) codes.update(getICD9(settings)) - self._parents = readCCS(get_file(settings, 'ccs_diag', 'code/ccs/multi_diag.txt', debugOutput), codes) + self._parents = readCCS(get_file(settings, 'ccs_diag', 'code/ccs/multi_diag.txt'), codes) return codes ### procedure ### @@ -427,7 +427,7 @@ def create(self, symbols, type, id): def init(self, settings): codes = getGlobalSymbols(settings) codes.update(getICD9(settings)) - self._parents = readCCS(get_file(settings, 'ccs_proc', 'code/ccs/multi_proc.txt', debugOutput), codes) + self._parents = readCCS(get_file(settings, 'ccs_proc', 'code/ccs/multi_proc.txt'), codes) return codes ### info ### @@ -495,7 +495,7 @@ def getICD9(settings): def initICD9(settings): codes = {} - f = get_file(settings, 'icd9', 'code/icd9/ucod.txt', debugOutput) + f = get_file(settings, 'icd9', 'code/icd9/ucod.txt') if not os.path.isfile(f): return codes with open(f, 'r') as file: @@ -560,7 +560,7 @@ def getGlobalSymbols(settings): def initGlobalSymbols(settings): codes_dict = {} - f = get_file(settings, 'filename', 'code/code_names.txt', debugOutput) + f = get_file(settings, 'filename', 'code/code_names.txt') if not os.path.isfile(f): return codes_dict with open(f, 'r') as file: @@ -603,7 +603,7 @@ def init(settings, settingsFile): ### argument API -def get_file(settings, key, default, debugOutput): +def get_file(settings, key, default): if key in settings: file = settings[key] else: From d6599980a613263ac66fe3a1b248a697081e0dce Mon Sep 17 00:00:00 2001 From: Josua Krause Date: Mon, 25 May 2015 10:24:03 -0400 Subject: [PATCH 5/7] =?UTF-8?q?replacement=20for=20na=C3=AFve=20icd9=20cod?= =?UTF-8?q?es?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build_dictionary.py | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/build_dictionary.py b/build_dictionary.py index fe20deb..094f2f1 100755 --- a/build_dictionary.py +++ b/build_dictionary.py @@ -399,7 +399,7 @@ def create(self, symbols, type, id): return createUnknownEntry(symbols, type, id, pid, code=self.code) def init(self, settings): codes = getGlobalSymbols(settings) - codes.update(getICD9(settings)) + codes.update(getICD9(settings, True)) self._parents = readCCS(get_file(settings, 'ccs_diag', 'code/ccs/multi_diag.txt'), codes) return codes @@ -426,7 +426,7 @@ def create(self, symbols, type, id): return createUnknownEntry(symbols, type, id, pid) def init(self, settings): codes = getGlobalSymbols(settings) - codes.update(getICD9(settings)) + codes.update(getICD9(settings, False)) self._parents = readCCS(get_file(settings, 'ccs_proc', 'code/ccs/multi_proc.txt'), codes) return codes @@ -485,13 +485,33 @@ def toEntry(id, pid, name, desc, alias=None): ### icd9 ### -globalICD9 = {} - -def getICD9(settings): - global globalICD9 - if not len(globalICD9.keys()): - globalICD9 = initICD9(settings) - return globalICD9.copy() +globalICD9 = { + 'diagnosis': {}, + 'procedure': {} +} + +def getICD9(settings, isDiagnosis): + k = 'diagnosis' if isDiagnosis else 'procedure' + if not len(globalICD9[k].keys()): + fileKeyS = k + '_icd9' + fileKeyL = k + '_icd9_long' + fileDefaultS = '/m/CODES/icd9/ICD-9-CM-v32-master-descriptions/' + ('CMS32_DESC_SHORT_DX.txt' if isDiagnosis else 'CMS32_DESC_SHORT_SG.txt') + fileDefaultL = '/m/CODES/icd9/ICD-9-CM-v32-master-descriptions/' + ('CMS32_DESC_LONG_DX.txt' if isDiagnosis else 'CMS32_DESC_LONG_SG.txt') + fileS = get_file(settings, fileKeyS, fileDefaultS) + fileL = get_file(settings, fileKeyL, fileDefaultL) + if not os.path.isfile(fileS) and not os.path.isfile(fileL): + globalICD9[k] = initICD9(settings) + else: + symbols = globalICD9[k] + f = fileS if not os.path.isfile(fileL) + with open(f, 'r') as file: + for line in file: + l = line.strip() + spl = l.split(' ', 1) + if len(spl) < 2: + continue + symbols[spl[0].strip()] = spl[1].strip() + return globalICD9[k].copy() def initICD9(settings): codes = {} From eb3d30d9571ab703862902530bfb0faa6a11ee44 Mon Sep 17 00:00:00 2001 From: Josua Krause Date: Mon, 25 May 2015 11:01:41 -0400 Subject: [PATCH 6/7] updating tests --- .gitignore | 1 + build_dictionary.py | 4 ++-- test/etc/.gitignore | 1 + test/etc/config.txt | 14 ++++++++++---- test/local.sh | 14 +++++++++++--- test/run.sh | 12 ++++++++++-- util.py | 35 +++++++++++++++++++++++++++++------ 7 files changed, 64 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index d7fd1d8..578b833 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ json.zip **/*.pyc /.venv/ +/config.txt.tmp_local diff --git a/build_dictionary.py b/build_dictionary.py index 094f2f1..7b48ce8 100755 --- a/build_dictionary.py +++ b/build_dictionary.py @@ -94,7 +94,7 @@ def createRootEntry(self, type): def init(self, settings, settingsFile): for k in self._baseTypes.keys(): self._codeTables[k] = self._baseTypes[k].init(settings) - util.read_config(settings, settingsFile, False) + util.save_config(settings, settingsFile) dictionary = EntryCreator() @@ -503,7 +503,7 @@ def getICD9(settings, isDiagnosis): globalICD9[k] = initICD9(settings) else: symbols = globalICD9[k] - f = fileS if not os.path.isfile(fileL) + f = fileS if not os.path.isfile(fileL) else fileL with open(f, 'r') as file: for line in file: l = line.strip() diff --git a/test/etc/.gitignore b/test/etc/.gitignore index 3362443..0471319 100644 --- a/test/etc/.gitignore +++ b/test/etc/.gitignore @@ -1 +1,2 @@ /cohort.txt +/config.txt.tmp diff --git a/test/etc/config.txt b/test/etc/config.txt index 107dbca..fde41cf 100644 --- a/test/etc/config.txt +++ b/test/etc/config.txt @@ -1,11 +1,17 @@ { + "ccs_diag": "../code/ccs/multi_diag.txt", "ccs_proc": "../code/ccs/multi_proc.txt", - "quote": "\"", "delim": ",", - "pnt": "../code/pnt/pnt.txt", + "diagnosis_icd9": "../code/not_found.txt", + "diagnosis_icd9_long": "../code/not_found.txt", "filename": "../code/code_names.txt", - "ndc_package": "../code/ndc/package.txt", + "ibc_speciality": "../code/code_names.txt", "icd9": "../code/icd9/ucod.txt", + "loinc": "../code/code_names.txt", + "ndc_package": "../code/ndc/package.txt", "ndc_prod": "../code/ndc/product.txt", - "ccs_diag": "../code/ccs/multi_diag.txt" + "pnt": "../code/pnt/pnt.txt", + "procedure_icd9": "../code/not_found.txt", + "procedure_icd9_long": "../code/not_found.txt", + "quote": "\"" } diff --git a/test/local.sh b/test/local.sh index 7a7f9e0..22a287b 100755 --- a/test/local.sh +++ b/test/local.sh @@ -13,7 +13,8 @@ FEATURE_EXTRACT="../feature_extraction" cohort="./etc/cohort.txt" format="../format.json" -config="../config.txt" +config="../config.txt.tmp_local" +config_regr="../config.txt" print() { echo "$@" 2>&1 @@ -36,12 +37,18 @@ check_file() { } print "test: training predictive model" +cp "${config_regr}" "${config}" if [ ! -f "${cohort}" ]; then print "building cohort" ${FEATURE_EXTRACT}/cohort.py --debug --query-file "${FEATURE_EXTRACT}/cases.txt" -f "${format}" -c "${config}" -o "${OUTPUT}/cohort_cases.txt.tmp_local" -- "$CMS_DIR" + check $? + check_file "${config_regr}" "${config}" ${FEATURE_EXTRACT}/cohort.py --debug --query-file "${FEATURE_EXTRACT}/control.txt" -f "${format}" -c "${config}" -o "${OUTPUT}/cohort_control.txt.tmp_local" -- "$CMS_DIR" + check $? + check_file "${config_regr}" "${config}" ${FEATURE_EXTRACT}/merge.py --cases "${OUTPUT}/cohort_cases.txt.tmp_local" --control "${OUTPUT}/cohort_control.txt.tmp_local" -o "${cohort}" --test 30 --seed 0 + check $? else print "use existing cohort at ${cohort}" fi @@ -49,10 +56,11 @@ fi ${FEATURE_EXTRACT}/extract.py --debug -w "${cohort}" --age-time 20100101 --to 20100101 -o - -f "${format}" -c "${config}" -- "$CMS_DIR" | \ ${FEATURE_EXTRACT}/train.py -w --in - --out "${OUTPUT}/model" --seed 0 --model reg -v 20 2> "${OUTPUT}/train.txt.tmp_local" check $? +check_file "${config_regr}" "${config}" check_file "${OUTPUT}/train.txt" "${OUTPUT}/train.txt.tmp_local" -rm -- ${OUTPUT}/*.tmp_local -rm -r -- ${OUTPUT}/model/ +rm -- "${config}" ${OUTPUT}/*.tmp_local +rm -r -- "${OUTPUT}/model/" print "all tests successful!" exec 3>&- # don't really need to close the FD diff --git a/test/run.sh b/test/run.sh index 16a75ca..cc97ade 100755 --- a/test/run.sh +++ b/test/run.sh @@ -15,7 +15,8 @@ FEATURE_EXTRACT="../feature_extraction" format="../format.json" style_classes="../style_classes.json" etc="./etc" -config="${etc}/config.txt" +config="${etc}/config.txt.tmp" +config_regr="${etc}/config.txt" print() { echo "$@" 2>&1 @@ -53,6 +54,7 @@ convert_patient() { ../build_dictionary.py --debug -p "${OUTPUT}/${id}.json.tmp" -c "${config}" -o "${OUTPUT}/dictionary.json.tmp" check $? check_file "${OUTPUT}/dictionary.json" "${OUTPUT}/dictionary.json.tmp" + check_file "${config_regr}" "${config}" } create_predictive_model() { @@ -65,24 +67,30 @@ create_predictive_model() { ${FEATURE_EXTRACT}/cohort.py --debug --query-file "${etc}/cases.txt" -f "${format}" -c "${config}" -o "${OUTPUT}/cohort_cases.txt.tmp" -- "${CMS_DIR}" check $? check_file "${OUTPUT}/cohort_cases.txt" "${OUTPUT}/cohort_cases.txt.tmp" + check_file "${config_regr}" "${config}" ${FEATURE_EXTRACT}/cohort.py --debug --query-file "${etc}/control.txt" -f "${format}" -c "${config}" -o "${OUTPUT}/cohort_control.txt.tmp" -- "${CMS_DIR}" check $? check_file "${OUTPUT}/cohort_control.txt" "${OUTPUT}/cohort_control.txt.tmp" + check_file "${config_regr}" "${config}" ${FEATURE_EXTRACT}/merge.py --cases "${OUTPUT}/cohort_cases.txt.tmp" --control "${OUTPUT}/cohort_control.txt.tmp" -o "${OUTPUT}/cohort.txt.tmp" --test 30 --seed 0 2> $ERR_FILE check $? check_file "${OUTPUT}/cohort.txt" "${OUTPUT}/cohort.txt.tmp" ${FEATURE_EXTRACT}/extract.py --debug -w "${OUTPUT}/cohort.txt.tmp" --num-cutoff 1 --age-time 20100101 --to 20100101 -o "${OUTPUT}/output.csv.tmp" -f "${format}" -c "${config}" -- "${CMS_DIR}" check $? check_file "${OUTPUT}/output.csv" "${OUTPUT}/output.csv.tmp" + check_file "${config_regr}" "${config}" head -n 1 "${OUTPUT}/output.csv.tmp" | sed "s/,/ /g" | ../build_dictionary.py --debug -o "${OUTPUT}/headers.json.tmp" -c "${config}" --lookup - check $? check_file "${OUTPUT}/headers.json" "${OUTPUT}/headers.json.tmp" + check_file "${config_regr}" "${config}" } +cp "${config_regr}" "${config}" + convert_patient "8CDC0C5ACBDFC9CE" create_predictive_model -rm -- ${ERR_FILE} ${OUTPUT}/*.tmp +rm -- "${ERR_FILE}" "${config}" ${OUTPUT}/*.tmp print "all tests successful!" exec 3>&- # don't really need to close the FD diff --git a/util.py b/util.py index 98ced8a..3284c9c 100755 --- a/util.py +++ b/util.py @@ -108,16 +108,39 @@ def read_config(settings, file, debugOutput=False): with open(file, 'r') as input: config = json.loads(input.read()) settings.update(config) - if not set(settings.keys()) - set(config.keys()): + save_on_change(settings, config, file) + +def save_config(settings, file): + global _path_correction + if file is None: return + _path_correction = os.path.dirname(os.path.abspath(file)) + config = {} + if os.path.isfile(file): + with open(file, 'r') as input: + config = json.loads(input.read()) + save_on_change(settings, config, file) + +def save_on_change(local, original, file): same = True - for k in settings.keys(): - if settings[k] != config[k]: - same = False - break + lk = local.keys() + ok = original.keys() + if len(lk) != len(ok): + same = False + else: + for k in lk: + if k not in original or local[k] != original[k]: + same = False + break + if same: + # small number of keys so it is not bad to iterate twice + for k in ok: + if k not in local or original[k] != local[k]: + same = False + break if not same: with open(file, 'w') as output: - print(json.dumps(settings, indent=2, sort_keys=True), file=output) + print(json.dumps(local, indent=2, sort_keys=True), file=output) def read_format(file, input_format, usage): if not os.path.isfile(file): From 13d3514f62e18f5164955c350f65d016f05013d3 Mon Sep 17 00:00:00 2001 From: Josua Krause Date: Mon, 25 May 2015 11:28:53 -0400 Subject: [PATCH 7/7] upgrade pip on setup --- setup.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.sh b/setup.sh index 561ee83..0a96d88 100755 --- a/setup.sh +++ b/setup.sh @@ -291,6 +291,7 @@ pip_install() { source ${venv_activate} test_fail $? echo "install python packages" + pip install --upgrade pip pip install -r requirements.txt test_fail $? }