Skip to content

Commit

Permalink
Merge pull request #48 from nyuvis/more_codes
Browse files Browse the repository at this point in the history
Add more code files
  • Loading branch information
JosuaKrause committed May 25, 2015
2 parents df8dc20 + 13d3514 commit bf455cb
Show file tree
Hide file tree
Showing 10 changed files with 181 additions and 92 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
json.zip
**/*.pyc
/.venv/
/config.txt.tmp_local
178 changes: 103 additions & 75 deletions build_dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,23 +91,10 @@ def createRootEntry(self, type):
res["flags"] = flags
return res

def init(self, settings):
global globalSymbolsFile
global icd9File
global pntFile
global ccs_diag_file
global ccs_proc_file
global productFile
global packageFile
globalSymbolsFile = settings['filename']
icd9File = settings['icd9']
pntFile = settings['pnt']
ccs_diag_file = settings['ccs_diag']
ccs_proc_file = settings['ccs_proc']
productFile = settings['ndc_prod']
packageFile = settings['ndc_package']
def init(self, settings, settingsFile):
for k in self._baseTypes.keys():
self._codeTables[k] = self._baseTypes[k].init()
self._codeTables[k] = self._baseTypes[k].init(settings)
util.save_config(settings, settingsFile)

dictionary = EntryCreator()

Expand All @@ -124,10 +111,10 @@ def flags(self):
return {}
def addCodeType(self, code, codeType):
self._codeTypes[code] = codeType
def init(self):
def init(self, settings):
res = {}
for code in self._codeTypes.keys():
res[code] = self._codeTypes[code].init()
res[code] = self._codeTypes[code].init(settings)
return res
def create(self, symbols, type, id, code):
candidate = None
Expand Down Expand Up @@ -161,7 +148,7 @@ def create(self, symbols, type, id, code):
return (candidate, code)

class TypeCode(object):
def init(self):
def init(self, settings):
raise NotImplementedError()
def create(self, symbols, type, id):
raise NotImplementedError
Expand All @@ -183,9 +170,9 @@ def create(self, symbols, type, id):
if len(id) == 2:
return createUnknownEntry(symbols, type, id, pid, code=self.code)
return toEntry(id, pid, id, "Provider Number: {0}".format(id))
def init(self):
def init(self, settings):
res = {}
file = util.get_file(pntFile, debugOutput)
file = get_file(settings, 'pnt', 'code/pnt/pnt.txt')
if not os.path.isfile(file):
return res
with open(file, 'r') as pnt:
Expand Down Expand Up @@ -213,9 +200,29 @@ class CmsPhysicianCode(TypeCode):
def create(self, symbols, type, id):
pid = ""
return createUnknownEntry(symbols, type, id, pid, code=self.code)
def init(self):
def init(self, settings):
return {}

@dictionary.codeType("physician", "ibc")
class CmsPhysicianCode(TypeCode):
    """Code type registered under base type "physician" with code "ibc".

    NOTE(review): an earlier class in this file is also named
    CmsPhysicianCode; this definition rebinds the module-level name.
    Presumably harmless because registration goes through the decorator,
    but confirm before relying on the name.
    """

    def create(self, symbols, type, id):
        """Build the dictionary entry for physician code `id`.

        When `id` is a key of `symbols`, its value is used as both the name
        and the description of the entry; otherwise an unknown entry is
        created. The parent id is always the empty string.
        """
        pid = ""
        if id not in symbols:
            return createUnknownEntry(symbols, type, id, pid, code=self.code)
        label = symbols[id]
        return toEntry(id, pid, label, label)

    def init(self, settings):
        """Load the specialty table used as `symbols` lookup.

        The file path is taken from `settings['ibc_speciality']` (falling
        back to a default path). Each line is stripped and split at the
        first '#': the left part becomes the key, the right part the value.
        Lines without '#' are ignored. A missing file yields an empty dict.
        """
        table = {}
        path = get_file(settings, 'ibc_speciality', '/m/CODES/specialty/specialty_headers.txt')
        if not os.path.isfile(path):
            return table
        with open(path, 'r') as handle:
            for raw in handle:
                key, sep, value = raw.strip().partition('#')
                if not sep:
                    # no '#' separator on this line
                    continue
                table[key] = value
        return table

### prescribed ###
@dictionary.baseType("prescribed")
class TypePrescribed(TypeBase):
Expand All @@ -232,9 +239,9 @@ def create(self, symbols, type, id):
l = symbols[id]
return toEntry(id, pid, l["nonp"], l["nonp"]+" ["+l["desc"]+"] ("+l["prop"]+") "+l["subst"]+" - "+l["pharm"]+" - "+l["pType"], l["alias"] if "alias" in l else None)
return createUnknownEntry(symbols, type, id, pid, code=self.code)
def init(self):
def init(self, settings):
prescribeLookup = {}
fileA = util.get_file(productFile, debugOutput)
fileA = get_file(settings, 'ndc_prod', 'code/ndc/product.txt')
if not os.path.isfile(fileA):
return prescribeLookup
uidLookup = {}
Expand Down Expand Up @@ -287,7 +294,7 @@ def init(self):
prescribeLookup[ndc] = obj
prescribeLookup[normndc] = obj
prescribeLookup[fullndc] = obj
fileB = util.get_file(packageFile, debugOutput)
fileB = get_file(settings, 'ndc_package', 'code/ndc/package.txt')
if not os.path.isfile(fileB):
return prescribeLookup
with open(fileB, 'r') as paFile:
Expand Down Expand Up @@ -356,8 +363,18 @@ def create(self, symbols, type, id):
if id in symbols:
return toEntry(id, pid, symbols[id], symbols[id])
return createUnknownEntry(symbols, type, id, pid, code=self.code)
def init(self):
return getGlobalSymbols()
def init(self, settings):
    """Return the global symbols extended with entries from the LOINC file.

    The file path is taken from `settings['loinc']` (falling back to a
    default path). Each line is stripped and split at the first '#': the
    left part becomes the key, the right part the value, overriding any
    global symbol with the same key. Lines without '#' are ignored; a
    missing file leaves the global symbols untouched.
    """
    table = getGlobalSymbols(settings)
    path = get_file(settings, 'loinc', '/m/CODES/loinc/loinc_file.all.headers')
    if not os.path.isfile(path):
        return table
    with open(path, 'r') as handle:
        for raw in handle:
            key, sep, value = raw.strip().partition('#')
            if not sep:
                # no '#' separator on this line
                continue
            table[key] = value
    return table

### diagnosis ###
@dictionary.baseType("diagnosis")
Expand All @@ -380,10 +397,10 @@ def create(self, symbols, type, id):
return toEntry(id, pid, symbols[prox_id], symbols[prox_id], id.replace(".", "") if "HIERARCHY" not in id else None)
prox_id = prox_id[:-1]
return createUnknownEntry(symbols, type, id, pid, code=self.code)
def init(self):
codes = getGlobalSymbols()
codes.update(getICD9())
self._parents = readCCS(util.get_file(ccs_diag_file, debugOutput), codes)
def init(self, settings):
    """Build the diagnosis symbol table and remember the CCS parents.

    Starts from the global symbols, merges in the ICD9 diagnosis codes and
    stores the result of `readCCS` for the diagnosis CCS file (path from
    `settings['ccs_diag']` or its default) in `self._parents`.
    Returns the merged symbol table.
    """
    symbols = getGlobalSymbols(settings)
    icd9_codes = getICD9(settings, True)
    symbols.update(icd9_codes)
    ccs_path = get_file(settings, 'ccs_diag', 'code/ccs/multi_diag.txt')
    self._parents = readCCS(ccs_path, symbols)
    return symbols

### procedure ###
Expand All @@ -407,10 +424,10 @@ def create(self, symbols, type, id):
return toEntry(id, pid, symbols[prox_id], symbols[prox_id], id.replace(".", "") if "HIERARCHY" not in id else None)
prox_id = prox_id[:-1]
return createUnknownEntry(symbols, type, id, pid)
def init(self):
codes = getGlobalSymbols()
codes.update(getICD9())
self._parents = readCCS(util.get_file(ccs_proc_file, debugOutput), codes)
def init(self, settings):
    """Build the procedure symbol table and remember the CCS parents.

    Starts from the global symbols, merges in the ICD9 procedure codes and
    stores the result of `readCCS` for the procedure CCS file (path from
    `settings['ccs_proc']` or its default) in `self._parents`.
    Returns the merged symbol table.
    """
    symbols = getGlobalSymbols(settings)
    icd9_codes = getICD9(settings, False)
    symbols.update(icd9_codes)
    ccs_path = get_file(settings, 'ccs_proc', 'code/ccs/multi_proc.txt')
    self._parents = readCCS(ccs_path, symbols)
    return symbols

### info ###
Expand All @@ -426,7 +443,7 @@ class InfoInfoCode(TypeCode):
def create(self, symbols, type, id):
pid = ""
return toEntry(id, pid, id, "Info: " + id)
def init(self):
def init(self, settings):
return {}


Expand All @@ -439,7 +456,7 @@ def name(self):
return UNKNOWN
def color(self):
return "red"
def init(self):
def init(self, settings):
raise NotImplementedError()
def create(self, symbols, type, id, code):
return createUnknownEntry(symbols, type, id, code=code)
Expand Down Expand Up @@ -468,17 +485,37 @@ def toEntry(id, pid, name, desc, alias=None):

### icd9 ###

globalICD9 = {}
globalICD9 = {
'diagnosis': {},
'procedure': {}
}

def getICD9():
global globalICD9
if not len(globalICD9.keys()):
globalICD9 = initICD9()
return globalICD9.copy()
def getICD9(settings, isDiagnosis):
    """Return a copy of the ICD9 symbol table for diagnoses or procedures.

    The table is cached per kind in the module-level `globalICD9` dict and
    only (re)built while the cached table is empty.

    settings    -- settings dict; the file keys '<kind>_icd9' and
                   '<kind>_icd9_long' are looked up (and defaulted) in it.
    isDiagnosis -- True selects the 'diagnosis' table, False 'procedure'.

    If neither the short nor the long description file exists the table is
    replaced by the result of `initICD9(settings)`. Otherwise the long file
    is preferred over the short one and every line is split at its first
    space into code and description; lines without a space are ignored.
    """
    kind = 'diagnosis' if isDiagnosis else 'procedure'
    if globalICD9[kind]:
        # already populated by an earlier call
        return globalICD9[kind].copy()

    base_dir = '/m/CODES/icd9/ICD-9-CM-v32-master-descriptions/'
    if isDiagnosis:
        default_short = base_dir + 'CMS32_DESC_SHORT_DX.txt'
        default_long = base_dir + 'CMS32_DESC_LONG_DX.txt'
    else:
        default_short = base_dir + 'CMS32_DESC_SHORT_SG.txt'
        default_long = base_dir + 'CMS32_DESC_LONG_SG.txt'
    # keep the lookup order: short key first, then long key
    short_path = get_file(settings, kind + '_icd9', default_short)
    long_path = get_file(settings, kind + '_icd9_long', default_long)

    long_exists = os.path.isfile(long_path)
    if not long_exists and not os.path.isfile(short_path):
        # neither description file is available: use the initICD9 table
        globalICD9[kind] = initICD9(settings)
        return globalICD9[kind].copy()

    table = globalICD9[kind]
    chosen = long_path if long_exists else short_path
    with open(chosen, 'r') as handle:
        for raw in handle:
            code, sep, desc = raw.strip().partition(' ')
            if not sep:
                # line holds no space-separated description
                continue
            table[code.strip()] = desc.strip()
    return table.copy()

def initICD9():
def initICD9(settings):
codes = {}
f = util.get_file(icd9File, debugOutput)
f = get_file(settings, 'icd9', 'code/icd9/ucod.txt')
if not os.path.isfile(f):
return codes
with open(f, 'r') as file:
Expand Down Expand Up @@ -535,15 +572,15 @@ def readCCS(ccsFile, codes):

globalSymbols = {}

def getGlobalSymbols():
def getGlobalSymbols(settings):
global globalSymbols
if not len(globalSymbols.keys()):
globalSymbols = initGlobalSymbols()
globalSymbols = initGlobalSymbols(settings)
return globalSymbols.copy()

def initGlobalSymbols():
def initGlobalSymbols(settings):
codes_dict = {}
f = util.get_file(globalSymbolsFile, debugOutput)
f = get_file(settings, 'filename', 'code/code_names.txt')
if not os.path.isfile(f):
return codes_dict
with open(f, 'r') as file:
Expand Down Expand Up @@ -581,19 +618,18 @@ def enrichDict(file, mid):
with util.OutWrapper(file) as out:
print(json.dumps(dict, indent=2, sort_keys=True), file=out)

def init(settings):
dictionary.init(settings)
def init(settings, settingsFile):
    """Initialize the module-level `dictionary` with the given settings.

    Thin module-level entry point that forwards both arguments unchanged
    to `dictionary.init`.
    """
    dictionary.init(settings, settingsFile)

### argument API

icd9File = 'code/icd9/ucod.txt'
ccs_diag_file = 'code/ccs/multi_diag.txt'
ccs_proc_file = 'code/ccs/multi_proc.txt'
productFile = 'code/ndc/product.txt'
packageFile = 'code/ndc/package.txt'
pntFile = 'code/pnt/pnt.txt'
globalSymbolsFile = 'code/code_names.txt'
globalMid = '2507387001'
def get_file(settings, key, default):
    """Resolve the file configured under `key`, recording the default if unset.

    settings -- settings dict; when `key` is missing it is added with the
                value `default`, so the dict ends up listing every file
                key that was requested.
    key      -- name of the setting holding the file name.
    default  -- file name to use (and store) when `key` is not configured.

    Returns whatever `util.get_file` yields for the chosen file name and
    the module-level `debugOutput` flag.
    """
    chosen = settings.setdefault(key, default)
    return util.get_file(chosen, debugOutput)

def usage():
print("{0}: [--debug] -p <file> -c <config> -o <output> [-h|--help] [--lookup <id...>]".format(sys.argv[0]), file=sys.stderr)
Expand All @@ -605,21 +641,12 @@ def usage():
print("-h|--help: prints this help.", file=sys.stderr)
sys.exit(1)

defaultSettings = {
'filename': globalSymbolsFile,
'ndc_prod': productFile,
'ndc_package': packageFile,
'icd9': icd9File,
'pnt': pntFile,
'ccs_diag': ccs_diag_file,
'ccs_proc': ccs_proc_file,
}

def interpretArgs():
global debugOutput
settings = defaultSettings
settings = {}
settingsFile = None
info = {
'mid': globalMid,
'mid': '-',
'output': '-'
}
lookupMode = False
Expand All @@ -638,7 +665,8 @@ def interpretArgs():
if not args:
print('-c requires argument', file=sys.stderr)
usage()
util.read_config(settings, args.pop(0), debugOutput)
settingsFile = args.pop(0)
util.read_config(settings, settingsFile, debugOutput)
elif val == '-o':
if not args:
print('-o requires argument', file=sys.stderr)
Expand All @@ -652,11 +680,11 @@ def interpretArgs():
else:
print('illegal argument '+val, file=sys.stderr)
usage()
return (settings, info, lookupMode, args)
return (settings, settingsFile, info, lookupMode, args)

if __name__ == '__main__':
(settings, info, lookupMode, rest) = interpretArgs()
dictionary.init(settings)
(settings, settingsFile, info, lookupMode, rest) = interpretArgs()
dictionary.init(settings, settingsFile)
if lookupMode:
dict = {}

Expand Down
8 changes: 5 additions & 3 deletions feature_extraction/cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,8 @@ def usage():

if __name__ == '__main__':
output = '-'
settings = build_dictionary.defaultSettings
settingsFile = None
settings = {}
settings['delim'] = ','
settings['quote'] = '"'
query = ""
Expand Down Expand Up @@ -553,7 +554,8 @@ def usage():
if not args or args[0] == '--':
print('-c requires argument', file=sys.stderr)
usage()
util.read_config(settings, args.pop(0), build_dictionary.debugOutput)
settingsFile = args.pop(0)
util.read_config(settings, settingsFile, build_dictionary.debugOutput)
elif arg == '--debug':
build_dictionary.debugOutput = True
else:
Expand All @@ -564,7 +566,7 @@ def usage():
print('query is required', file=sys.stderr)
usage()

build_dictionary.init(settings)
build_dictionary.init(settings, settingsFile)

allPaths = []
while args:
Expand Down
8 changes: 5 additions & 3 deletions feature_extraction/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,8 @@ def usage():

if __name__ == '__main__':
output = '-'
settings = build_dictionary.defaultSettings
settingsFile = None
settings = {}
settings['delim'] = ','
settings['quote'] = '"'
whitelist = None
Expand Down Expand Up @@ -295,15 +296,16 @@ def usage():
if not args or args[0] == '--':
print('-c requires argument', file=sys.stderr)
usage()
util.read_config(settings, args.pop(0), build_dictionary.debugOutput)
settingsFile = args.pop(0)
util.read_config(settings, settingsFile, build_dictionary.debugOutput)
elif arg == '--debug':
build_dictionary.debugOutput = True
else:
print('unrecognized argument: ' + arg, file=sys.stderr)
usage()

build_dictionary.reportMissingEntries = False
build_dictionary.init(settings)
build_dictionary.init(settings, settingsFile)

allPaths = []
while args:
Expand Down
1 change: 1 addition & 0 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ pip_install() {
source ${venv_activate}
test_fail $?
echo "install python packages"
pip install --upgrade pip
pip install -r requirements.txt
test_fail $?
}
Expand Down
Loading

0 comments on commit bf455cb

Please sign in to comment.