-
Notifications
You must be signed in to change notification settings - Fork 0
/
mesh_descriptors_ascii_file_parser.py
56 lines (48 loc) · 1.28 KB
/
mesh_descriptors_ascii_file_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 5 17:06:58 2016
@author: ShebleAdmin
Parse the MeSH descriptor ASCII file into a list of per-record dictionaries.

The initial version used the 2017 preliminary MeSH Descriptor file from:
ftp://nlmpubs.nlm.nih.gov/online/mesh/MESH_FILES/asciimesh/d2017.bin
"""
import simplejson as json
import pandas as pd
# Input/output locations. ``fp`` is machine-specific and must end with a path
# separator because the file names are appended to it directly.
mesh_plaintext = 'mesh_ascii_d2017.txt'
fp = '/Users/ShebleAdmin/'
mesh_tsv = 'mesh_ascii_d2017.csv'
meshin = fp + mesh_plaintext
tsvout = fp + mesh_tsv

# Line that opens each record in the input file.
RECORD_MARKER = "*NEWRECORD"

# Parsed records; filled in when the file is run as a script.
records_list = []


def parse_mesh_records(lines):
    """Group ``KEY = value`` lines into one dict per record.

    Args:
        lines: Any iterable of text lines (an open file works). Trailing
            whitespace/newlines are ignored.

    Returns:
        A list of dicts, one per record, mapping each stripped key to its
        stripped value. Only the first ``=`` on a line splits key from value,
        so values may themselves contain ``=``.

    A record ends at a blank line, at the next ``*NEWRECORD`` marker, or at the
    end of the input, so the final record is kept even without a trailing
    blank line. Runs of blank lines never produce empty records.

    NOTE(review): a key that occurs more than once inside a record keeps only
    its last value. MeSH records appear to repeat some fields (e.g. ENTRY,
    MN) -- confirm whether last-wins is acceptable before relying on them.
    """
    records = []
    current = {}
    for line in lines:
        key, _, value = line.rstrip().partition("=")
        key = key.strip()
        if not key or key == RECORD_MARKER:
            # Record boundary: flush what has been collected so far, but
            # never emit an empty record for stray/duplicate separators.
            if current:
                records.append(current)
                current = {}
            continue
        # A line without "=" has no value; store an empty string instead of
        # silently reusing the value parsed from an earlier line.
        current[key] = value.strip()
    if current:
        # Input ended without a terminating blank line.
        records.append(current)
    return records


if __name__ == "__main__":
    # Read the input file and report how many records it contains.
    with open(meshin, 'r') as f:
        records_list = parse_mesh_records(f)
    print(len(records_list))
'''
Optional follow-ups, kept for reference (not executed).

To inspect a record, print it just before it is appended to the list of
records and reset for the next record:
print(json.dumps(record,sort_keys=True, indent=4 * ' '))

To write all records to a tab-separated file:
df = pd.DataFrame(records_list)
df.to_csv(tsvout, index=False, sep='\t', encoding='utf-8')
'''