-
Notifications
You must be signed in to change notification settings - Fork 2
/
files.py
executable file
·50 lines (36 loc) · 1.54 KB
/
files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python
# Tai Sakuma <[email protected]>
import pandas as pd
from .dtype import convert_column_types_to_category
##__________________________________________________________________||
def write_to_file(tbl, path):
    """Write a table to a text file, without its index.

    The table is rendered with ``tbl.to_string(f, index=False)`` and a
    trailing newline is appended. The path being written is printed to
    standard output first.

    tbl  : object with a pandas-style ``to_string(buf, index=...)`` method,
           e.g., a pandas data frame
    path : path of the file to write; the file is opened in 'w' mode, so
           an existing file is overwritten
    """
    # A formatted print() call instead of a print statement: it emits the
    # same text ('writing ' + ' ' + path) and runs on both Python 2 and 3.
    print('writing  %s' % (path,))
    with open(path, 'w') as f:
        tbl.to_string(f, index=False)
        f.write("\n")
    # No explicit f.close(): the with block closes the file on exit.
##__________________________________________________________________||
def custom_pd_read_table(path, dtype=None):
    """Read a whitespace-delimited table from a file into a data frame.

    Columns listed in ``dtype`` are read with the given types. All other
    columns are read as ``str`` and then passed to
    ``convert_column_types_to_category``, which is intended to turn them
    into categories.

    path  : file path, or an already open, seekable buffer; a buffer is
            rewound to its start after the header line has been read
    dtype : mapping from column name to type,
            e.g., {'n': float, 'nvar': float};
            None (the default) means no column has a specified type

    Returns the result of
    ``convert_column_types_to_category(tbl, str_columns)``.
    """
    if dtype is None:
        dtype = {}

    # The first line of the table holds the column names.
    try:
        header_file = open(path)
    except TypeError:
        # path is not a file name but a buffer
        header = path.readline()
        path.seek(0)  # rewind so that read_table sees the header again
    else:
        # close the file as soon as the header has been read
        with header_file:
            header = header_file.readline()
    columns = header.split()
    # e.g., columns = ['component', 'cutflow', 'bintype', 'nJet100', 'n', 'nvar']

    # columns without a specified type are read as strings
    str_columns = [c for c in columns if c not in dtype]
    # e.g., ['component', 'cutflow', 'bintype', 'nJet100']

    column_type_dict = dict(dtype)  # copy; the caller's mapping is not modified
    column_type_dict.update({c: str for c in str_columns})
    # e.g., {'component': <type 'str'>, 'nJet100': <type 'str'>, 'nvar': <type
    # 'float'>, 'cutflow': <type 'str'>, 'bintype': <type 'str'>, 'n': <type
    # 'float'>}

    # sep=r'\s+' is the documented equivalent of delim_whitespace=True,
    # which is deprecated in recent pandas versions.
    tbl = pd.read_table(path, sep=r'\s+', dtype=column_type_dict)
    return convert_column_types_to_category(tbl, str_columns)
##__________________________________________________________________||