-
Notifications
You must be signed in to change notification settings - Fork 0
/
merge_csv_files.py
104 lines (77 loc) · 2.97 KB
/
merge_csv_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
"""
merge multiple csv files in one directory
"""
# --------------------------------------------------------
import os
import glob
import shutil
import pandas as pd
from datetime import datetime
# --------------------------------------------------------
# Capture the moment the script starts; both names are read again further
# down the file when the elapsed time is reported.
start_time = datetime.now()
start_time_str = f'{start_time:%Y-%m-%d %H:%M:%S}'
print(' '.join(['start date & time:\n', start_time_str, '\n']))
# --------------------------------------------------------
def concat_csv_files(_dir_csv, _dir_merged_csv, _path_merged_csv, _cwd):
    """Concatenate every ``*.csv`` file in ``_dir_csv`` into one CSV file.

    The output directory is created when it does not exist, and a merged
    file left over from an earlier run is deleted first. Each input file is
    read with its first row as the header, a short summary of it is printed,
    and all frames are stacked row-wise and written to ``_path_merged_csv``
    without the index column.

    The process working directory is switched to ``_dir_csv`` while the
    files are read and written, and is always switched back to ``_cwd``
    afterwards, even when an error occurs.

    Args:
        _dir_csv: Directory that holds the input CSV files.
        _dir_merged_csv: Directory that will hold the merged CSV file.
        _path_merged_csv: Path of the merged CSV file to write.
        _cwd: Directory to restore as the working directory on exit.

    Raises:
        ValueError: If ``_dir_csv`` contains no ``*.csv`` files.
    """
    # Prepare the output location: drop a stale result or create the folder.
    # Failures here are reported but not fatal (best effort, as before).
    if os.path.exists(_dir_merged_csv):
        if os.path.exists(_path_merged_csv):
            try:
                os.remove(_path_merged_csv)
            except OSError:
                print("ERROR!")
    else:
        try:
            os.mkdir(_dir_merged_csv)
        except OSError:
            print('unable to create directory!')

    os.chdir(_dir_csv)
    try:
        # glob returns names in arbitrary order; sort them so the row order
        # of the merged file is reproducible between runs.
        csv_names = sorted(glob.glob('*.csv'))
        if not csv_names:
            raise ValueError('no csv files found in: ' + str(_dir_csv))

        frames = []
        for csv_name in csv_names:
            frame = pd.read_csv(csv_name, header=0)
            frames.append(frame)
            print(csv_name)
            print(frame.shape)
            print(frame.describe())
            print(frame.head(), '\n')

        merged = pd.concat(frames, axis=0)
        merged.to_csv(_path_merged_csv, index=False)
        print('merged csv file is:\n', _path_merged_csv, '\n')
        print('shape:\n', merged.shape, '\n')
        print('head:\n', merged.head(), '\n')
        print('tail:\n', merged.tail(), '\n')
        print('description:\n', merged.describe(), '\n')
        # DataFrame.info() prints its report itself and returns None, so it
        # must not be passed to print().
        print('info:')
        merged.info()
        print()
    finally:
        # Restore the caller-supplied directory (not a module-level global),
        # and do so even if reading or writing failed.
        os.chdir(_cwd)
# --------------------------------------------------------
# remember the directory the script was launched from; every path below is
# built relative to it
cwd = os.getcwd()
print('full path of current working directory:', '\n', cwd, '\n')
# --------------------------------------------------------
# specify the directory of the csv files
dir_csv = 'data'
path_csv = os.path.join(cwd, dir_csv)
print('path of csv files:\n', path_csv, '\n')
# --------------------------------------------------------
# specify the path and name of merged csv file
merged_csv_dir_name = 'merged_csv'
merged_csv_name = 'merged_results.csv'
dir_merged_csv = os.path.join(cwd, merged_csv_dir_name)
# dir_merged_csv already starts with cwd, so it is not joined with cwd again
path_merged_csv = os.path.join(dir_merged_csv, merged_csv_name)
print('full path of merged csv:\n', path_merged_csv, '\n')
# --------------------------------------------------------
# concatenate the csv files
concat_csv_files(path_csv, dir_merged_csv, path_merged_csv, cwd)
# --------------------------------------------------------
# print finish time stamp and time used
finish_time = datetime.now()
finish_time_str = finish_time.strftime('%Y-%m-%d %H:%M:%S')
time_used = finish_time - start_time
print('started at : ' + start_time_str)
print('finished at : ' + finish_time_str)
# total_seconds() covers the whole interval; the .seconds attribute leaves
# out the days component and would under-report very long runs
print('time used : ' + str(round(time_used.total_seconds() / 60, 1)) + ' minutes' + '\n')
# the backslash is escaped so the literal is valid on newer Python versions;
# the printed text is unchanged
print("\\(^o^)/... D'oh! .....................................................")