forked from clovaai/CLEval
-
Notifications
You must be signed in to change notification settings - Fork 0
/
file_utils.py
67 lines (55 loc) · 1.85 KB
/
file_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import re
import codecs
import zipfile
def load_zip_file_keys(file, file_name_reg_exp=''):
    """Return the entry names of a ZIP archive, optionally filtered by a regex.

    Args:
        file: Path or binary file-like object accepted by ``zipfile.ZipFile``.
        file_name_reg_exp: Pattern applied with ``re.match`` to every entry
            name. When empty (the default) every entry is kept under its own
            name. Otherwise entries that do not match are dropped, and if the
            pattern has at least one capturing group the first group is used
            as the key instead of the full entry name.

    Returns:
        A list of keys, in the order the entries appear in the archive.

    Raises:
        Exception: If the archive cannot be opened.
    """
    try:
        archive = zipfile.ZipFile(file, mode='r', allowZip64=True)
    except Exception as err:
        raise Exception('Error loading the ZIP archive.') from err

    # Only the name listing is needed, so release the handle right away.
    with archive:
        names = archive.namelist()

    if file_name_reg_exp == "":
        return list(names)

    keys = []
    for name in names:
        match = re.match(file_name_reg_exp, name)
        if match is None:
            continue
        # Prefer the first capturing group as the key when the pattern has one.
        keys.append(match.group(1) if match.groups() else name)
    return keys
def load_zip_file(file, file_name_reg_exp='', allEntries=False):
    """Return the raw contents of every entry of a ZIP archive as a dict.

    Each key is the entry name with every occurrence of ``'gt_'``, ``'res_'``
    and ``'.txt'`` removed (e.g. ``'gt_img_1.txt'`` -> ``'img_1'``). Each value
    is the entry's content as bytes. If two entries reduce to the same key,
    the later one in archive order wins.

    Args:
        file: Path or binary file-like object accepted by ``zipfile.ZipFile``.
        file_name_reg_exp: Accepted for interface compatibility; currently
            NOT applied. No entry is filtered out and keys are never taken
            from a capturing group.
        allEntries: Accepted for interface compatibility; currently has no
            effect because no entry is ever rejected.

    Returns:
        A dict mapping the derived key to the entry's bytes.

    Raises:
        Exception: If the archive cannot be opened.
    """
    try:
        archive = zipfile.ZipFile(file, mode='r', allowZip64=True)
    except Exception as err:
        raise Exception('Error loading the ZIP archive') from err

    contents = {}
    # The context manager closes the archive even if reading an entry fails.
    with archive:
        for name in archive.namelist():
            key = name.replace('gt_', '').replace('res_', '').replace('.txt', '')
            contents[key] = archive.read(name)
    return contents
def decode_utf8(raw):
    """Decode UTF-8 bytes to text, dropping a single leading byte-order mark.

    Byte sequences that are not valid UTF-8 are replaced with U+FFFD instead
    of raising.

    Args:
        raw: Bytes-like object holding UTF-8 encoded text.

    Returns:
        The decoded string, or None if ``raw`` cannot be decoded at all
        (for example when it is not a bytes-like object).
    """
    try:
        text = codecs.decode(raw, 'utf-8', 'replace')
    except Exception:
        # Documented contract: failure is reported as None, not raised.
        return None

    # Strip the BOM on the decoded text. Doing it on re-encoded bytes with a
    # str replacement raises TypeError on Python 3 and used to yield None.
    bom = codecs.BOM_UTF8.decode('utf-8')
    if text.startswith(bom):
        text = text[len(bom):]
    return text