-
Notifications
You must be signed in to change notification settings - Fork 2
/
utils.py
84 lines (74 loc) · 2.26 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import re
def decodehtml(s):
    """Decode HTML entities in the given text.

    Handles decimal (``&#65;``) and hexadecimal (``&#x41;`` / ``&#X41;``)
    character references as well as named entities (``&amp;``). The
    trailing semicolon is optional. Unknown entity names, and numeric
    references that do not map to a valid character, are left in the text
    unchanged.

    Runs on both Python 2 and Python 3.
    """
    try:
        from html.entities import name2codepoint  # Python 3
    except ImportError:
        from htmlentitydefs import name2codepoint  # Python 2
    try:
        wide_chr = unichr  # Python 2: chr() only covers 0-255
    except NameError:
        wide_chr = chr  # Python 3: chr() covers every code point

    def uchr(c):
        # Keep the narrow chr() for 0-255 so byte-string input on Python 2
        # is not needlessly promoted to unicode.
        if c > 255:
            return wide_chr(c)
        return chr(c)

    def decode_entity(match):
        what = match.group(1)
        if what.startswith('#'):
            try:
                if what[1] in 'xX':
                    return uchr(int(what[2:], 16))
                return uchr(int(what[1:]))
            except (ValueError, OverflowError):
                # Code point outside the valid range: keep the original text.
                return match.group(0)
        codepoint = name2codepoint.get(what)
        if codepoint is None:
            # Not a known entity name: keep the original text.
            return match.group(0)
        return uchr(codepoint)

    entity_sub = re.compile(r'&(#(\d+|[xX][\da-fA-F]+)|[\w.:-]+);?').sub
    return entity_sub(decode_entity, s)
def tounicode(line, enc=None):
    """Return *line* as a text (unicode) string, decoding bytes best-effort.

    * Text strings are returned unchanged.
    * Byte strings are decoded with *enc* when it is given and works,
      otherwise as UTF-8, and finally as ISO-8859-1.
    * Any other object is converted with ``repr()``.

    A failing or unknown *enc* is not an error; the fallbacks are used
    instead. Runs on both Python 2 and Python 3.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3

    if isinstance(line, text_type):
        return line
    if not isinstance(line, bytes):  # on Python 2, bytes is str
        return repr(line)

    if isinstance(enc, (str, text_type)):
        try:
            return line.decode(enc)
        except Exception:
            # Deliberate best-effort: a bad encoding name or undecodable
            # input just falls through to the default chain below.
            # Unlike a bare except, KeyboardInterrupt/SystemExit propagate.
            pass

    try:
        return line.decode('utf-8')
    except UnicodeDecodeError:
        # ISO-8859-1 maps every byte value, so this decode cannot fail and
        # no further fallback can ever be reached.
        return line.decode('iso-8859-1')
def unescapeuni(s):
    r"""Convert literal ``\uXXXX`` escapes in *s* to single characters.

    Each backslash-u followed by exactly four hexadecimal digits is
    replaced by the character with that code when the code is below 256.
    Escapes for code points of 256 and above are removed from the output.
    Anything that is not a well-formed escape (too few digits, non-hex
    digits) is left in the text unchanged.
    """
    def replace(match):
        code = int(match.group(1), 16)
        # Only 8-bit code points have a counterpart here; others are dropped.
        return chr(code) if code < 256 else ""

    # One left-to-right pass; decoded output is never rescanned.
    return re.sub(r"\\u([0-9a-fA-F]{4})", replace, s)
def removehtml(s):
    """Strip HTML tags from *s* and squeeze runs of spaces into one space."""
    tag_pattern = re.compile(r"<[^>]+>")
    space_run = re.compile(r" +")
    without_tags = tag_pattern.sub("", s)
    # Removing tags can leave adjacent spaces behind; collapse them.
    return space_run.sub(" ", without_tags)
def humantime(seconds, string=True):
    """Given seconds return a human readable time.

    The duration is split into weeks, days, hours, minutes and seconds.

    With ``string=True`` (the default) the result is a string such as
    ``"1h 1m 1s"``; units whose value is zero are omitted, and a duration
    of zero yields ``"0s"`` rather than an empty string.

    With ``string=False`` the result is the tuple ``(w, d, h, m, s)``.
    """
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    d, h = divmod(h, 24)
    w, d = divmod(d, 7)
    if not string:
        return (w, d, h, m, s)

    units = ((w, 'w'), (d, 'd'), (h, 'h'), (m, 'm'), (s, 's'))
    parts = [str(value) + suffix for value, suffix in units if value]
    # Every unit is zero for a zero duration; still return something readable.
    return ' '.join(parts) if parts else '0s'