-
Notifications
You must be signed in to change notification settings - Fork 1
/
chm_to_html.py
68 lines (63 loc) · 2.57 KB
/
chm_to_html.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import re
import requests
from lxml import etree
from lxml import html
import json
import time
import pymysql
import json
class chm_to_py():
    """Turn the table of contents of a CHM ``.hhc`` file into a JSON menu.

    ``deal_hcc`` reads the file at ``hcc_path``, repairs the unclosed
    ``<li>`` tags with regular expressions, parses the result with lxml and
    writes the nested menu to ``menu.js`` as ``var json_str = <json>``.
    ``deal_ul`` performs the recursive walk over one ``<ul>`` element.
    """

    # Path of the .hhc table-of-contents file read by deal_hcc.
    hcc_path = "html/content.hhc"
    # Passed to html_warp.format(); the template below has no placeholder,
    # so the value currently does not appear in the generated markup.
    title = "2_1_Pathfinder v2.01"
    # HTML prologue placed in front of the repaired <body> content so that
    # lxml receives a complete document.
    html_warp = '''
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport"
content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Document</title>
</head>
<body>'''

    def deal_hcc(self):
        """Convert the ``.hhc`` file at ``hcc_path`` into ``menu.js``.

        The file content is lower-cased before parsing, so every name and
        path in the output is lower-case as well.

        Raises:
            OSError: if ``hcc_path`` cannot be read or ``menu.js`` cannot be
                written.
            IndexError: if the file has no ``<body>...</body>`` section or
                the body contains no top-level ``<ul>``.
        """
        # The context manager closes the handle even when reading fails.
        with open(self.hcc_path, "r") as f:
            hcc_content = f.read()
        html_warp = self.html_warp.format(self.title)
        # Lower-casing makes tag and attribute names match the lower-case
        # XPath expressions used in deal_ul.
        hcc_content = hcc_content.lower()
        html_body = re.findall(
            r'<body>(.*?)</body>', hcc_content, flags=re.S | re.I
        )[0]
        # Use regular expressions to add the missing </li> closing tags:
        # first for items that own a nested <ul>, then for leaf items that
        # end right after their </object>.
        html_body = re.sub(
            r'<li>(.*?)<ul>(.*?)</ul>',
            r'<li>\1<ul>\2</ul></li>',
            html_body,
            flags=re.S | re.I,
        )
        html_body = re.sub(
            r'<li>(.*?)</object>\s+(</ul></li>\s+)?(?=<li>)',
            r'<li>\1</object></li>\2',
            html_body,
            flags=re.S | re.I,
        )
        html_body = (html_warp + html_body + "</body></html>").lower()
        sel = etree.HTML(html_body)
        rootul = sel.xpath('body/ul')[0]
        result = self.deal_ul(rootul)
        # Serialize the menu tree as a JavaScript variable assignment.
        json_str = json.dumps(result, ensure_ascii=False)
        # The context manager guarantees the output is flushed and closed.
        with open("menu.js", "w", encoding='utf8') as f:
            f.write("var json_str = " + json_str)

    @staticmethod
    def _param_value(li, name):
        """Return the value of the ``<param>`` called *name* under
        ``li/object``, or ``''`` when there is no such parameter."""
        values = li.xpath('./object/param[@name="%s"]/@value' % name)
        return values[0] if values else ''

    def deal_ul(self, ul):
        """Recursively convert one ``<ul>`` element into a list of dicts.

        Args:
            ul: an element offering ``xpath()``, or an empty list.

        Returns:
            One dict per direct ``<li>`` child with the keys ``name``,
            ``local``, ``imagenumber`` (each ``''`` when absent) and
            ``childs`` (the converted nested ``<ul>``, or ``[]``).
            An empty list is returned for ``[]`` or when querying the
            ``<li>`` children fails; the failure is printed, not raised.
        """
        if ul == []:
            return []
        try:
            lis = ul.xpath('./li')
        except Exception as e:
            # Best effort: report the problem and treat the level as empty.
            print(repr(e))
            return []
        li_list = []
        for li in lis:
            sub_uls = li.xpath('./ul')
            li_list.append({
                "name": self._param_value(li, "name"),
                "local": self._param_value(li, "local"),
                "imagenumber": self._param_value(li, "imagenumber"),
                # Only the first nested <ul> of an item is followed.
                "childs": self.deal_ul(sub_uls[0]) if sub_uls else [],
            })
        return li_list
# Run the conversion right away: build a converter and process the .hhc file.
chm_to_py.deal_hcc(chm_to_py())