forked from nghuyong/ZhengFang
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparseHtml.py
76 lines (67 loc) · 2.97 KB
/
parseHtml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# author: HuYong
# coding=utf-8
from bs4 import BeautifulSoup
# Parse the student's personal information out of the web page
def getStudentInfor(response):
    """Parse the student's personal details out of the profile page.

    Args:
        response: HTTP response object whose ``content`` attribute holds the
            page body as GB2312-encoded bytes (presumably a ``requests``
            response -- confirm against the caller).

    Returns:
        dict mapping each field name below to the ``.string`` of the page
        element with the matching id. A value is ``None`` whenever
        BeautifulSoup's ``.string`` is ``None`` for that element.

    Raises:
        UnicodeDecodeError: if the body is not valid GB2312.
        AttributeError: if one of the expected elements is not on the page.
    """
    # Decode exactly once: the result is already text. The previous second
    # ``.decode("utf-8")`` on that text raised AttributeError on Python 3 and
    # UnicodeEncodeError (implicit ASCII encode) on Python 2.
    html = response.content.decode("gb2312")
    soup = BeautifulSoup(html, "html5lib")

    # (result key, id of the page element that carries the value).
    # NOTE: "birthsday" is misspelled, but it is a key callers may already
    # read, so it is kept as-is.
    fields = (
        ("studentnumber", "xh"),
        ("idCardNumber", "lbl_sfzh"),
        ("name", "xm"),
        ("sex", "lbl_xb"),
        ("enterSchoolTime", "lbl_rxrq"),
        ("birthsday", "lbl_csrq"),
        ("highschool", "lbl_byzx"),
        ("nationality", "lbl_mz"),
        ("hometown", "lbl_jg"),
        ("politicsStatus", "lbl_zzmm"),
        ("college", "lbl_xy"),
        ("major", "lbl_zymc"),
        ("classname", "lbl_xzb"),
        ("gradeClass", "lbl_dqszj"),
    )

    d = {}
    for key, element_id in fields:
        d[key] = soup.find(id=element_id).string
    return d
# Parse the class schedule (timetable) information out of the web page
def getClassScheduleFromHtml(response):
    """Parse the timetable page into a list of class records.

    Args:
        response: HTTP response object whose ``content`` attribute holds the
            page body as GB2312-encoded bytes; undecodable bytes are dropped.

    Returns:
        dict with two entries:
            ``"classes"``: list of dicts, one per timetable cell that holds
                more than a single text node. Each has the keys ``name``,
                ``type``, ``time``, ``teacher``, ``location`` (filled
                positionally from the cell's text pieces, ``""`` when
                missing) plus ``timeInTheWeek``, ``timeInTheDay`` and
                ``timeInTheTerm`` derived from ``time``.
            ``"__VIEWSTATE"``: the ``value`` attribute of the third
                ``<input>`` element on the page.

    Raises:
        IndexError: if the page has fewer than three ``<input>`` elements.
        AttributeError: if the page has no element with id ``Table1``.
    """
    # Decode exactly once: the result is already text. The previous second
    # ``.decode("utf-8")`` on that text raised AttributeError on Python 3 and
    # UnicodeEncodeError (implicit ASCII encode) on Python 2.
    html = response.content.decode("gb2312", "ignore")
    soup = BeautifulSoup(html, "html5lib")

    # The third <input> is taken positionally; presumably that is the hidden
    # ASP.NET __VIEWSTATE field -- verify if the page layout changes.
    view_state = soup.findAll(name="input")[2]["value"]

    oneClassKeys = ["name", "type", "time", "teacher", "location"]
    classes = []
    for tr in soup.find(id="Table1").find_all('tr'):
        for td in tr.find_all('td'):
            # Cells with a single text node (.string is set) are skipped;
            # only cells with mixed content are treated as class entries.
            if td.string is not None:
                continue

            # Collect the text pieces of the cell, ignoring children whose
            # .string is None (e.g. tags without a single text node).
            oneClassValues = [
                child.string for child in td.children
                if child.string is not None
            ]
            if not oneClassValues:
                # A cell with no text at all carries no class; the original
                # crashed with IndexError on such a cell.
                continue

            # Pad so that every key gets a value; zip() drops any extras.
            while len(oneClassValues) < len(oneClassKeys):
                oneClassValues.append("")
            oneClass = dict(zip(oneClassKeys, oneClassValues))

            # ``time`` is split at the first "{": the first two characters
            # before it are the weekday part, the rest before it the
            # in-day part, and the text after it (minus the last
            # character, presumably the closing "}") the term part.
            time_parts = oneClass["time"].split("{")
            oneClass["timeInTheWeek"] = time_parts[0][:2]
            oneClass["timeInTheDay"] = time_parts[0][2:]
            # Guard against a time string without "{" instead of raising
            # IndexError.
            if len(time_parts) > 1:
                oneClass["timeInTheTerm"] = time_parts[1][:-1]
            else:
                oneClass["timeInTheTerm"] = ""
            classes.append(oneClass)

    return {"classes": classes, "__VIEWSTATE": view_state}
def get__VIEWSTATE(response):
    """Extract the ``value`` of the third ``<input>`` element of a page.

    Args:
        response: HTTP response object whose ``content`` attribute holds the
            page body as GB2312-encoded bytes.

    Returns:
        The ``value`` attribute of the third ``<input>`` element; presumably
        this is the hidden ASP.NET ``__VIEWSTATE`` field -- verify if the
        page layout changes.

    Raises:
        UnicodeDecodeError: if the body is not valid GB2312.
        IndexError: if the page has fewer than three ``<input>`` elements.
    """
    # Decode exactly once: the result is already text. The previous second
    # ``.decode("utf-8")`` on that text raised AttributeError on Python 3 and
    # UnicodeEncodeError (implicit ASCII encode) on Python 2.
    html = response.content.decode("gb2312")
    soup = BeautifulSoup(html, "html5lib")
    return soup.findAll(name="input")[2]["value"]
def getGrade(response):
    """Parse the grade table of the transcript page into a list of records.

    Args:
        response: HTTP response object whose ``content`` attribute holds the
            page body as GB2312-encoded bytes.

    Returns:
        list of dicts, one per row of the ``Datagrid1`` table after the
        first row. Each dict has the keys ``year``, ``term``, ``name``,
        ``type``, ``credit``, ``gradePonit`` and ``grade``, filled from
        columns 0, 1, 3, 4, 6, 7 and 8 of the row. A value is ``None``
        whenever BeautifulSoup's ``.string`` is ``None`` for that cell.

    Raises:
        UnicodeDecodeError: if the body is not valid GB2312.
        AttributeError: if the page has no element with id ``Datagrid1``.
    """
    # Decode exactly once: the result is already text. The previous second
    # ``.decode("utf-8")`` on that text raised AttributeError on Python 3 and
    # UnicodeEncodeError (implicit ASCII encode) on Python 2.
    html = response.content.decode("gb2312")
    soup = BeautifulSoup(html, "html5lib")

    # NOTE: "gradePonit" is misspelled, but it is a key callers may already
    # read, so it is kept as-is.
    oneGradeKeys = ["year", "term", "name", "type", "credit", "gradePonit", "grade"]

    Grades = []
    # The first row is skipped (presumably the table header).
    for tr in soup.find(id="Datagrid1").findAll("tr")[1:]:
        tds = tr.findAll("td")
        # Keep only the columns of interest: 0-1, 3-4 and 6-8.
        wanted = tds[:2] + tds[3:5] + tds[6:9]
        # zip() stops at the shorter side, so short rows yield fewer keys.
        Grades.append(dict(zip(oneGradeKeys, [td.string for td in wanted])))
    return Grades