-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnovel_downloader.py
317 lines (279 loc) · 15.7 KB
/
novel_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
from typing import List, Any
import requests
import socket
import random
import re
import time
from tkinter import *
from bs4 import BeautifulSoup
import os
import tkinter.messagebox
import tkinter.simpledialog
import tkinter.filedialog
import configparser
import datetime
# Default timeout, in seconds, applied to sockets created after this call.
socket.setdefaulttimeout(30)
# Pool of User-Agent header values. The download functions in this file pick
# one at random (random.choice) for each HTTP request they send.
# NOTE: a few entries appear more than once in the list, so those strings are
# proportionally more likely to be chosen.
uapools = [
"Mozilla/5.0 (Linux; U; Android 2.3.6; en-us; Nexus S Build/GRK39F) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
"Avant Browser/1.2.789rel1 (http://www.avantbrowser.com)",
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.0 Safari/532.5",
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.9 (KHTML, like Gecko) Chrome/5.0.310.0 Safari/532.9",
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.514.0 Safari/534.7",
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/9.0.601.0 Safari/534.14",
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/10.0.601.0 Safari/534.14",
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.27 (KHTML, like Gecko) Chrome/12.0.712.0 Safari/534.27",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24 Safari/535.1",
"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7",
"Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre",
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10",
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)",
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 GTB5",
"Mozilla/5.0 (Windows; U; Windows NT 5.1; tr; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8 ( .NET CLR 3.5.30729; .NET4.0E)",
"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
"Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0",
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0a2) Gecko/20110622 Firefox/6.0a2",
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1",
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0b4pre) Gecko/20100815 Minefield/4.0b4pre",
"Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0 )",
"Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; Win 9x 4.90)",
"Mozilla/5.0 (Windows; U; Windows XP) Gecko MultiZilla/1.6.1.0a",
"Mozilla/2.02E (Win95; U)",
"Mozilla/3.01Gold (Win95; I)",
"Mozilla/4.8 [en] (Windows NT 5.1; U)",
"Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.4) Gecko Netscape/7.1 (ax)",
"HTC_Dream Mozilla/5.0 (Linux; U; Android 1.5; en-ca; Build/CUPCAKE) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
"Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.2; U; de-DE) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/234.40.1 Safari/534.6 TouchPad/1.0",
"Mozilla/5.0 (Linux; U; Android 1.5; en-us; sdk Build/CUPCAKE) AppleWebkit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
"Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
"Mozilla/5.0 (Linux; U; Android 2.2; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
"Mozilla/5.0 (Linux; U; Android 1.5; en-us; htc_bahamas Build/CRB17) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
"Mozilla/5.0 (Linux; U; Android 2.1-update1; de-de; HTC Desire 1.19.161.5 Build/ERE27) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
"Mozilla/5.0 (Linux; U; Android 2.2; en-us; Sprint APA9292KT Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
"Mozilla/5.0 (Linux; U; Android 1.5; de-ch; HTC Hero Build/CUPCAKE) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
"Mozilla/5.0 (Linux; U; Android 2.2; en-us; ADR6300 Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
"Mozilla/5.0 (Linux; U; Android 2.1; en-us; HTC Legend Build/cupcake) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
"Mozilla/5.0 (Linux; U; Android 1.5; de-de; HTC Magic Build/PLAT-RC33) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1 FirePHP/0.3",
"Mozilla/5.0 (Linux; U; Android 1.6; en-us; HTC_TATTOO_A3288 Build/DRC79) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
"Mozilla/5.0 (Linux; U; Android 1.0; en-us; dream) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2",
"Mozilla/5.0 (Linux; U; Android 1.5; en-us; T-Mobile G1 Build/CRB43) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari 525.20.1",
"Mozilla/5.0 (Linux; U; Android 1.5; en-gb; T-Mobile_G2_Touch Build/CUPCAKE) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
"Mozilla/5.0 (Linux; U; Android 2.0; en-us; Droid Build/ESD20) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
"Mozilla/5.0 (Linux; U; Android 2.2; en-us; Droid Build/FRG22D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
"Mozilla/5.0 (Linux; U; Android 2.0; en-us; Milestone Build/ SHOLS_U2_01.03.1) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
"Mozilla/5.0 (Linux; U; Android 2.0.1; de-de; Milestone Build/SHOLS_U2_01.14.0) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
"Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2",
"Mozilla/5.0 (Linux; U; Android 0.5; en-us) AppleWebKit/522 (KHTML, like Gecko) Safari/419.3",
"Mozilla/5.0 (Linux; U; Android 1.1; en-gb; dream) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2",
"Mozilla/5.0 (Linux; U; Android 2.0; en-us; Droid Build/ESD20) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
"Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
"Mozilla/5.0 (Linux; U; Android 2.2; en-us; Sprint APA9292KT Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
"Mozilla/5.0 (Linux; U; Android 2.2; en-us; ADR6300 Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
"Mozilla/5.0 (Linux; U; Android 2.2; en-ca; GT-P1000M Build/FROYO) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
"Mozilla/5.0 (Linux; U; Android 3.0.1; fr-fr; A500 Build/HRI66) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
"Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2",
"Mozilla/5.0 (Linux; U; Android 1.6; es-es; SonyEricssonX10i Build/R1FA016) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
"Mozilla/5.0 (Linux; U; Android 1.6; en-us; SonyEricssonX10i Build/R1AA056) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
]
# -----------------------------------------------------
class MyConfig:
    """Thin wrapper around an INI file used to remember download state.

    Every public method re-reads the file from disk before acting, so the
    object itself holds no parsed state besides the file path.  Methods report
    failure through their return value instead of raising.
    """

    _configFile = ''
    # ``None`` selects the platform's default text encoding for both reading
    # and writing.  The previous code wrote with the codec name 'ANSI', which
    # only resolves on Windows (LookupError elsewhere), while reading with the
    # default encoding; using one setting everywhere keeps the two in sync.
    _encoding = None

    def __init__(self, ini_path: str):
        """Remember *ini_path* and create an empty file there if none exists."""
        self._configFile = ini_path
        if not os.path.isfile(self._configFile):
            # Append mode creates the file without ever truncating content.
            with open(self._configFile, mode='a', encoding=self._encoding):
                pass

    def _load(self):
        """Parse the INI file and return a fresh ConfigParser."""
        cf = configparser.ConfigParser()
        cf.read(self._configFile, encoding=self._encoding)
        return cf

    def _save(self, cf) -> None:
        """Write *cf* back to the INI file, closing the handle afterwards."""
        with open(self._configFile, mode='w', encoding=self._encoding) as fo:
            cf.write(fo)

    def Get_All_Section(self):
        """Return the list of section names, or '' if the file cannot be read."""
        try:
            return self._load().sections()
        except Exception:
            # Kept as '' (an empty iterable) for backward compatibility.
            return ''

    def Add_detail(self, section, option, value) -> bool:
        """Create *section* with ``option = value`` and an empty ``current_url``.

        Returns True on success, False on any failure (for example when the
        section already exists); the error is printed.
        """
        try:
            cf = self._load()
            cf.add_section(section)
            cf.set(section, option, value)
            cf.set(section, 'current_url', '')
            self._save(cf)
            return True
        except Exception as ex:
            print('add_detail的error---repr(e):\t', repr(ex))
            return False

    def Get_option(self, section, option):
        """Return the stored value, or the literal '没有找到' if it is missing."""
        try:
            return self._load().get(section, option)
        except Exception:
            return '没有找到'

    def Set_option(self, section, option, value):
        """Store ``option = value`` in an existing *section*.

        Returns 'ok' on success and 'error' on any failure (for example when
        the section does not exist).
        """
        try:
            cf = self._load()
            # set() runs before the file is opened for writing, so a missing
            # section cannot leave a truncated file behind.
            cf.set(section, option, value)
            self._save(cf)
            return 'ok'
        except Exception:
            return 'error'

    def Has_section(self, section) -> bool:
        """Return True if *section* exists; False if not or on read failure."""
        try:
            return self._load().has_section(section)
        except Exception as ex:
            print('Has_section的error---repr(e):\t', repr(ex))
            return False
# -----------------------------------------------------
def new_novel_detail(url, novel_name):
    """Fetch one chapter page and append its title and text to *novel_name*.

    The page is decoded as GBK; the title is taken from the first ``<h1>`` and
    the body from the first ``<div id="content">``.  The title is also shown in
    the ``lab6`` label.

    Args:
        url: Address of the chapter page.
        novel_name: Path of the text file the chapter is appended to.

    Raises:
        Whatever the download or parsing raises (network errors, a page
        without ``<h1>`` or content ``<div>``); only file-writing errors are
        handled here.
    """
    # Chapter-page layout targeted here: https://www.biqubao.com/
    nov_headers = {
        "User-Agent": random.choice(uapools)
    }
    # requests applies no timeout unless one is passed explicitly, so reuse
    # the process-wide socket default to avoid hanging on a stalled server.
    res = requests.get(url, headers=nov_headers, timeout=socket.getdefaulttimeout())
    res.encoding = 'gbk'
    soup = BeautifulSoup(res.text, 'html.parser')
    rst_title = soup.h1.text
    rst_novel = soup.find_all('div', id="content")[0].text.replace(' ', '\n').replace(u'\xa0', u' ')
    lab6['text'] = rst_title
    try:
        # NOTE(review): the file is opened with the platform default encoding;
        # text that cannot be encoded ends up in the except branch below.
        with open(novel_name, 'a') as fileobject:
            fileobject.write(rst_title)
            fileobject.write(rst_novel + '\n')
            fileobject.write('\n')
    except Exception as ex:
        print('new_novel_detail--repr(e):\t', repr(ex))
        # Leave a marker in the output so the missing chapter is visible.
        with open(novel_name, 'a') as fileobject:
            fileobject.write(rst_title + '\n')
            fileobject.write(repr(ex) + '\n')
def biquge_info_v2(url):
    """Download all not-yet-saved chapters of the novel indexed at *url*.

    The index page is decoded as GBK; the novel title comes from its ``<h1>``
    and chapter links from the ``<a>`` inside each ``<dd>``.  Progress is kept
    in ``cfg`` under a section named after the novel title: ``url`` stores the
    index address and ``current_url`` the last chapter that was downloaded, so
    a later call resumes after it.  Chapters are appended to
    ``<novel title>.txt`` via ``new_novel_detail``.

    The ``btn`` button is disabled while the function runs and re-enabled when
    it finishes, whether or not it succeeded.  Any exception is reported in
    the ``lab6`` label and printed; nothing is raised to the caller.

    Args:
        url: Address of the novel's chapter index page.
    """
    # Index-page layout targeted here: https://www.biqubao.com/
    btn['state'] = 'disabled'
    start_time = datetime.datetime.now()
    nov_headers = {
        "User-Agent": random.choice(uapools)
    }
    try:
        # requests applies no timeout unless one is passed explicitly.
        res = requests.get(url, headers=nov_headers, timeout=socket.getdefaulttimeout())
        res.encoding = 'gbk'
        soup = BeautifulSoup(res.text, 'html.parser')
        rst_novel_title = soup.h1.text  # novel title
        print(rst_novel_title)
        txt_novel = rst_novel_title + '.txt'
        txt_title['text'] = txt_novel

        # Short (last path segment) link of every chapter, in page order.
        # A <dd> without a link is skipped instead of aborting the whole run.
        each_url = []
        for each in soup.find_all('dd'):
            href = each.a.get('href') if each.a is not None else None
            if href:
                each_url.append(href.split('/')[-1])
        novel_total_num = len(each_url)

        novel_current_num = 0
        if cfg.Has_section(rst_novel_title):
            text_text = '继续下载中...'
            url_history = cfg.Get_option(rst_novel_title, 'current_url')
            # An empty history means the section exists but no chapter was
            # ever completed: start from the first chapter rather than
            # failing on index('').
            if url_history:
                novel_current_num = each_url.index(url_history) + 1
            lab6['text'] = '上次下载到:' + str(novel_current_num) + '章,一共有:' + str(novel_total_num) + '章。'
            current_each_url = each_url[novel_current_num:]
            print(current_each_url)
        else:
            text_text = '下载中...'
            cfg.Add_detail(rst_novel_title, 'url', url)
            current_each_url = each_url

        # Chapter links are relative to the index page's directory.
        base_url = url if url.endswith('/') else url + '/'
        for each_novle_url in current_each_url:
            novel_current_num = novel_current_num + 1
            percent = int(100 * novel_current_num / novel_total_num)  # label percentage
            lab5['text'] = text_text + str(percent) + '%'
            percent_x = int(300 * novel_current_num / novel_total_num)  # bar width in pixels
            process(percent_x, remain_time(start_time))
            new_novel_detail(base_url + each_novle_url, txt_novel)
            # Record progress only after the chapter was fetched, so a failed
            # chapter is retried on the next run instead of being skipped.
            cfg.Set_option(rst_novel_title, 'current_url', str(each_novle_url))
        lab5['text'] = '下载完成'
    except Exception as ex:
        print('repr(e):\t', repr(ex))
        lab6['fg'] = 'red'
        lab6['text'] = '下载小说的地址有问题,请重新输入!'
    finally:
        btn['state'] = 'normal'
def remain_time(StartTime):
    """Return the whole number of seconds elapsed since *StartTime*.

    Despite the name, the value is time already spent, not time left.

    Args:
        StartTime: A naive ``datetime.datetime`` taken with ``now()`` earlier.

    Returns:
        Elapsed seconds as an int, never negative.
    """
    elapsed = datetime.datetime.now() - StartTime
    # total_seconds() includes whole days; the ``seconds`` attribute alone
    # wraps back to 0 every 24 hours.
    return max(0, int(elapsed.total_seconds()))
def btn_point_v2():
    """Button callback: download the novel whose index URL is in ``txt_url``.

    The "update all" button (``btn1``) is disabled for the duration of the
    download and restored afterwards; ``biquge_info_v2`` manages ``btn``
    itself.
    """
    btn1['state'] = 'disabled'
    try:
        novel_url = txt_url.get()
        # Blank the progress bar by painting a white rectangle over the canvas.
        fill_line = canvas.create_rectangle(1.5, 1.5, 0, 21, width=0, fill='white')
        canvas.coords(fill_line, (1, 1, 300, 22))
        root.update()
        biquge_info_v2(novel_url)
    finally:
        # Previously btn1 was never re-enabled after a single download.
        btn1['state'] = 'normal'
def btn_point_allcontinue():
    """Button callback: resume the download of every novel recorded in ``cfg``.

    For each section of the config file the stored ``url`` option is read and
    passed to ``biquge_info_v2``.  ``btn1`` is disabled while the loop runs
    and enabled again once it has finished.
    """
    btn1['state'] = 'disabled'
    for saved_section in cfg.Get_All_Section():
        saved_url = cfg.Get_option(saved_section, 'url')
        # Paint a white rectangle over the canvas to blank the progress bar.
        blank_bar = canvas.create_rectangle(1.5, 1.5, 0, 21, width=0, fill='white')
        canvas.coords(blank_bar, (1, 1, 300, 22))
        time.sleep(1)
        biquge_info_v2(saved_url)
    btn1['state'] = 'normal'
def process(x, r_time):
    """Redraw the progress bar and the elapsed-time text on ``canvas``.

    Args:
        x: Right edge of the green bar, in canvas pixels.
        r_time: Number of seconds to display, shown as ``h:m:s``.
    """
    hours, remainder = divmod(r_time, 3600)
    minutes, seconds = divmod(remainder, 60)
    # Remove what the previous call drew.  Without this every call stacked a
    # new rectangle and text item on the canvas, and old time texts stayed
    # visible underneath the new one until the bar grew wide enough to cover
    # them.
    canvas.delete('progress')
    fill_line = canvas.create_rectangle(1.5, 1.5, 0, 21, width=0, fill='green', tags='progress')
    canvas.coords(fill_line, (1, 1, x, 22))
    canvas.create_text(150, 11, text='{}:{}:{}'.format(hours, minutes, seconds), tags='progress')
    # Let Tk repaint now; the caller is a long-running loop.
    root.update()
if __name__ == '__main__':
    # Everything assigned here is a module-level name; the callbacks and
    # download functions defined above reach these objects as globals.
    cfg = MyConfig('novel.ini')

    # Fixed-size main window.
    root = Tk()
    root.title('www.biqubao.com 小说下载器')
    root.resizable(False, False)
    root.geometry('500x200')
    path_var = tkinter.StringVar()

    # Heading plus the captions of the title and URL rows.
    lab1 = tkinter.Label(root, fg='green', text='www.biqubao.com小说下载器')
    lab1.grid(column=1, row=0)
    # grid() returns None, so lab2 and lab3 end up holding None.
    lab2 = tkinter.Label(root, fg='blue', text='小说书名:').grid(column=0, row=1)
    lab3 = tkinter.Label(root, fg='blue', text='小说首页:').grid(column=0, row=2)

    # Download progress: percentage label and the canvas used as a bar.
    lab5 = tkinter.Label(root, text='下载进度')
    lab5.place(y=100, x=0)
    canvas = tkinter.Canvas(root, bg='white', height=20, width=300)
    canvas.place(y=100, x=99)

    # Status line showing the current chapter title or an error message.
    lab6 = tkinter.Label(root, text='章节标题')
    lab6.place(y=130, x=0)

    # Output file name (filled in once the novel title is known) and the
    # entry field for the index URL.
    txt_title = tkinter.Label(root, text='未输入书名')
    txt_title.grid(column=1, row=1)
    txt_url = tkinter.Entry()
    txt_url.grid(column=1, row=2)

    # Action buttons: single download and "update every recorded novel".
    btn = tkinter.Button(root, state='normal', command=btn_point_v2, text='下载_提交')
    btn.grid(column=1, row=3)
    btn1 = tkinter.Button(root, state='normal', command=btn_point_allcontinue, text='全部追更')
    btn1.grid(column=2, row=3)

    root.mainloop()
# biquge_info_v2('https://www.biquge.info/41_41185/')