Skip to content

Commit

Permalink
update version to v1.6.0
Browse files Browse the repository at this point in the history
  • Loading branch information
sndnvaps committed Jan 8, 2020
1 parent 107c4e9 commit 7e6bb82
Show file tree
Hide file tree
Showing 6 changed files with 400 additions and 140 deletions.
264 changes: 185 additions & 79 deletions 23us.la.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,73 @@ import (
"sync"

"github.com/Aiicy/htmlquery"
pool "github.com/dgrr/goslaves"
"gopkg.in/schollz/progressbar.v2"
)

//参考地址,创建规则
//https://www.23us.la/html/151/151850/ -> 罪域的骨终为王
//https://www.23us.la/html/209/209550/ -> 文娱万岁
//https://www.23us.la/html/113/113444/ -> 不朽凡人

//需要参考 https://segmentfault.com/a/1190000018475209 解决 返回的content与title不对应问题
/*
package main
import (
"fmt"
"sync"
"time"
)
func main() {
resultCh := make(chan chan string, 5000)
wg := sync.WaitGroup{}
go replay(resultCh)
startTime := time.Now()
operation2(resultCh, "aaa", &wg)
operation2(resultCh, "bbb", &wg)
operation1(resultCh, "ccc", &wg)
operation1(resultCh, "ddd", &wg)
operation2(resultCh, "eee", &wg)
wg.Wait()
endTime := time.Now()
fmt.Printf("Process time %s", endTime.Sub(startTime))
}
func replay(resultCh chan chan string)(){
for{
//拿到一个chan 读取值 这个时候拿到的是先进先出 因为所有方法是按顺序加入chan的
c := <- resultCh
//读取嵌套chan中的值,这个时候等待2秒,因为operation2中执行了2秒;在这2秒中,其余的4个方法其实也已经执行完毕,之后的方法则不需要再等待sleep的时间
r := <-c
fmt.Println(r)
}
}
func operation1(ch chan chan string, str string, wg *sync.WaitGroup)(){
//先创建一个chan 并给到嵌套chan 占据一个通道 这个通道是阻塞的
c := make(chan string)
ch <- c
wg.Add(1)
go func(str string){
time.Sleep(time.Second*1)
c <- "operation1:"+str
wg.Done()
}(str)
}
func operation2(ch chan chan string, str string, wg *sync.WaitGroup)(){
c := make(chan string)
ch <- c
wg.Add(1)
go func(str string){
time.Sleep(time.Second*2)
c <- "operation2:"+str
wg.Done()
}(str)
}
*/

//顶点小说网 23us.la
type Ebook23US struct {
Url string
Expand Down Expand Up @@ -54,24 +113,34 @@ func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {
description := htmlquery.SelectAttr(DescriptionMeta, "content")
fmt.Println("简介 = ", description)

//替换掉 volume是最前面的 作品名字
replaceStr := fmt.Sprintf("《%s》", bookName)
//获取书分卷信息
dtNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dt") //获取书分卷信息
testVolStr := htmlquery.InnerText(dtNode[1])

if TestContainVolume(testVolStr) {
bi.ChangeVolumeState(true)
if len(dtNode) == 2 { //就是说刚好两个节点,我们要去除第一个,只保留第二个
var tmp Volume
tmp.CurrentVolume = htmlquery.InnerText(dtNode[1])
volumes = append(volumes, tmp)
} else { //当len(dtNode) >= 3
for index := 1; index < len(dtNode); index++ { //因为第一个为 最新章节部分,需要去掉
for index := 0; index < len(dtNode); index++ { //因为第一个为 最新章节部分,需要去掉
var tmp Volume
//tmp.PrevChapterId =
PrevChapter, _ := htmlquery.FindOne(dtNode[index], "//preceding-sibling::dd[1]") // 根据当前节点,查找上一个dd节点
// 根据当前节点,查找上一个dd节点
PrevChapter, _ := htmlquery.FindOne(dtNode[index], "//preceding-sibling::dd[1]")
aNode, _ := htmlquery.Find(PrevChapter, "//a")
tmp.PrevChapter.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
tmp.PrevChapter.Title = htmlquery.InnerText(aNode[0])
tmp.CurrentVolume = htmlquery.InnerText(dtNode[index])

//根据当前节点,查找下一个dd节点
NextChapter, _ := htmlquery.FindOne(dtNode[index], "//following-sibling::dd[1]")
aNode, _ = htmlquery.Find(NextChapter, "//a")
tmp.NextChapter.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
CurrentVolume := htmlquery.InnerText(dtNode[index])
tmp.CurrentVolume = strings.Replace(CurrentVolume, replaceStr, "", -1)
tmp.NextChapter.Title = htmlquery.InnerText(aNode[0])
volumes = append(volumes, tmp)
}
}
Expand All @@ -86,7 +155,7 @@ func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {
aNode, _ := htmlquery.Find(ddNode[i], "//a")
tmp.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
tmp.Title = htmlquery.InnerText(aNode[0])
if bi.HasVolume && len(volumes) >= 2 { //正式写入 PrevChapterId
if bi.VolumeState() && len(volumes) >= 2 { //正式写入 PrevChapterId
for index := 1; index < len(volumes); index++ { //第二个分卷开始,前面就有章节内容了
if volumes[index].PrevChapter.Link == tmp.Link {
volumes[index].PrevChapterId = i
Expand All @@ -95,13 +164,14 @@ func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {
}
chapters = append(chapters, tmp)
}

HasVolume := bi.VolumeState() //先赋值给 HasVolume,再把值导入到结构体中,用于数据返回
//导入信息
bi = BookInfo{
Name: bookName,
Author: author,
Description: description,
Volumes: volumes,
HasVolume: HasVolume,
Chapters: chapters,
}
} else { //没有设置代理
Expand All @@ -125,6 +195,9 @@ func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {
description := htmlquery.SelectAttr(DescriptionMeta, "content")
fmt.Println("简介 = ", description)

//替换掉 volume是最前面的 作品名字
replaceStr := fmt.Sprintf("《%s》", bookName)

//获取书分卷信息
dtNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dt") //获取书分卷信息
testVolStr := htmlquery.InnerText(dtNode[1])
Expand All @@ -137,12 +210,19 @@ func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {
} else { //当len(dtNode) >= 3
for index := 1; index < len(dtNode); index++ { //因为第一个为 最新章节部分,需要去掉
var tmp Volume
//tmp.PrevChapterId =
PrevChapter, _ := htmlquery.FindOne(dtNode[index], "//preceding-sibling::dd[1]") // 根据当前节点,查找上一个dd节点
// 根据当前节点,查找上一个dd节点
PrevChapter, _ := htmlquery.FindOne(dtNode[index], "//preceding-sibling::dd[1]")
aNode, _ := htmlquery.Find(PrevChapter, "//a")
tmp.PrevChapter.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
tmp.PrevChapter.Title = htmlquery.InnerText(aNode[0])
tmp.CurrentVolume = htmlquery.InnerText(dtNode[index])

//根据当前节点,查找下一个dd节点
NextChapter, _ := htmlquery.FindOne(dtNode[index], "//following-sibling::dd[1]")
aNode, _ = htmlquery.Find(NextChapter, "//a")
tmp.NextChapter.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
tmp.NextChapter.Title = htmlquery.InnerText(aNode[0])
CurrentVolume := htmlquery.InnerText(dtNode[index])
tmp.CurrentVolume = strings.Replace(CurrentVolume, replaceStr, "", -1)
volumes = append(volumes, tmp)
}
}
Expand All @@ -157,118 +237,144 @@ func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {
aNode, _ := htmlquery.Find(ddNode[i], "//a")
tmp.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
tmp.Title = htmlquery.InnerText(aNode[0])
//fmt.Printf("tmp.Link = %s\n", tmp.Link) //用于测试
//fmt.Printf("tmp.Title = %s\n", tmp.Title) //用于测试

if bi.HasVolume && len(volumes) >= 2 { //正式写入 PrevChapterId
for index := 1; index < len(volumes); index++ { //第二个分卷开始,前面就有章节内容了
if bi.VolumeState() && len(volumes) >= 2 { //正式写入 PrevChapterId && NextChapterId
for index := 0; index < len(volumes); index++ {
if volumes[index].PrevChapter.Link == tmp.Link {
volumes[index].PrevChapterId = i
volumes[index].PrevChapterId = (i - 12) + 1 //表示 设置 第一个章节为0
}
if volumes[index].NextChapter.Link == tmp.Link {
volumes[index].NextChapterId = (i - 12) + 1 //表示 设置 第一个章节为0
}
}
}
chapters = append(chapters, tmp)
}

HasVolume := bi.VolumeState() //先赋值给 HasVolume,再把值导入到结构体中,用于数据返回
//导入信息
bi = BookInfo{
Name: bookName,
Author: author,
Description: description,
Volumes: volumes,
HasVolume: HasVolume,
Chapters: chapters,
}
}
return bi
}

// GetChapterContent downloads the page at pc.C.Link (through pc.Proxy when it
// is non-empty) and returns a Chapter that carries the original Title and Link
// plus the text of the page's <div id="content"> element.
//
// Every run of two U+3000 ideographic spaces in the extracted text is replaced
// by "\r\n". The method has no error return, so when the page cannot be loaded
// or contains no content element the returned Chapter simply has an empty
// Content; a missing document or node is never handed on to the next
// htmlquery call.
func (this Ebook23US) GetChapterContent(pc ProxyChapter) Chapter {
	result := Chapter{
		Title: pc.C.Title,
		Link:  pc.C.Link,
	}

	var contentText string
	if pc.Proxy != "" {
		doc, err := htmlquery.LoadURLWithProxy(pc.C.Link, pc.Proxy)
		if err != nil || doc == nil {
			return result
		}
		contentNode, err := htmlquery.FindOne(doc, "//div[@id='content']")
		if err != nil || contentNode == nil {
			return result
		}
		contentText = htmlquery.InnerText(contentNode)
	} else {
		doc, err := htmlquery.LoadURL(pc.C.Link)
		if err != nil || doc == nil {
			return result
		}
		contentNode, err := htmlquery.FindOne(doc, "//div[@id='content']")
		if err != nil || contentNode == nil {
			return result
		}
		contentText = htmlquery.InnerText(contentNode)
	}

	// "\xE3\x80\x80" is the UTF-8 encoding of U+3000; a pair of them becomes a
	// line break.
	result.Content = strings.Replace(contentText, "\xE3\x80\x80\xE3\x80\x80", "\r\n", -1)
	return result
}

//根据每个章节的 url连接,下载每章对应的内容Content当中
func (this Ebook23US) DownloadChapters(Bi BookInfo, proxy string) BookInfo {
chapters := Bi.Chapters

NumChapter := len(chapters)
ch := make(chan Chapter, 1)
locker := sync.Mutex{}
tmpChapter := make(chan Chapter, NumChapter)
ResultCh := make(chan chan Chapter, NumChapter)
wg := sync.WaitGroup{}
var c []Chapter
var bar *progressbar.ProgressBar
go AsycChapter(ResultCh, tmpChapter)
for index := 0; index < NumChapter; index++ {
tmp := ProxyChapter{
Proxy: proxy,
C: chapters[index],
}
this.DownloaderChapter(ResultCh, tmp, &wg)
}

sp := pool.NewPool(0, func(obj interface{}) {
locker.Lock()
tmp := obj.(ProxyChapter)
content := this.GetChapterContent(tmp)
locker.Unlock()
ch <- content

})

go excuteServe(&sp, chapters, proxy)
wg.Wait()

//下载章节的时候显示进度条
bar = progressbar.New(NumChapter)
bar.RenderBlank()

for i := 0; i < len(chapters); {
for index := 0; index < NumChapter; {
select {
case c := <-ch:
chapters[i].Content = c.Content
i++
case tmp := <-tmpChapter:
//fmt.Printf("tmp.Title = %s\n", tmp.Title)
//fmt.Printf("tmp.Content= %s\n", tmp.Content)
c = append(c, tmp)
index++
if index == (NumChapter - 1) {
goto ForEnd
}
}
bar.Add(1)

}
sp.Close()
ForEnd:

result := BookInfo{
Name: Bi.Name,
Author: Bi.Author,
Description: Bi.Description,
Chapters: chapters,
Volumes: Bi.Volumes, //小说分卷信息在 GetBookInfo()的时候已经下载完成
HasVolume: Bi.VolumeState(), //小说分卷信息在 GetBookInfo()的时候已经定义
Chapters: c,
}

return result
}

// DownloaderChapter schedules the download of one chapter.
//
// It creates an unbuffered result channel, pushes that channel onto ResultChan
// before any work starts (so the position on ResultChan reflects call order,
// not completion order), adds one to wg, and starts a goroutine that fetches
// the chapter and sends it on the result channel. wg.Done is deferred, so it
// runs once that send has completed.
//
// Exactly one Chapter is always sent, even when the download fails, so the
// reader of the per-chapter channel is never left waiting; a failed download
// yields a Chapter with an empty Content.
func (this Ebook23US) DownloaderChapter(ResultChan chan chan Chapter, pc ProxyChapter, wg *sync.WaitGroup) {
	c := make(chan Chapter)
	ResultChan <- c
	wg.Add(1)
	go func(pc ProxyChapter) {
		defer wg.Done()
		c <- fetchChapter23US(pc)
	}(pc)
}

// fetchChapter23US loads pc.C.Link (through pc.Proxy when it is non-empty),
// extracts the text of <div id="content"> and returns it, cleaned up, together
// with the chapter's Title and Link. Content stays empty when the page cannot
// be loaded or has no content element.
func fetchChapter23US(pc ProxyChapter) Chapter {
	result := Chapter{
		Title: pc.C.Title,
		Link:  pc.C.Link,
	}

	var contentText string
	if pc.Proxy != "" {
		doc, err := htmlquery.LoadURLWithProxy(pc.C.Link, pc.Proxy)
		if err != nil || doc == nil {
			return result
		}
		contentNode, err := htmlquery.FindOne(doc, "//div[@id='content']")
		if err != nil || contentNode == nil {
			return result
		}
		contentText = htmlquery.InnerText(contentNode)
	} else {
		doc, err := htmlquery.LoadURL(pc.C.Link)
		if err != nil || doc == nil {
			return result
		}
		contentNode, err := htmlquery.FindOne(doc, "//div[@id='content']")
		if err != nil || contentNode == nil {
			return result
		}
		contentText = htmlquery.InnerText(contentNode)
	}

	// A pair of U+3000 ideographic spaces ("\xE3\x80\x80" each) becomes a line
	// break.
	tmp := strings.Replace(contentText, "\xE3\x80\x80\xE3\x80\x80", "\r\n", -1)
	// Strip leftover markup and link residue from the extracted text.
	tmp = strings.Replace(tmp, "</p>", "", -1)
	tmp = strings.Replace(tmp, "(https://)", "", -1)

	result.Content = tmp
	return result
}

// TestContainVolume reports whether src, the text of a <dt> heading from the
// chapter list, denotes a separate volume. It returns false when src contains
// "正文" (main text), which also covers the older "正文卷" marker, and true
// otherwise.
func TestContainVolume(src string) bool {
	return !strings.Contains(src, "正文")
}
Loading

0 comments on commit 7e6bb82

Please sign in to comment.