update version to v1.6.0

sndnvaps · Jan 8, 2020 · 7e6bb82 · 7e6bb82
1 parent 107c4e9
commit 7e6bb82
Show file tree

Hide file tree

Showing 6 changed files with 400 additions and 140 deletions.
diff --git a/23us.la.go b/23us.la.go
@@ -6,14 +6,73 @@ import (
  "sync"
 
  "github.com/Aiicy/htmlquery"
- pool "github.com/dgrr/goslaves"
  "gopkg.in/schollz/progressbar.v2"
 )
 
 //参考地址，创建规则
 //https://www.23us.la/html/151/151850/ -> 罪域的骨终为王
 //https://www.23us.la/html/209/209550/ -> 文娱万岁
 //https://www.23us.la/html/113/113444/ -> 不朽凡人
+
+//需要参考 https://segmentfault.com/a/1190000018475209 解决 返回的content与title不对应问题
+/*
+package main
+
+import (
+ "fmt"
+ "sync"
+ "time"
+)
+
+func main() {
+ resultCh := make(chan chan string, 5000)
+ wg := sync.WaitGroup{}
+ go replay(resultCh)
+ startTime := time.Now()
+ operation2(resultCh, "aaa", &wg)
+ operation2(resultCh, "bbb", &wg)
+ operation1(resultCh, "ccc", &wg)
+ operation1(resultCh, "ddd", &wg)
+ operation2(resultCh, "eee", &wg)
+ wg.Wait()
+ endTime := time.Now()
+ fmt.Printf("Process time %s", endTime.Sub(startTime))
+}
+
+func replay(resultCh chan chan string)(){
+ for{
+ //拿到一个chan 读取值 这个时候拿到的是先进先出 因为所有方法是按顺序加入chan的
+ c := <- resultCh
+ //读取嵌套chan中的值，这个时候要等待2秒，因为operation2中执行了2秒；在这2秒内其余的4个方法也已经执行完毕，之后的方法则不需要再等待sleep的时间
+ r := <-c
+ fmt.Println(r)
+ }
+}
+
+func operation1(ch chan chan string, str string, wg *sync.WaitGroup)(){
+ //先创建一个chan 并放入嵌套chan 占据一个通道 这个通道是阻塞的
+ c := make(chan string)
+ ch <- c
+ wg.Add(1)
+ go func(str string){
+ time.Sleep(time.Second*1)
+ c <- "operation1:"+str
+ wg.Done()
+ }(str)
+}
+
+func operation2(ch chan chan string, str string, wg *sync.WaitGroup)(){
+ c := make(chan string)
+ ch <- c
+ wg.Add(1)
+ go func(str string){
+ time.Sleep(time.Second*2)
+ c <- "operation2:"+str
+ wg.Done()
+ }(str)
+}
+*/
+
 //顶点小说网 23us.la
 type Ebook23US struct {
  Url string
@@ -54,24 +113,34 @@ func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {
  description := htmlquery.SelectAttr(DescriptionMeta, "content")
  fmt.Println("简介 = ", description)
 
+ //用于去掉分卷名最前面的《作品名字》
+ replaceStr := fmt.Sprintf("《%s》", bookName)
  //获取书分卷信息
  dtNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dt") //获取书分卷信息
  testVolStr := htmlquery.InnerText(dtNode[1])
+
  if TestContainVolume(testVolStr) {
  bi.ChangeVolumeState(true)
  if len(dtNode) == 2 { //就是说刚好两个节点，我们要去除第一个，只保留第二个
  var tmp Volume
  tmp.CurrentVolume = htmlquery.InnerText(dtNode[1])
  volumes = append(volumes, tmp)
  } else { //当len(dtNode) >= 3
- for index := 1; index < len(dtNode); index++ { //因为第一个为 最新章节部分，需要去掉
+ for index := 0; index < len(dtNode); index++ { //因为第一个为 最新章节部分，需要去掉
  var tmp Volume
- //tmp.PrevChapterId =
- PrevChapter, _ := htmlquery.FindOne(dtNode[index], "//preceding-sibling::dd[1]") // 根据当前节点，查找上一个dd节点
+ // 根据当前节点，查找上一个dd节点
+ PrevChapter, _ := htmlquery.FindOne(dtNode[index], "//preceding-sibling::dd[1]")
  aNode, _ := htmlquery.Find(PrevChapter, "//a")
  tmp.PrevChapter.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
  tmp.PrevChapter.Title = htmlquery.InnerText(aNode[0])
- tmp.CurrentVolume = htmlquery.InnerText(dtNode[index])
+
+ //根据当前节点，查找下一个dd节点
+ NextChapter, _ := htmlquery.FindOne(dtNode[index], "//following-sibling::dd[1]")
+ aNode, _ = htmlquery.Find(NextChapter, "//a")
+ tmp.NextChapter.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
+ CurrentVolume := htmlquery.InnerText(dtNode[index])
+ tmp.CurrentVolume = strings.Replace(CurrentVolume, replaceStr, "", -1)
+ tmp.NextChapter.Title = htmlquery.InnerText(aNode[0])
  volumes = append(volumes, tmp)
  }
  }
@@ -86,7 +155,7 @@ func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {
  aNode, _ := htmlquery.Find(ddNode[i], "//a")
  tmp.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
  tmp.Title = htmlquery.InnerText(aNode[0])
- if bi.HasVolume && len(volumes) >= 2 { //正式写入 PrevChapterId
+ if bi.VolumeState() && len(volumes) >= 2 { //正式写入 PrevChapterId
  for index := 1; index < len(volumes); index++ { //第二个分卷开始，前面就有章节内容了
  if volumes[index].PrevChapter.Link == tmp.Link {
  volumes[index].PrevChapterId = i
@@ -95,13 +164,14 @@ func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {
  }
  chapters = append(chapters, tmp)
  }
-
+ HasVolume := bi.VolumeState() //先赋值给 HasVolume,再把值导入到结构体中，用于数据返回
  //导入信息
  bi = BookInfo{
  Name: bookName,
  Author: author,
  Description: description,
  Volumes: volumes,
+ HasVolume: HasVolume,
  Chapters: chapters,
  }
  } else { //没有设置代理
@@ -125,6 +195,9 @@ func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {
  description := htmlquery.SelectAttr(DescriptionMeta, "content")
  fmt.Println("简介 = ", description)
 
+ //用于去掉分卷名最前面的《作品名字》
+ replaceStr := fmt.Sprintf("《%s》", bookName)
+
  //获取书分卷信息
  dtNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dt") //获取书分卷信息
  testVolStr := htmlquery.InnerText(dtNode[1])
@@ -137,12 +210,19 @@ func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {
  } else { //当len(dtNode) >= 3
  for index := 1; index < len(dtNode); index++ { //因为第一个为 最新章节部分，需要去掉
  var tmp Volume
- //tmp.PrevChapterId =
- PrevChapter, _ := htmlquery.FindOne(dtNode[index], "//preceding-sibling::dd[1]") // 根据当前节点，查找上一个dd节点
+ // 根据当前节点，查找上一个dd节点
+ PrevChapter, _ := htmlquery.FindOne(dtNode[index], "//preceding-sibling::dd[1]")
  aNode, _ := htmlquery.Find(PrevChapter, "//a")
  tmp.PrevChapter.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
  tmp.PrevChapter.Title = htmlquery.InnerText(aNode[0])
- tmp.CurrentVolume = htmlquery.InnerText(dtNode[index])
+
+ //根据当前节点，查找下一个dd节点
+ NextChapter, _ := htmlquery.FindOne(dtNode[index], "//following-sibling::dd[1]")
+ aNode, _ = htmlquery.Find(NextChapter, "//a")
+ tmp.NextChapter.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
+ tmp.NextChapter.Title = htmlquery.InnerText(aNode[0])
+ CurrentVolume := htmlquery.InnerText(dtNode[index])
+ tmp.CurrentVolume = strings.Replace(CurrentVolume, replaceStr, "", -1)
  volumes = append(volumes, tmp)
  }
  }
@@ -157,118 +237,144 @@ func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {
  aNode, _ := htmlquery.Find(ddNode[i], "//a")
  tmp.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
  tmp.Title = htmlquery.InnerText(aNode[0])
+ //fmt.Printf("tmp.Link = %s\n", tmp.Link) //用于测试
+ //fmt.Printf("tmp.Title = %s\n", tmp.Title) //用于测试
 
- if bi.HasVolume && len(volumes) >= 2 { //正式写入 PrevChapterId
- for index := 1; index < len(volumes); index++ { //第二个分卷开始，前面就有章节内容了
+ if bi.VolumeState() && len(volumes) >= 2 { //正式写入 PrevChapterId && NextChapterId
+ for index := 0; index < len(volumes); index++ {
  if volumes[index].PrevChapter.Link == tmp.Link {
- volumes[index].PrevChapterId = i
+ volumes[index].PrevChapterId = (i - 12) + 1 //表示 设置 第一个章节为0
+ }
+ if volumes[index].NextChapter.Link == tmp.Link {
+ volumes[index].NextChapterId = (i - 12) + 1 //表示 设置 第一个章节为0
  }
  }
  }
  chapters = append(chapters, tmp)
  }
-
+ HasVolume := bi.VolumeState() //先赋值给 HasVolume,再把值导入到结构体中，用于数据返回
  //导入信息
  bi = BookInfo{
  Name: bookName,
  Author: author,
  Description: description,
  Volumes: volumes,
+ HasVolume: HasVolume,
  Chapters: chapters,
  }
  }
  return bi
 }
 
-func (this Ebook23US) GetChapterContent(pc ProxyChapter) Chapter {
- pollURL := pc.C.Link
- proxy := pc.Proxy
- var result Chapter
-
- if proxy != "" {
- doc, _ := htmlquery.LoadURLWithProxy(pollURL, proxy)
- contentNode, _ := htmlquery.FindOne(doc, "//div[@id='content']")
- contentText := htmlquery.InnerText(contentNode)
-
- //替换字符串中的特殊字符 \xE3\x80\x80\xE3\x80\x80 为换行符 \n
- tmp := strings.Replace(contentText, "\xE3\x80\x80\xE3\x80\x80", "\r\n", -1)
-
- //把 readx(); 替换成 ""
- //tmp = strings.Replace(tmp, "999小说更新最快 电脑端:https://www.999xs.com/", "", -1)
-
- //tmp = tmp + "\r\n"
- //返回数据，填写Content内容
- result = Chapter{
- Title: pc.C.Title,
- Link: pc.C.Link,
- Content: tmp,
- }
- } else {
- doc, _ := htmlquery.LoadURL(pollURL)
- contentNode, _ := htmlquery.FindOne(doc, "//div[@id='content']")
- contentText := htmlquery.InnerText(contentNode)
-
- //替换字符串中的特殊字符 \xE3\x80\x80\xE3\x80\x80 为换行符 \n
- tmp := strings.Replace(contentText, "\xE3\x80\x80\xE3\x80\x80", "\r\n", -1)
-
- //把 readx(); 替换成 ""
- //tmp = strings.Replace(tmp, "999小说更新最快 电脑端:https://www.999xs.com/", "", -1)
-
- //tmp = tmp + "\r\n"
- //返回数据，填写Content内容
- result = Chapter{
- Title: pc.C.Title,
- Link: pc.C.Link,
- Content: tmp,
- }
- }
-
- return result
-}
-
 //根据每个章节的 url连接，下载每章对应的内容Content当中
 func (this Ebook23US) DownloadChapters(Bi BookInfo, proxy string) BookInfo {
  chapters := Bi.Chapters
+
  NumChapter := len(chapters)
- ch := make(chan Chapter, 1)
- locker := sync.Mutex{}
+ tmpChapter := make(chan Chapter, NumChapter)
+ ResultCh := make(chan chan Chapter, NumChapter)
+ wg := sync.WaitGroup{}
+ var c []Chapter
  var bar *progressbar.ProgressBar
+ go AsycChapter(ResultCh, tmpChapter)
+ for index := 0; index < NumChapter; index++ {
+ tmp := ProxyChapter{
+ Proxy: proxy,
+ C: chapters[index],
+ }
+ this.DownloaderChapter(ResultCh, tmp, &wg)
+ }
 
- sp := pool.NewPool(0, func(obj interface{}) {
- locker.Lock()
- tmp := obj.(ProxyChapter)
- content := this.GetChapterContent(tmp)
- locker.Unlock()
- ch <- content
-
- })
-
- go excuteServe(&sp, chapters, proxy)
+ wg.Wait()
 
  //下载章节的时候显示进度条
  bar = progressbar.New(NumChapter)
  bar.RenderBlank()
 
- for i := 0; i < len(chapters); {
+ for index := 0; index < NumChapter; {
  select {
- case c := <-ch:
- chapters[i].Content = c.Content
- i++
+ case tmp := <-tmpChapter:
+ //fmt.Printf("tmp.Title = %s\n", tmp.Title)
+ //fmt.Printf("tmp.Content= %s\n", tmp.Content)
+ c = append(c, tmp)
+ index++
+ if index == (NumChapter - 1) {
+ goto ForEnd
+ }
  }
  bar.Add(1)
+
  }
- sp.Close()
+ForEnd:
 
  result := BookInfo{
  Name: Bi.Name,
  Author: Bi.Author,
  Description: Bi.Description,
- Chapters: chapters,
+ Volumes: Bi.Volumes, //小说分卷信息在 GetBookInfo()的时候已经下载完成
+ HasVolume: Bi.VolumeState(), //小说分卷信息在 GetBookInfo()的时候已经定义
+ Chapters: c,
  }
 
  return result
 }
 
+//func DownloaderChapter(ResultChan chan chan Chapter)
+func (this Ebook23US) DownloaderChapter(ResultChan chan chan Chapter, pc ProxyChapter, wg *sync.WaitGroup) {
+ c := make(chan Chapter)
+ ResultChan <- c
+ wg.Add(1)
+ go func(pc ProxyChapter) {
+ pollURL := pc.C.Link
+ proxy := pc.Proxy
+ var result Chapter
+
+ if proxy != "" {
+ doc, _ := htmlquery.LoadURLWithProxy(pollURL, proxy)
+ contentNode, _ := htmlquery.FindOne(doc, "//div[@id='content']")
+ contentText := htmlquery.InnerText(contentNode)
+
+ //把两个连续的全角空格 \xE3\x80\x80\xE3\x80\x80 替换为换行符 \r\n
+ tmp := strings.Replace(contentText, "\xE3\x80\x80\xE3\x80\x80", "\r\n", -1)
+
+ //去掉正文中残留的 "</p>" 和 "(https://)" 文本
+ tmp = strings.Replace(tmp, "</p>", "", -1)
+ tmp = strings.Replace(tmp, "(https://)", "", -1)
+
+ //tmp = tmp + "\r\n"
+ //返回数据，填写Content内容
+ result = Chapter{
+ Title: pc.C.Title,
+ Link: pc.C.Link,
+ Content: tmp,
+ }
+ } else {
+ doc, _ := htmlquery.LoadURL(pollURL)
+ contentNode, _ := htmlquery.FindOne(doc, "//div[@id='content']")
+ contentText := htmlquery.InnerText(contentNode)
+
+ //把两个连续的全角空格 \xE3\x80\x80\xE3\x80\x80 替换为换行符 \r\n
+ tmp := strings.Replace(contentText, "\xE3\x80\x80\xE3\x80\x80", "\r\n", -1)
+
+ //去掉正文中残留的 "</p>" 和 "(https://)" 文本
+ tmp = strings.Replace(tmp, "</p>", "", -1)
+ tmp = strings.Replace(tmp, "(https://)", "", -1)
+
+ //tmp = tmp + "\r\n"
+ //返回数据，填写Content内容
+ result = Chapter{
+ Title: pc.C.Title,
+ Link: pc.C.Link,
+ Content: tmp,
+ }
+ }
+ //fmt.Printf("result.Content= %s\n", result.Content)
+ c <- result
+ wg.Done()
+ }(pc)
+}
+
+//检测传入的 dt 标签文本是否包含 “正文”，如果不包含就表示该书有分卷
 func TestContainVolume(src string) bool {
- return !strings.Contains(src, "正文卷")
+ return !strings.Contains(src, "正文")
 }