forked from FabulousFabs/GoodReadsCrawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ResponseHandler.go
executable file
·117 lines (98 loc) · 2.96 KB
/
ResponseHandler.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
package main
import (
"fmt"
"github.com/PuerkitoBio/goquery"
"strings"
"strconv"
)
// key is the value sent as the "key" query parameter of every Goodreads
// request URL built in Handle.
// NOTE(review): this looks like an API credential hard-coded in source —
// consider loading it from configuration or the environment instead.
const key = "Zvx2qu5J51vpDU3kmyvvQ"
// RGoodReadsBook requests Goodreads book pages, has their bodies parsed by
// GRBookWorker goroutines and records the similar books that were found.
type RGoodReadsBook struct {
// keywordhandler receives every similar-book title and author name (via Include).
keywordhandler *KeywordHandler
// httphandler performs the requests for the URLs built in Handle.
httphandler *HttpHandler
// next collects the IDs of the similar books found, as decimal strings.
next []string
// chanJob carries response bodies to the parsing workers.
chanJob chan GRBookJob
// chanResult carries the parsed results back from the workers.
chanResult chan GRBookResult
// results holds every GRBookResult received by Handle.
results []GRBookResult
}
// GRBookJob is one unit of work handed to a GRBookWorker.
// Handle builds it with a positional literal, so the field order matters.
type GRBookJob struct {
// index is copied from the HTTP result the job was created from;
// GRBookWorker does not read it.
index int
// body is the response body the worker parses.
body string
}
// GRBookResult is what a GRBookWorker sends back for a single job.
// It is built with a positional literal, so the field order matters.
type GRBookResult struct {
// books are the entries found under "similar_books" in the job's body.
books []book
}
// book is one similar-book entry extracted by GRBookWorker.
// It is built with a positional literal, so the field order matters.
type book struct {
// id is the integer parsed from the entry's "id" element.
id int
// name is the text of the entry's "title" element after CDATA unwrapping.
name string
// authors holds the "name" of every "author" listed for the entry.
authors []string
}
// CDATA returns the text content of s with any XML CDATA wrapper removed.
//
// If s starts with "<![CDATA[", that opening marker is stripped together with
// a trailing "]]>" terminator when one is present. This also covers very short
// sections such as "<![CDATA[a]]>" and the empty "<![CDATA[]]>", and it never
// cuts off payload bytes when the terminator is missing. Afterwards any
// leading or trailing '[' and ']' characters are trimmed, so inputs without a
// CDATA wrapper are still cleaned of surrounding brackets.
func CDATA(s string) string {
	const (
		cdataOpen  = "<![CDATA["
		cdataClose = "]]>"
	)

	if strings.HasPrefix(s, cdataOpen) {
		s = strings.TrimPrefix(s, cdataOpen)
		// Only drop the terminator when it is really there; blindly slicing
		// three bytes off the end would eat content of a malformed section.
		s = strings.TrimSuffix(s, cdataClose)
	}

	return strings.Trim(s, "[]")
}
// Setup stores the keyword handler and the HTTP handler on b so that later
// calls to Handle can use them.
func (b *RGoodReadsBook) Setup(keywordhandler *KeywordHandler, httphandler *HttpHandler) {
	b.keywordhandler, b.httphandler = keywordhandler, httphandler
}
// Handle requests the Goodreads XML page of every book ID in books, has the
// response bodies parsed concurrently by GRBookWorker goroutines and records
// what they find.
//
// For every similar book reported by a worker, its ID is appended to b.next
// (as a decimal string) and its title and author names are passed to the
// keyword handler. Each worker result is also appended to b.results.
//
// Exactly one result is awaited per job, independent of how many entries
// b.results already holds from earlier calls, so repeated calls and calls
// that yield no HTTP results do not block forever. It always returns true.
func (b *RGoodReadsBook) Handle(books []string) bool {
	// setup target urls
	targets := make([]string, 0, len(books))
	for _, id := range books {
		targets = append(targets, fmt.Sprintf("https://goodreads.com/book/show/%s.xml?key=%s&format=xml", id, key))
	}

	// get results
	results := b.httphandler.Handle(targets, 1)

	// setup channels + make sure we close them; both are buffered to the
	// number of jobs so neither feeding nor answering can block
	b.chanJob = make(chan GRBookJob, len(results))
	b.chanResult = make(chan GRBookResult, len(results))
	defer func() {
		// closing chanJob ends the workers' range loops; chanResult is only
		// closed once every expected result has been received
		close(b.chanJob)
		close(b.chanResult)
	}()

	// create workers, one per job
	for i := 0; i < len(results); i++ {
		go GRBookWorker(i, b.chanJob, b.chanResult)
	}

	// feed jobs
	for _, result := range results {
		b.chanJob <- GRBookJob{result.index, result.body}
	}

	// wait and pull: every job produces exactly one result
	for range results {
		resp := <-b.chanResult
		b.results = append(b.results, resp)

		for _, similar := range resp.books {
			b.next = append(b.next, strconv.Itoa(similar.id))
			b.keywordhandler.Include(similar.name)

			for _, author := range similar.authors {
				b.keywordhandler.Include(author)
			}
		}
	}

	return true
}
// GRBookWorker parses the bodies received on chanJob until that channel is
// closed. For every job it sends exactly one GRBookResult on chanResult, even
// when the body cannot be parsed or contains no usable entries, so a receiver
// that counts one result per job never waits in vain.
//
// index identifies the worker; it is not used by the parsing itself.
func GRBookWorker(index int, chanJob <-chan GRBookJob, chanResult chan<- GRBookResult) {
	for job := range chanJob {
		chanResult <- GRBookResult{grParseSimilarBooks(job.body)}
	}
}

// grParseSimilarBooks extracts every "book" entry below "similar_books" from
// body. Entries without a parsable integer id or without a title are skipped
// instead of crashing the worker; authors without a name are left out.
func grParseSimilarBooks(body string) []book {
	books := []book{}

	document, err := goquery.NewDocumentFromReader(strings.NewReader(body))
	if err != nil {
		// nothing can be extracted from an unreadable body
		return books
	}

	document.Find("similar_books").Find("book").Each(func(_ int, e *goquery.Selection) {
		rawID, ok := grFirstChildData(e.Find("id"))
		if !ok {
			return
		}
		id, err := strconv.Atoi(strings.TrimSpace(rawID))
		if err != nil {
			// an entry without a numeric id cannot be followed up later
			return
		}

		rawTitle, ok := grFirstChildData(e.Find("title"))
		if !ok {
			return
		}

		authors := []string{}
		e.Find("authors").Find("author").Each(func(_ int, a *goquery.Selection) {
			if name, ok := grFirstChildData(a.Find("name")); ok {
				authors = append(authors, CDATA(name))
			}
		})

		books = append(books, book{id, CDATA(rawTitle), authors})
	})

	return books
}

// grFirstChildData returns the Data of the first child node of the first
// element in sel. ok is false when sel is empty or that element has no
// children, the two cases in which indexing the selection directly would panic.
func grFirstChildData(sel *goquery.Selection) (data string, ok bool) {
	if sel.Length() == 0 {
		return "", false
	}
	child := sel.Get(0).FirstChild
	if child == nil {
		return "", false
	}
	return child.Data, true
}