forked from FabulousFabs/GoodReadsCrawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
GRCrawler.go
executable file
·115 lines (93 loc) · 2.97 KB
/
GRCrawler.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/* PSA:
* Currently throttled. Using one worker only, because the API soft bans for excessive use.
* Change in ResponseHandler.go:59 if you want to.
* @to-do: Support for multiple API keys?
*/
package main
import (
"fmt"
"time"
"bufio"
"os"
"math"
)
// input reads one line from standard input and returns it without its
// trailing line terminator ("\n" or "\r\n").
//
// When called with no arguments a "> " prompt is printed first; passing any
// argument (its value is not inspected) suppresses the prompt.
//
// If standard input ends before a newline is seen, whatever was read is
// returned unchanged; on immediate EOF the result is the empty string.
//
// Note: a fresh buffered reader is created on every call, so bytes buffered
// beyond the first line are discarded when input is piped rather than typed.
func input(arg ...bool) string {
	if len(arg) == 0 {
		fmt.Printf("> ")
	}

	reader := bufio.NewReader(os.Stdin)
	// The error is deliberately ignored: ReadString returns the data read
	// before the error (typically io.EOF), and that partial line is the best
	// answer available to the caller.
	in, _ := reader.ReadString('\n')

	// Strip only real terminator bytes. Slicing off the last byte
	// unconditionally would panic on empty input and would eat a real
	// character when the final line has no newline.
	for len(in) > 0 && (in[len(in)-1] == '\n' || in[len(in)-1] == '\r') {
		in = in[:len(in)-1]
	}
	return in
}
// log prints s on its own line to standard output, prefixed with the current
// local wall-clock time as "[hour:minute:second]". The time fields are
// printed as plain integers, i.e. without zero padding.
func log(s string) {
	hh, mm, ss := time.Now().Clock()
	fmt.Printf("[%d:%d:%d] %s\n", hh, mm, ss, s)
}
// main drives an interactive crawl session:
//
//  1. asks for a baseline ID on standard input,
//  2. loads that baseline through an RGoodReadsBook,
//  3. crawls onward from the baseline's follow-up list in a background
//     goroutine until the user submits a line on standard input,
//  4. collapses the collected keywords and exports them to disk.
func main() {
	httphandler := HttpHandler{}
	keywordhandler := KeywordHandler{}

	// prompt baseline
	log("Give me the GR-ID baseline, please.")
	base := input()
	log(fmt.Sprintf("Okay. I'm working on '%s', then. Thanks very much!", base))

	// load baseline from GR API
	b := RGoodReadsBook{}
	b.Setup(&keywordhandler, &httphandler)
	b.Handle([]string{base})
	log("Loaded.")

	// start crawling in second thread
	log("Starting to crawl.")
	poisonpill := false
	seed := b.next
	// done is closed once crawl has returned, so main can tell when the
	// crawler is no longer touching keywordhandler.
	done := make(chan struct{})
	go func() {
		defer close(done)
		crawl(&httphandler, &keywordhandler, &poisonpill, seed)
	}()

	// input reads a whole line, so the user has to hit Enter to stop.
	log("I'm crawling. Press any key to stop.")
	input(false)

	// NOTE(review): poisonpill is a plain bool shared with the crawl
	// goroutine without synchronisation; replacing it with an atomic or a
	// channel requires changing crawl's signature.
	poisonpill = true

	// Wait for the crawler to finish its current iteration before touching
	// the keywords. Otherwise Collapse and export would run concurrently
	// with the crawler, which is still using the same KeywordHandler.
	<-done

	keywordhandler.Collapse()
	log(fmt.Sprintf("Keywords total (unique): %d", len(keywordhandler.keywords)))
	export(base, &keywordhandler)
}
// crawl repeatedly processes batches of IDs until *poisonpill becomes true.
//
// Each round creates a fresh RGoodReadsBook wired to the shared handlers,
// lets it handle the current batch, logs the running keyword count and then
// continues with the batch the book exposes in its next field. The flag is
// only checked between rounds, so a round that is already running is always
// completed. A farewell line is logged when the function returns.
func crawl(httphandler *HttpHandler, keywordhandler *KeywordHandler, poisonpill *bool, next []string) {
	defer log("Alright, I stopped crawling.")

	for !*poisonpill {
		book := RGoodReadsBook{}
		book.Setup(keywordhandler, httphandler)
		book.Handle(next)

		count := len(keywordhandler.keywords)
		log(fmt.Sprintf("Keywords total (generic): %d", count))

		next = book.next
	}
}
// export writes the collected keywords to numbered text files named
// "<base>_<i>.txt" inside the export directory, one keyword per line and at
// most 999 keywords per file.
//
// Every keyword is written exactly once: file i holds the keywords at
// indices [i*999, (i+1)*999), with the last file cut off at the end of the
// list. Errors from creating, writing or closing a file are printed and the
// export carries on with the next file.
func export(base string, keywordhandler *KeywordHandler) {
	// Upper bound on the number of keywords stored in a single file.
	const perFile = 999

	log("Exporting your keywords.")

	total := len(keywordhandler.keywords)
	files := int(math.Ceil(float64(total) / float64(perFile)))

	for i := 0; i < files; i++ {
		name := fmt.Sprintf("/users/fabianschneider/desktop/programming/go/GRCrawler/export/%s_%d.txt", base, i)

		// Half-open index window for this file. It uses the same chunk size
		// as the file count above, so no index is skipped and none falls
		// beyond the last file.
		start := i * perFile
		end := start + perFile
		if end > total {
			end = total
		}

		var data []byte
		for idx := start; idx < end; idx++ {
			data = append(data, fmt.Sprintf("%s\n", keywordhandler.keywords[idx])...)
		}

		f, err := os.Create(name)
		if err != nil {
			// Without a file handle there is nothing to write to.
			fmt.Println(err)
			continue
		}

		written, err := f.Write(data)
		if err != nil {
			fmt.Println(err)
		}
		// A failing Close can mean the data never reached the disk.
		if err := f.Close(); err != nil {
			fmt.Println(err)
		}

		log(fmt.Sprintf("File%d of %d: %d bytes written.", i+1, files, written))
	}

	log("Done! Hope it works well. Bye now!")
}