-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrapeNh.ts
98 lines (78 loc) · 2.38 KB
/
scrapeNh.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import StealthPlugin from 'puppeteer-extra-plugin-stealth'
import puppeteer from 'puppeteer-extra'
import UserAgent from 'user-agents'
import { NhService } from './src/domains/nh-helper/NhService'
import { myEnvs } from './src/utils/myEnvs'
import { sleep } from './src/utils/sleep'
// Register the stealth plugin on the shared puppeteer-extra instance; this runs
// at module load, before execute() launches a browser.
puppeteer.use(StealthPlugin())
// User ID passed as `userId` to NhService.saveFavorite for every scraped favorite.
// NOTE(review): hard-coded here; presumably a database user ID — confirm, and
// consider reading it from configuration instead.
const userId = 'cldmecnxg0000j5kknybcpnl1'
/**
 * Scrapes the favorites list at `myEnvs.NH_URL` and stores every favorite via
 * `NhService.saveFavorite`.
 *
 * Flow:
 *  1. Open a visible (non-headless) browser on NH_URL and poll until the page
 *     URL contains "favorites" (nothing in this script navigates there, so
 *     the navigation has to happen in the opened window).
 *  2. For each page of the list: collect the `a.cover` links, visit each one,
 *     look up its artist link inside `#info`, and save the favorite together
 *     with the artist URL. Entries without an artist link are skipped.
 *  3. Follow the "next" chevron until a page no longer has one.
 *
 * The browser is always closed, including when scraping throws.
 *
 * @returns {Promise<void>} Resolves once every page has been processed, or
 *   immediately (after logging a message) when NH_URL is not configured.
 */
async function execute() {
  const NEXT_PAGE_SELECTOR = '.fa.fa-chevron-right'

  // Validate configuration before launching anything, so an early exit
  // cannot leave a browser window behind.
  const NH_URL = myEnvs.NH_URL
  if (NH_URL === undefined) {
    console.log('NhService: NH_URL is undefined')
    return
  }

  const service = new NhService()
  const browser = await puppeteer.launch({
    headless: false,
  })

  try {
    const page = await browser.newPage()
    await page.setUserAgent(new UserAgent().random().toString())
    await page.goto(NH_URL)
    // Set screen size
    await page.setViewport({ width: 1080, height: 900 })

    // Poll until the favorites list is showing.
    while (!page.url().includes('favorites')) {
      await sleep(1000)
    }

    // Scrape the current page first and only then look for a next page, so
    // the last (or only) page of favorites is processed as well.
    for (;;) {
      const currentUrl = page.url()
      const pathnames = await page.evaluate(() =>
        Array.from(document.querySelectorAll('a.cover'), (cover) =>
          cover.getAttribute('href')
        )
      )

      for (const pathname of pathnames) {
        if (pathname === null) {
          continue
        }
        await page.goto(NH_URL + pathname)

        const authorPathname = await page.evaluate(() => {
          const infoDiv = document.querySelector('#info')
          if (infoDiv === null) {
            return ''
          }
          // The artist is the first link under #info whose resolved URL
          // contains "artist".
          const authorLink = Array.from(infoDiv.querySelectorAll('a')).find(
            (link) => link.href.includes('artist')
          )
          if (authorLink === undefined) {
            return ''
          }
          return authorLink.getAttribute('href')
        })
        if (!authorPathname) {
          continue
        }

        await service.saveFavorite({
          authorUrl: NH_URL + authorPathname,
          url: NH_URL + pathname,
          userId: userId,
        })
      }

      // Return to the list page; visiting the individual favorites left it.
      await page.goto(currentUrl)
      const nextButton = await page.$(NEXT_PAGE_SELECTOR)
      if (nextButton === null) {
        break
      }
      // Wait for the navigation triggered by the click instead of sleeping a
      // fixed time, so page.url() reflects the new page on the next pass.
      await Promise.all([
        page.waitForNavigation(),
        page.click(NEXT_PAGE_SELECTOR),
      ])
    }
  } finally {
    // Release the browser on success and on failure alike.
    await browser.close()
  }
  console.log('success')
}
// Entry point: run the scraper and report any failure explicitly rather than
// leaving the returned promise unhandled.
execute().catch((error) => {
  console.error('scrapeNh failed:', error)
  process.exitCode = 1
})