Skip to content

Commit

Permalink
fix: getLinkWithPuppeteer timeout
Browse files Browse the repository at this point in the history
  • Loading branch information
sjdonado committed Nov 24, 2024
1 parent 3c4282d commit 9fef2e7
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 23 deletions.
2 changes: 2 additions & 0 deletions src/adapters/youtube.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ export async function getYouTubeLink(query: string, metadata: SearchMetadata) {
};
});

return; // TEMPFIX: youtube is blocked

const link = await getLinkWithPuppeteer(
url.toString(),
'ytmusic-card-shelf-renderer a',
Expand Down
64 changes: 41 additions & 23 deletions src/utils/scraper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,42 +10,60 @@ async function launchBrowser(): Promise<Browser> {
return browser;
}

/**
 * Races `promise` against a timer and settles with whichever finishes first.
 *
 * The timer is always cleared once the race settles, so a fast `promise` does
 * not leave a pending timeout behind that would keep the event loop alive for
 * up to `timeout` milliseconds.
 *
 * Note that the underlying operation is not cancelled on timeout; only the
 * wait for it is abandoned.
 *
 * @param promise - The operation to wait for.
 * @param timeout - Maximum time to wait, in milliseconds.
 * @returns The value `promise` resolves with, if it settles before the timer fires.
 * @throws Error with message `'Operation timed out'` if the timer fires first;
 *   otherwise rethrows whatever `promise` rejects with.
 */
async function withTimeout<T>(promise: Promise<T>, timeout: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;

  const expiry = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error('Operation timed out')), timeout);
  });

  try {
    return await Promise.race([promise, expiry]);
  } finally {
    // Release the timer whether we won, lost, or the operation itself rejected.
    clearTimeout(timer);
  }
}

/**
 * Opens `url` in a new browser page and returns the `href` of the first
 * element matching `linkQuerySelector`.
 *
 * NOTE(review): this listing is a rendered commit diff — removed and added
 * lines appear one after another without +/- markers, so several statements
 * (the `cookies` parameter, `setUserAgent`, `setCookie`, the request handler,
 * `setViewport`, `page.evaluate`, `return href`) show up twice. The comments
 * below describe the post-commit version (the `try`/`finally` variant).
 *
 * @param url - Address passed to `page.goto`.
 * @param linkQuerySelector - CSS selector handed to `document.querySelector` inside the page.
 * @param cookies - Cookies set on the page before navigation; defaults to none.
 * @param timeout - Milliseconds allowed for navigation and, separately, for the
 *   in-page selector lookup (each is wrapped in its own `withTimeout` call).
 * @returns The matched element's `href`, or `undefined` when nothing matches
 *   (the lookup uses optional chaining).
 * @throws Whatever `withTimeout` rejects with when either step exceeds `timeout`.
 */
export async function getLinkWithPuppeteer(
url: string,
linkQuerySelector: string,
cookies: CookieParam[] = []
cookies: CookieParam[] = [],
timeout: number = 4000 // Default timeout of 4 seconds
) {
// NOTE(review): the browser obtained here is not closed in this function;
// presumably launchBrowser reuses a shared instance — confirm.
const browser = await launchBrowser();
const page: Page = await browser.newPage();

await page.setUserAgent(
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:124.0) Gecko/20100101 Firefox/124.0'
);
try {
await page.setUserAgent(
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:124.0) Gecko/20100101 Firefox/124.0'
);

await page.setCookie(...cookies);
await page.setRequestInterception(true);
await page.setCookie(...cookies);
await page.setRequestInterception(true);

page.on('request', request => {
if (request.resourceType() === 'image') {
request.abort();
} else {
request.continue();
}
});
// Abort image requests and let every other request through.
page.on('request', request => {
if (request.resourceType() === 'image') {
request.abort();
} else {
request.continue();
}
});

await page.setViewport({ width: 768, height: 600 });
await page.goto(url, { waitUntil: 'networkidle0' });
await page.setViewport({ width: 768, height: 600 });

const href = await page.evaluate(
// eslint-disable-next-line
// @ts-ignore
selector => document.querySelector(selector)?.href,
linkQuerySelector
);
// Use timeout for the page.goto operation
await withTimeout(page.goto(url, { waitUntil: 'networkidle0' }), timeout);

page.close();
// Use timeout for the page.evaluate operation
const href = await withTimeout(
page.evaluate(
// eslint-disable-next-line
// @ts-ignore
selector => document.querySelector(selector)?.href,
linkQuerySelector
),
timeout
);

return href;
return href;
} finally {
// Runs on success, timeout, or any other error so the page is always released.
// NOTE(review): close() is not awaited, so a rejection from it would go unhandled.
page.close();
}
}

export function getCheerioDoc(html: string) {
Expand Down

0 comments on commit 9fef2e7

Please sign in to comment.