diff --git a/packages/kb.gbapp/services/KBService.ts b/packages/kb.gbapp/services/KBService.ts index 8cad665b..b0accd81 100644 --- a/packages/kb.gbapp/services/KBService.ts +++ b/packages/kb.gbapp/services/KBService.ts @@ -1571,10 +1571,19 @@ export class KBService implements IGBKBService { return filePath; // Return the saved file path } else { + + // Get the last part of the URL path or default to 'index' if empty + const pathParts = parsedUrl.pathname.split('/').filter(Boolean); // Remove empty parts + const lastPath = pathParts.length > 0 ? pathParts[pathParts.length - 1] : 'index'; + const flatLastPath = lastPath.replace(/\W+/g, '-'); // Flatten the last part of the path + + const fileName = `${flatLastPath}.html`; + const filePath = path.join(directoryPath, fileName); + // Configure request interception before navigation await page.setRequestInterception(true); page.on('request', request => { - // Only allow document requests, block everything else + // Only allow document requests, block everything else if (request.resourceType() === 'document') { request.continue(); } else { @@ -1583,21 +1592,18 @@ export class KBService implements IGBKBService { }); // Navigate with strict timeout and wait for content - await page.goto(url, { - waitUntil: 'networkidle0', // Wait until network is idle - timeout: 30000 // 30 second timeout - }); - - const parsedUrl = new URL(url); - - // Get the last part of the URL path or default to 'index' if empty - const pathParts = parsedUrl.pathname.split('/').filter(Boolean); // Remove empty parts - const lastPath = pathParts.length > 0 ? pathParts[pathParts.length - 1] : 'index'; - const flatLastPath = lastPath.replace(/\W+/g, '-'); // Flatten the last part of the path - - const fileName = `${flatLastPath}.html`; - const filePath = path.join(directoryPath, fileName); + // Navigate and get content even if page fails to load fully + let content = ''; + try { + await page.goto(url, { + waitUntil: 'networkidle0', // Wait until network is idle + timeout: 30000 // 30 second timeout + }); + } catch (err) { + // Ignore timeout/navigation errors + } + // Get whatever HTML content was loaded const htmlContent = await page.content(); // Convert HTML to Markdown using html2md