Skip to content

Commit

Permalink
fix(llm.gblib): Fix local DB usage in Talk to Data.
Browse files Browse the repository at this point in the history
  • Loading branch information
rodrigorodriguez committed Dec 12, 2024
1 parent 173d473 commit 9cebd81
Showing 1 changed file with 21 additions and 15 deletions.
36 changes: 21 additions & 15 deletions packages/kb.gbapp/services/KBService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1571,10 +1571,19 @@ export class KBService implements IGBKBService {

return filePath; // Return the saved file path
} else {

// Get the last part of the URL path or default to 'index' if empty
const pathParts = parsedUrl.pathname.split('/').filter(Boolean); // Remove empty parts
const lastPath = pathParts.length > 0 ? pathParts[pathParts.length - 1] : 'index';
const flatLastPath = lastPath.replace(/\W+/g, '-'); // Flatten the last part of the path

const fileName = `${flatLastPath}.html`;
const filePath = path.join(directoryPath, fileName);

// Configure request interception before navigation
await page.setRequestInterception(true);
page.on('request', request => {
// Only allow document requests, block everything else
if (request.resourceType() === 'document') {
request.continue();
} else {
Expand All @@ -1583,21 +1592,18 @@ export class KBService implements IGBKBService {
});

// Navigate with strict timeout and wait for content
await page.goto(url, {
waitUntil: 'networkidle0', // Wait until network is idle
timeout: 30000 // 30 second timeout
});

const parsedUrl = new URL(url);

// Get the last part of the URL path or default to 'index' if empty
const pathParts = parsedUrl.pathname.split('/').filter(Boolean); // Remove empty parts
const lastPath = pathParts.length > 0 ? pathParts[pathParts.length - 1] : 'index';
const flatLastPath = lastPath.replace(/\W+/g, '-'); // Flatten the last part of the path

const fileName = `${flatLastPath}.html`;
const filePath = path.join(directoryPath, fileName);
// Navigate and get content even if page fails to load fully
let content = '';
try {
await page.goto(url, {
waitUntil: 'networkidle0', // Wait until network is idle
timeout: 30000 // 30 second timeout
});
} catch (err) {
// Ignore timeout/navigation errors
}

// Get whatever HTML content was loaded
const htmlContent = await page.content();

// Convert HTML to Markdown using html2md
Expand Down

0 comments on commit 9cebd81

Please sign in to comment.