Skip to content

Commit

Permalink
Limit the index planner to only 10 concurrent readFile(...) calls, which prevents issues related to too many open files in large repositories
Browse files Browse the repository at this point in the history

With this change I am able to run the planner on the Linux kernel repository. Previously, the planner would error out with exceptions related to "too many file handles" on Linux or "file table overflow" on macOS.
  • Loading branch information
Rob Leidle committed Aug 12, 2024
1 parent 10e8608 commit 6cec89d
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 28 deletions.
23 changes: 12 additions & 11 deletions core/indexing/refreshIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import crypto from "node:crypto";
import * as fs from "node:fs";
import { open, type Database } from "sqlite";
import sqlite3 from "sqlite3";
import plimit from "p-limit";
import { IndexTag, IndexingProgressUpdate } from "../index.js";
import { getIndexSqlitePath } from "../util/paths.js";
import {
Expand Down Expand Up @@ -143,7 +144,6 @@ async function getAddRemoveForTag(

const saved = await getSavedItemsForTag(tag);

const add: PathAndCacheKey[] = [];
const updateNewVersion: PathAndCacheKey[] = [];
const updateOldVersion: PathAndCacheKey[] = [];
const remove: PathAndCacheKey[] = [];
Expand Down Expand Up @@ -178,15 +178,16 @@ async function getAddRemoveForTag(
}
}

// Any leftover in current files need to be added
add.push(
...(await Promise.all(
Object.keys(files).map(async (path) => {
const fileContents = await readFile(path);
return { path, cacheKey: calculateHash(fileContents) };
}),
)),
);
// Limit to only 10 concurrent file reads to avoid issues such as
// "too many file handles". A larger limit does not improve
// throughput due to the nature of disk or network I/O -- a huge
// number of concurrent readers generally does not improve performance.
const limit = plimit(10);
const promises = Object.keys(files).map(async (path) => {
const fileContents = await limit(() => readFile(path));
return { path, cacheKey: calculateHash(fileContents) };
});
const add: PathAndCacheKey[] = await Promise.all(promises);

// Create the markComplete callback function
const db = await SqliteDb.get();
Expand Down Expand Up @@ -408,7 +409,7 @@ export async function getComputeDeleteAddRemove(
for await (const _ of globalCacheIndex.update(
tag,
results,
async () => {},
async () => { },
repoName,
)) {
}
Expand Down
107 changes: 92 additions & 15 deletions core/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,15 @@
},
"dependencies": {
"@aws-sdk/client-bedrock-runtime": "^3.620.1",
"@aws-sdk/credential-providers": "^3.620.1",
"@aws-sdk/client-sagemaker-runtime": "^3.621.0",
"@aws-sdk/credential-providers": "^3.620.1",
"@continuedev/config-types": "^1.0.10",
"@continuedev/llm-info": "^1.0.1",
"@mozilla/readability": "^0.5.0",
"@octokit/rest": "^20.0.2",
"@typescript-eslint/eslint-plugin": "^7.8.0",
"@typescript-eslint/parser": "^7.8.0",
"@xenova/transformers": "2.14.0",
"jinja-js": "0.1.8",
"adf-to-md": "^1.1.0",
"async-mutex": "^0.5.0",
"axios": "^1.6.7",
Expand All @@ -63,6 +62,7 @@
"http-proxy-agent": "^7.0.1",
"https-proxy-agent": "^7.0.3",
"ignore": "^5.3.1",
"jinja-js": "0.1.8",
"js-tiktoken": "^1.0.8",
"jsdom": "^24.0.0",
"launchdarkly-node-client-sdk": "^3.2.0",
Expand All @@ -73,6 +73,7 @@
"ollama": "^0.4.6",
"onnxruntime-node": "1.14.0",
"openai": "^4.20.1",
"p-limit": "^6.1.0",
"pg": "^8.11.3",
"posthog-node": "^3.6.3",
"quick-lru": "^7.0.0",
Expand Down

0 comments on commit 6cec89d

Please sign in to comment.