diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 7aeba50a..9d424bff 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -13,5 +13,7 @@ jobs:
           distribution: 'temurin'
       - name: Grant execute permissions for gradlew
         run: chmod +x ./gradlew
+      - name: Set Gradle options
+        run: echo "GRADLE_OPTS='-Xmx4g'" >> $GITHUB_ENV
       - name: Build and test
-        run: ./gradlew build
+        run: ./gradlew build -PskipSass
\ No newline at end of file
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index ac235bfc..8dc3355f 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -9,12 +9,14 @@ jobs:
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK 11
         uses: actions/setup-java@v2
         with:
           java-version: 11
           distribution: 'temurin'
       - name: Grant execute permissions for gradlew
         run: chmod +x ./gradlew
+      - name: Set Gradle options
+        run: echo "GRADLE_OPTS='-Xmx4g'" >> $GITHUB_ENV
       - name: Publish to Maven Central
         env:
           GPG_PRIVATE_KEY: ${{ secrets.GPG_PRIVATE_KEY }}
@@ -22,4 +24,4 @@ jobs:
           OSSRH_USERNAME: ${{ secrets.OSSRH_USERNAME }}
           OSSRH_PASSWORD: ${{ secrets.OSSRH_PASSWORD }}
           GITHUB_TOKEN: ${{ secrets._GITHUB_TOKEN }}
-        run: ./gradlew publish -x test --no-configuration-cache --no-daemon --no-build-cache --no-parallel
+        run: ./gradlew publish -PskipSass -x test --no-configuration-cache --no-daemon --no-build-cache --no-parallel
diff --git a/.gitignore b/.gitignore
index b925fc56..807493bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@ openai.key
 *.log
 *.log.*
 client_secret_google_oauth.json
+settings.gradle.kts
diff --git a/core/build.gradle.kts b/core/build.gradle.kts
index 4560f284..13e45b77 100644
--- a/core/build.gradle.kts
+++ b/core/build.gradle.kts
@@ -27,7 +27,7 @@ kotlin {
 }
 
 val junit_version = "5.10.1"
-val logback_version = "1.4.11"
+val logback_version = "1.5.8"
 val jackson_version = "2.17.2"
 val hsqldb_version = "2.7.2"
 
diff --git 
a/docs/Document_Indexing.md b/docs/Document_Indexing.md new file mode 100644 index 00000000..95d0d981 --- /dev/null +++ b/docs/Document_Indexing.md @@ -0,0 +1,63 @@ +# User Guide: Document Data Extraction and Query Index Creation + +This guide covers two main features: Document Data Extraction and Query Index Creation. These tools are designed to help you extract structured data from various document types and create searchable indexes for efficient querying. + +## 1. Document Data Extraction + +### Overview +The Document Data Extractor allows you to parse and extract structured information from PDF, TXT, MD, and HTML files. It uses AI to analyze the content and create a hierarchical JSON representation of the document's structure, entities, and metadata. + +### How to Use +1. In your IDE, right-click on a supported file (PDF, TXT, MD, or HTML) in the project explorer. +2. Select the "Document Data Extractor" option from the context menu. +3. A configuration dialog will appear with the following options: + - DPI: Set the resolution for image rendering (for PDFs). + - Max Pages: Limit the number of pages to process. + - Output Format: Choose the format for saved images (PNG, JPEG, GIF, BMP). + - Pages Per Batch: Set how many pages to process in each batch. + - Show Images: Toggle whether to display rendered images in the results. + - Save Image Files: Choose to save rendered images to disk. + - Save Text Files: Choose to save extracted text to disk. + - Save Final JSON: Choose to save the final parsed JSON to disk. +4. Click "OK" to start the extraction process. +5. A new browser window will open, showing the progress and results of the extraction. + +### Output +- The extracted data will be displayed in the browser, organized by pages or batches. +- If enabled, image files, text files, and the final JSON will be saved in an "output" directory next to the source file. +- The final JSON file will have a ".parsed.json" extension. + +## 2. 
Query Index Creation + +### Overview +The Query Index Creator takes the parsed JSON files from the Document Data Extractor and creates a binary index file that can be efficiently searched using embedding-based similarity search. + +### How to Use +1. In your IDE, select one or more ".parsed.json" files in the project explorer. +2. Right-click and choose the "Save As Query Index" option from the context menu. +3. A file chooser dialog will appear. Select the directory where you want to save the index file. +4. Click "OK" to start the conversion process. +5. A progress bar will show the status of the index creation. + +### Output +- A binary index file named "document.index.data" will be created in the selected output directory. +- This index file can be used for fast similarity searches on the extracted document data. + +## Using the Query Index + +Once you have created the query index, you can use it with the EmbeddingSearchTask to perform similarity searches on your document data. This allows you to quickly find relevant information across all your indexed documents. + +To use the EmbeddingSearchTask: +1. Set up your search query and parameters (e.g., distance type, number of results). +2. Point the task to your "document.index.data" file. +3. Run the search to get the most relevant results based on embedding similarity. + +## Tips and Best Practices + +1. For large documents, consider processing them in smaller batches by adjusting the "Max Pages" and "Pages Per Batch" settings. +2. Save the final JSON files when extracting data, as these are required to create the query index. +3. Organize your parsed JSON files in a dedicated folder to make it easier to select them when creating the query index. +4. When creating the query index, choose an output location that is easily accessible for your search tasks. +5. Experiment with different DPI settings for PDFs to balance image quality and processing speed. +6. 
Use the "Show Images" option during extraction to visually verify the content being processed, especially for PDFs. + diff --git a/gradle.properties b/gradle.properties index ada092f9..ac1df5c0 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,5 +1,5 @@ # Gradle Releases -> https://github.com/gradle/gradle/releases libraryGroup = com.simiacryptus.skyenet -libraryVersion = 1.2.6 +libraryVersion = 1.2.7 gradleVersion = 7.6.1 -kotlin.daemon.jvmargs=-Xmx2g +kotlin.daemon.jvmargs=-Xmx4g diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 7f93135c..e6441136 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index a4413138..df97d72b 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-bin.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/gradlew b/gradlew old mode 100644 new mode 100755 index 1aa94a42..b740cf13 --- a/gradlew +++ b/gradlew @@ -55,7 +55,7 @@ # Darwin, MinGW, and NonStop. # # (3) This script is generated from the Groovy template -# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt # within the Gradle project. # # You can find Gradle at https://github.com/gradle/gradle/. 
diff --git a/gradlew.bat b/gradlew.bat index 6689b85b..7101f8e4 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -43,11 +43,11 @@ set JAVA_EXE=java.exe %JAVA_EXE% -version >NUL 2>&1 if %ERRORLEVEL% equ 0 goto execute -echo. -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. +echo. 1>&2 +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 goto fail @@ -57,11 +57,11 @@ set JAVA_EXE=%JAVA_HOME%/bin/java.exe if exist "%JAVA_EXE%" goto execute -echo. -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. +echo. 1>&2 +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 
1>&2 goto fail diff --git a/kotlin/build.gradle.kts b/kotlin/build.gradle.kts index 93ce6341..916f23aa 100644 --- a/kotlin/build.gradle.kts +++ b/kotlin/build.gradle.kts @@ -48,8 +48,8 @@ dependencies { testRuntimeOnly(group = "org.junit.jupiter", name = "junit-jupiter-engine", version = "5.10.1") implementation(group = "org.slf4j", name = "slf4j-api", version = "2.0.16") - testImplementation(group = "ch.qos.logback", name = "logback-classic", version = "1.4.11") - testImplementation(group = "ch.qos.logback", name = "logback-core", version = "1.4.11") + testImplementation(group = "ch.qos.logback", name = "logback-classic", version = "1.5.8") + testImplementation(group = "ch.qos.logback", name = "logback-core", version = "1.5.8") testImplementation("org.ow2.asm:asm:9.6") diff --git a/webui/build.gradle.kts b/webui/build.gradle.kts index c90595f5..b6b60618 100644 --- a/webui/build.gradle.kts +++ b/webui/build.gradle.kts @@ -138,6 +138,9 @@ tasks { ) } } +tasks.withType().configureEach { + onlyIf { !project.hasProperty("skipSass") } +} val javadocJar by tasks.registering(Jar::class) { diff --git a/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/TaskType.kt b/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/TaskType.kt index fa2fde7f..4cfe64c1 100644 --- a/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/TaskType.kt +++ b/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/TaskType.kt @@ -39,6 +39,8 @@ class TaskType( val TaskPlanning = TaskType("TaskPlanning", PlanningTaskData::class.java) val Inquiry = TaskType("Inquiry", InquiryTaskData::class.java) + val Search = TaskType("Search", SearchTask.SearchTaskData::class.java) + val EmbeddingSearch = TaskType("EmbeddingSearch", EmbeddingSearchTask.EmbeddingSearchTaskData::class.java) val FileModification = TaskType("FileModification", FileModificationTaskData::class.java) val Documentation = TaskType("Documentation", DocumentationTaskData::class.java) val CodeReview = TaskType("CodeReview", 
CodeReviewTaskData::class.java) @@ -54,6 +56,8 @@ class TaskType( init { registerConstructor(CommandAutoFix) { settings, task -> CommandAutoFixTask(settings, task) } registerConstructor(Inquiry) { settings, task -> InquiryTask(settings, task) } + registerConstructor(Search) { settings, task -> SearchTask(settings, task) } + registerConstructor(EmbeddingSearch) { settings, task -> EmbeddingSearchTask(settings, task) } registerConstructor(FileModification) { settings, task -> FileModificationTask(settings, task) } registerConstructor(Documentation) { settings, task -> DocumentationTask(settings, task) } registerConstructor(RunShellCommand) { settings, task -> RunShellCommandTask(settings, task) } diff --git a/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/AbstractFileTask.kt b/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/AbstractFileTask.kt index 247d2027..55e893c4 100644 --- a/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/AbstractFileTask.kt +++ b/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/AbstractFileTask.kt @@ -1,6 +1,7 @@ package com.simiacryptus.skyenet.apps.plan.file import com.simiacryptus.diff.FileValidationUtils +import com.simiacryptus.jopenai.describe.Description import com.simiacryptus.skyenet.apps.plan.AbstractTask import com.simiacryptus.skyenet.apps.plan.PlanSettings import com.simiacryptus.skyenet.apps.plan.PlanTaskBase @@ -18,7 +19,9 @@ abstract class AbstractFileTask( task_type: String, task_description: String? = null, task_dependencies: List? = null, + @Description("The specific files to be used as input for the task") val input_files: List? = null, + @Description("The specific files to be generated as output for the task") val output_files: List? = null, state: TaskState? 
= TaskState.Pending, ) : PlanTaskBase( @@ -62,7 +65,7 @@ abstract class AbstractFileTask( companion object { private val log = org.slf4j.LoggerFactory.getLogger(AbstractFileTask::class.java) - private const val TRIPLE_TILDE = "```" + const val TRIPLE_TILDE = "```" } } \ No newline at end of file diff --git a/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/DocumentationTask.kt b/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/DocumentationTask.kt index a5ec7a44..8f9e57d6 100644 --- a/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/DocumentationTask.kt +++ b/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/DocumentationTask.kt @@ -1,4 +1,5 @@ package com.simiacryptus.skyenet.apps.plan.file +import com.simiacryptus.diff.addApplyFileDiffLinks import com.simiacryptus.jopenai.API import com.simiacryptus.jopenai.describe.Description @@ -37,6 +38,7 @@ class DocumentationTask( return """ Documentation - Generate documentation ** List input files/tasks to be examined + ** List output files to be modified or created with documentation """.trimMargin() } @@ -48,6 +50,14 @@ class DocumentationTask( Use a structured and consistent format that facilitates easy understanding and navigation. Include code examples where applicable, and explain the rationale behind key design decisions and algorithm choices. Document any known issues or areas for improvement, providing guidance for future developers on how to extend or maintain the code. + For existing files, provide documentation in the form of comments within the code. + For new files, create separate markdown files with the documentation. + Response format: + For existing files: Use ${TRIPLE_TILDE}diff code blocks with a header specifying the file path. + For new files: Use $TRIPLE_TILDE markdown blocks with a header specifying the new file path. + The diff format should use + for line additions, - for line deletions. 
+ Include 2 lines of context before and after every change in diffs. + Separate code blocks with a single blank line. """.trimMargin(), model = planSettings.getTaskSettings(TaskType.Documentation).model ?: planSettings.defaultModel, temperature = planSettings.temperature, @@ -64,6 +74,10 @@ class DocumentationTask( api: API, resultFn: (String) -> Unit ) { + if (((planTask?.input_files ?: listOf()) + (planTask?.output_files ?: listOf())).isEmpty()) { + task.complete("No input or output files specified") + return + } val semaphore = Semaphore(0) val onComplete = { semaphore.release() @@ -76,22 +90,44 @@ class DocumentationTask( JsonUtil.toJson(plan), getPriorCode(planProcessingState), getInputFileCode(), - "Items to document: ${itemsToDocument.joinToString(", ")}" + "Items to document: ${itemsToDocument.joinToString(", ")}", + "Output files: ${planTask?.output_files?.joinToString(", ") ?: ""}" ).filter { it.isNotBlank() }, api ) resultFn(docResult) if (agent.planSettings.autoFix) { + val diffLinks = agent.ui.socketManager!!.addApplyFileDiffLinks( + root = agent.root, + response = docResult, + handle = { newCodeMap -> + newCodeMap.forEach { (path, newCode) -> + task.complete("$path Updated") + } + }, + ui = agent.ui, + api = api, + shouldAutoApply = { agent.planSettings.autoFix } + ) task.complete() onComplete() - MarkdownUtil.renderMarkdown("## Generated Documentation\n$docResult\nAuto-accepted", ui = agent.ui) + MarkdownUtil.renderMarkdown(diffLinks + "\n\n## Auto-applied documentation changes", ui = agent.ui) } else { MarkdownUtil.renderMarkdown( - "## Generated Documentation\n$docResult", - ui = agent.ui - ) + acceptButtonFooter(agent.ui) { - task.complete() - onComplete() - } + agent.ui.socketManager!!.addApplyFileDiffLinks( + root = agent.root, + response = docResult, + handle = { newCodeMap -> + newCodeMap.forEach { (path, newCode) -> + task.complete("$path Updated") + } + }, + ui = agent.ui, + api = api + ) + acceptButtonFooter(agent.ui) { + task.complete() + 
onComplete() + }, ui = agent.ui + ) } } Retryable(agent.ui, task = task, process = process)
diff --git a/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/EmbeddingSearchTask.kt b/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/EmbeddingSearchTask.kt
new file mode 100644
index 00000000..07fed4b9
--- /dev/null
+++ b/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/EmbeddingSearchTask.kt
@@ -0,0 +1,172 @@
+package com.simiacryptus.skyenet.apps.plan.file
+
+import com.simiacryptus.jopenai.API
+import com.simiacryptus.jopenai.OpenAIClient
+import com.simiacryptus.jopenai.describe.Description
+import com.simiacryptus.jopenai.models.ApiModel
+import com.simiacryptus.jopenai.opt.DistanceType
+import com.simiacryptus.skyenet.apps.parsers.DocumentRecord
+import com.simiacryptus.skyenet.apps.plan.*
+import com.simiacryptus.skyenet.util.MarkdownUtil
+import com.simiacryptus.skyenet.webui.session.SessionTask
+import org.slf4j.LoggerFactory
+import java.nio.file.FileSystems
+import java.nio.file.Files
+import java.nio.file.Path
+import kotlin.streams.asSequence
+
+/**
+ * Plan task that embeds a query string and ranks the records stored in `*.index.data` files by
+ * their distance to that embedding, reporting the closest `top_k` as Markdown.
+ *
+ * NOTE(review): generic type arguments (`AbstractTask<...>`, `List<String>`, `Map<String, PlanTaskBase>`)
+ * were missing from the reviewed text and are restored here by inference; confirm against `AbstractTask`.
+ */
+class EmbeddingSearchTask(
+    planSettings: PlanSettings,
+    planTask: EmbeddingSearchTaskData?
+) : AbstractTask<EmbeddingSearchTask.EmbeddingSearchTaskData>(planSettings, planTask) {
+    /** Planner-supplied parameters describing the query and the index files to search. */
+    class EmbeddingSearchTaskData(
+        @Description("The search query to look for in the embeddings")
+        val search_query: String,
+        @Description("The distance type to use for comparing embeddings (Euclidean, Manhattan, or Cosine)")
+        val distance_type: DistanceType = DistanceType.Cosine,
+        @Description("The number of top results to return")
+        val top_k: Int = 5,
+        @Description("The specific index files (or file patterns) to be searched")
+        val input_files: List<String>? = null,
+        task_description: String? = null,
+        task_dependencies: List<String>? = null,
+        state: TaskState? = null,
+    ) : PlanTaskBase(
+        task_type = TaskType.EmbeddingSearch.name,
+        task_description = task_description,
+        task_dependencies = task_dependencies,
+        state = state
+    )
+
+    /** Describes this task type to the planner. */
+    override fun promptSegment() = """
+        |EmbeddingSearch - Search for similar embeddings in index files and provide top results
+        |  ** Specify the search query
+        |  ** Specify the distance type (Euclidean, Manhattan, or Cosine)
+        |  ** Specify the number of top results to return
+        |  ** List input index files or file patterns to be searched
+    """.trimMargin()
+
+    /**
+     * Runs the search, renders the Markdown report into [task] and passes it to [resultFn].
+     *
+     * @throws IllegalStateException if [api] is not an [OpenAIClient] or no query embedding is returned
+     */
+    override fun run(
+        agent: PlanCoordinator,
+        taskId: String,
+        userMessage: String,
+        plan: Map<String, PlanTaskBase>,
+        planProcessingState: PlanProcessingState,
+        task: SessionTask,
+        api: API,
+        resultFn: (String) -> Unit
+    ) {
+        val client = api as? OpenAIClient
+            ?: error("EmbeddingSearch requires an OpenAIClient but received ${api::class.java.name}")
+        val formattedResults = formatSearchResults(performEmbeddingSearch(client))
+        task.add(MarkdownUtil.renderMarkdown(formattedResults, ui = agent.ui))
+        resultFn(formattedResults)
+    }
+
+    /** Returns the `top_k` indexed records closest to the query, nearest first. */
+    private fun performEmbeddingSearch(api: OpenAIClient): List<EmbeddingSearchResult> {
+        val request = planTask ?: return emptyList()
+        // A negative count would make take() throw.
+        val topK = request.top_k.coerceAtLeast(0)
+        if (request.search_query.isBlank() || topK == 0) return emptyList()
+        // No index files means nothing to rank, so skip the embedding request entirely.
+        val indexFiles = findIndexFiles(request.input_files.orEmpty())
+        if (indexFiles.isEmpty()) return emptyList()
+
+        val queryEmbedding = api.createEmbedding(
+            ApiModel.EmbeddingRequest(
+                input = request.search_query,
+                // NOTE(review): this falls back to the plan's default model; confirm that model
+                // is valid for embedding requests and matches the one used to build the index.
+                model = (planSettings.getTaskSettings(TaskType.EmbeddingSearch).model
+                    ?: planSettings.defaultModel).modelName
+            )
+        ).data.firstOrNull()?.embedding
+            ?: error("Embedding request for the search query returned no vector")
+
+        return indexFiles
+            .flatMap { path ->
+                val relativePath = root.relativize(path).toString()
+                val records = try {
+                    DocumentRecord.readBinary(path.toString())
+                } catch (e: Exception) {
+                    // One corrupt index must not abort the whole search.
+                    log.warn("Skipping unreadable index file: $relativePath", e)
+                    return@flatMap emptyList<EmbeddingSearchResult>()
+                }
+                records.mapNotNull { record ->
+                    record.vector?.let { vector ->
+                        EmbeddingSearchResult(
+                            file = relativePath,
+                            record = record,
+                            distance = request.distance_type.distance(vector, queryEmbedding)
+                        )
+                    }
+                }
+            }
+            .sortedBy { it.distance }
+            .take(topK)
+    }
+
+    /**
+     * Resolves glob patterns (relative to `root`) to the regular `*.index.data` files they match.
+     * The tree is walked once, so a file matched by several patterns is returned only once.
+     */
+    private fun findIndexFiles(filePatterns: List<String>): List<Path> {
+        if (filePatterns.isEmpty()) return emptyList()
+        val fileSystem = FileSystems.getDefault()
+        val matchers = filePatterns.map { fileSystem.getPathMatcher("glob:$it") }
+        // Files.walk keeps directory handles open until the stream is closed.
+        return Files.walk(root).use { paths ->
+            paths.asSequence()
+                .filter { path -> Files.isRegularFile(path) && path.toString().endsWith(".index.data") }
+                .filter { path ->
+                    val relativePath = root.relativize(path)
+                    matchers.any { it.matches(relativePath) }
+                }
+                .sortedBy { path -> path.toString() }
+                .toList()
+        }
+    }
+
+    /** Renders the ranked results as a Markdown report. */
+    private fun formatSearchResults(results: List<EmbeddingSearchResult>): String = buildString {
+        appendLine("# Embedding Search Results")
+        appendLine()
+        if (results.isEmpty()) appendLine("No matching records found.")
+        results.forEachIndexed { index, result ->
+            appendLine("## Result ${index + 1}")
+            appendLine("- File: ${result.file}")
+            appendLine("- Distance: ${result.distance}")
+            appendLine("- Text: ${result.record.text}")
+            appendLine("- Type: ${result.record.type}")
+            appendLine("- Source Path: ${result.record.sourcePath}")
+            appendLine("- JSON Path: ${result.record.jsonPath}")
+            appendLine()
+        }
+    }
+
+    /** An indexed record together with its distance to the query embedding. */
+    data class EmbeddingSearchResult(
+        val file: String,
+        val record: DocumentRecord,
+        val distance: Double
+    )
+
+    companion object {
+        private val log = LoggerFactory.getLogger(EmbeddingSearchTask::class.java)
+    }
+}
\ No newline at end of file
diff --git a/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/InquiryTask.kt b/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/InquiryTask.kt index d042ab80..6c56a0f2 100644 --- a/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/InquiryTask.kt +++ b/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/InquiryTask.kt @@ -1,5 +1,6 @@ package com.simiacryptus.skyenet.apps.plan.file +import com.simiacryptus.diff.FileValidationUtils import com.simiacryptus.jopenai.API import com.simiacryptus.jopenai.describe.Description import com.simiacryptus.jopenai.models.ApiModel @@ -11,32 +12,43 @@ import com.simiacryptus.skyenet.util.MarkdownUtil import com.simiacryptus.skyenet.webui.session.SessionTask import com.simiacryptus.util.JsonUtil import org.slf4j.LoggerFactory +import java.nio.file.FileSystems +import java.nio.file.Files import java.util.concurrent.Semaphore import java.util.concurrent.atomic.AtomicReference +import kotlin.streams.asSequence class InquiryTask( planSettings: PlanSettings, planTask: InquiryTaskData? 
-) : AbstractFileTask(planSettings, planTask) { +) : AbstractTask(planSettings, planTask) { class InquiryTaskData( @Description("The specific questions or topics to be addressed in the inquiry") val inquiry_questions: List? = null, @Description("The goal or purpose of the inquiry") val inquiry_goal: String? = null, + @Description("The specific files (or file patterns) to be used as input for the task") + val input_files: List? = null, task_description: String? = null, task_dependencies: List? = null, - input_files: List? = null, - output_files: List? = null, - state: TaskState? = null - ) : FileTaskBase( + state: TaskState? = null, + ) : PlanTaskBase( task_type = TaskType.Inquiry.name, task_description = task_description, task_dependencies = task_dependencies, - input_files = input_files, - output_files = output_files, state = state ) + override fun promptSegment() = if (planSettings.allowBlocking) """ + |Inquiry - Answer questions by reading in files and providing a summary that can be discussed with and approved by the user + | ** Specify the questions and the goal of the inquiry + | ** List input files to be examined when answering the questions + """.trimMargin() else """ + |Inquiry - Answer questions by reading in files and providing a report + | ** Specify the questions and the goal of the inquiry + | ** List input files to be examined when answering the questions + """.trimMargin() + private val inquiryActor by lazy { SimpleActor( name = "Inquiry", @@ -49,21 +61,15 @@ class InquiryTask( When generating insights, consider the existing project context and focus on information that is directly relevant and applicable. Focus on generating insights and information that support the task types available in the system (${ - planSettings.taskSettings.filter { it.value.enabled }.keys.joinToString(", ")}). + planSettings.taskSettings.filter { it.value.enabled }.keys.joinToString(", ") + }). 
This will ensure that the inquiries are tailored to assist in the planning and execution of tasks within the system's framework. """.trimMargin(), - model = planSettings.getTaskSettings(TaskType.valueOf(planTask?.task_type!!)).model - ?: planSettings.defaultModel, + model = planSettings.getTaskSettings(TaskType.valueOf(planTask?.task_type!!)).model ?: planSettings.defaultModel, temperature = planSettings.temperature, ) } - override fun promptSegment() = """ - |Inquiry - Answer questions by reading in files and providing a summary that can be discussed with and approved by the user - | ** Specify the questions and the goal of the inquiry - | ** List input files to be examined when answering the questions - """.trimMargin() - override fun run( agent: PlanCoordinator, taskId: String, @@ -128,6 +134,38 @@ class InquiryTask( resultFn(inquiryResult) } + private fun getInputFileCode(): String = + ((planTask?.input_files ?: listOf())) + .flatMap { pattern: String -> + val matcher = FileSystems.getDefault().getPathMatcher("glob:$pattern") + Files.walk(root).asSequence() + .filter { path -> + matcher.matches(root.relativize(path)) && + FileValidationUtils.isLLMIncludable(path.toFile()) + } + .map { path -> + root.relativize(path).toString() + } + .toList() + } + .distinct() + .sortedBy { it } + .joinToString("\n\n") { relativePath -> + val file = root.resolve(relativePath).toFile() + try { + """ + |# $relativePath + | + |${AbstractFileTask.TRIPLE_TILDE} + |${codeFiles[file.toPath()] ?: file.readText()} + |${AbstractFileTask.TRIPLE_TILDE} + """.trimMargin() + } catch (e: Throwable) { + log.warn("Error reading file: $relativePath", e) + "" + } + } + companion object { private val log = LoggerFactory.getLogger(InquiryTask::class.java) } diff --git a/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/SearchTask.kt b/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/SearchTask.kt new file mode 100644 index 00000000..c7e843f4 --- /dev/null +++ 
b/webui/src/main/kotlin/com/simiacryptus/skyenet/apps/plan/file/SearchTask.kt
@@ -0,0 +1,183 @@
+package com.simiacryptus.skyenet.apps.plan.file
+
+import com.simiacryptus.diff.FileValidationUtils
+import com.simiacryptus.jopenai.API
+import com.simiacryptus.jopenai.describe.Description
+import com.simiacryptus.skyenet.apps.plan.*
+import com.simiacryptus.skyenet.util.MarkdownUtil
+import com.simiacryptus.skyenet.webui.session.SessionTask
+import org.slf4j.LoggerFactory
+import java.io.IOException
+import java.nio.file.FileSystems
+import java.nio.file.Files
+import java.nio.file.Path
+import java.util.regex.Pattern
+import kotlin.streams.asSequence
+
+/**
+ * Plan task that searches files under the task root for a substring or regular expression and
+ * reports every matching line, with surrounding context, as a Markdown document.
+ *
+ * NOTE(review): generic type arguments (`AbstractTask<...>`, `List<String>`, `Map<String, PlanTaskBase>`)
+ * were missing from the reviewed text and are restored here by inference; confirm against `AbstractTask`.
+ */
+class SearchTask(
+    planSettings: PlanSettings,
+    planTask: SearchTaskData?
+) : AbstractTask<SearchTask.SearchTaskData>(planSettings, planTask) {
+    /** Planner-supplied parameters describing what to search for and where. */
+    class SearchTaskData(
+        @Description("The search pattern (substring or regex) to look for in the files")
+        val search_pattern: String,
+        @Description("Whether the search pattern is a regex (true) or a substring (false)")
+        val is_regex: Boolean = false,
+        @Description("The number of context lines to include before and after each match")
+        val context_lines: Int = 2,
+        @Description("The specific files (or file patterns) to be searched")
+        val input_files: List<String>? = null,
+        task_description: String? = null,
+        task_dependencies: List<String>? = null,
+        state: TaskState? = null,
+    ) : PlanTaskBase(
+        task_type = TaskType.Search.name,
+        task_description = task_description,
+        task_dependencies = task_dependencies,
+        state = state
+    )
+
+    /** Describes this task type to the planner. */
+    override fun promptSegment() = """
+        |Search - Search for patterns in files and provide results with context
+        |  ** Specify the search pattern (substring or regex)
+        |  ** Specify whether the pattern is a regex or a substring
+        |  ** Specify the number of context lines to include
+        |  ** List input files or file patterns to be searched
+    """.trimMargin()
+
+    /** Runs the search, renders the Markdown report into [task] and passes it to [resultFn]. */
+    override fun run(
+        agent: PlanCoordinator,
+        taskId: String,
+        userMessage: String,
+        plan: Map<String, PlanTaskBase>,
+        planProcessingState: PlanProcessingState,
+        task: SessionTask,
+        api: API,
+        resultFn: (String) -> Unit
+    ) {
+        val formattedResults = formatSearchResults(performSearch())
+        task.add(MarkdownUtil.renderMarkdown(formattedResults, ui = agent.ui))
+        resultFn(formattedResults)
+    }
+
+    /**
+     * Finds every line matching the configured pattern in the configured input files.
+     *
+     * Returns an empty list when there is no task data or the pattern is empty (an empty pattern
+     * would otherwise match every line of every file).
+     *
+     * @throws java.util.regex.PatternSyntaxException if `is_regex` is set and the pattern is invalid
+     */
+    private fun performSearch(): List<SearchResult> {
+        val request = planTask ?: return emptyList()
+        if (request.search_pattern.isEmpty()) return emptyList()
+        val pattern = Pattern.compile(
+            if (request.is_regex) request.search_pattern else Pattern.quote(request.search_pattern)
+        )
+        // A negative value would produce an inverted (invalid) context range.
+        val contextLines = request.context_lines.coerceAtLeast(0)
+        return findInputFiles(request.input_files.orEmpty())
+            .flatMap { path -> searchFile(path, pattern, contextLines) }
+    }
+
+    /**
+     * Resolves glob patterns (relative to `root`) to the regular, LLM-includable files they match.
+     * The tree is walked once, so a file matched by several patterns is returned only once.
+     */
+    private fun findInputFiles(filePatterns: List<String>): List<Path> {
+        if (filePatterns.isEmpty()) return emptyList()
+        val fileSystem = FileSystems.getDefault()
+        val matchers = filePatterns.map { fileSystem.getPathMatcher("glob:$it") }
+        // Files.walk keeps directory handles open until the stream is closed.
+        return Files.walk(root).use { paths ->
+            paths.asSequence()
+                .filter { path -> Files.isRegularFile(path) }
+                .filter { path ->
+                    val relativePath = root.relativize(path)
+                    matchers.any { it.matches(relativePath) }
+                }
+                .filter { path -> FileValidationUtils.isLLMIncludable(path.toFile()) }
+                .sortedBy { path -> path.toString() }
+                .toList()
+        }
+    }
+
+    /** Returns one [SearchResult] per line of [path] that matches [pattern]; unreadable files yield none. */
+    private fun searchFile(path: Path, pattern: Pattern, contextLines: Int): List<SearchResult> {
+        val relativePath = root.relativize(path).toString()
+        val lines = try {
+            Files.readAllLines(path)
+        } catch (e: IOException) {
+            // Includes non-UTF-8 content (MalformedInputException); one bad file must not abort the search.
+            log.warn("Skipping unreadable file: $relativePath", e)
+            return emptyList()
+        }
+        return lines.mapIndexedNotNull { index, line ->
+            if (!pattern.matcher(line).find()) return@mapIndexedNotNull null
+            // Clamp the window to the file bounds without risking Int overflow.
+            val before = minOf(contextLines, index)
+            val after = minOf(contextLines, lines.size - 1 - index)
+            SearchResult(
+                file = relativePath,
+                lineNumber = index + 1,
+                matchedLine = line,
+                context = lines.subList(index - before, index + after + 1),
+                contextStartLine = index - before + 1,
+            )
+        }
+    }
+
+    /** Renders results as Markdown grouped by file, marking each matched line with `>`. */
+    private fun formatSearchResults(results: List<SearchResult>): String = buildString {
+        appendLine("# Search Results")
+        appendLine()
+        if (results.isEmpty()) appendLine("No matches found.")
+        results.groupBy { it.file }.forEach { (file, fileResults) ->
+            appendLine("## $file")
+            appendLine()
+            fileResults.forEach { result ->
+                appendLine("### Line ${result.lineNumber}")
+                appendLine()
+                appendLine("```")
+                result.context.forEachIndexed { offset, line ->
+                    // Number from the real start of the window; it is shorter near file boundaries.
+                    val lineNumber = result.contextStartLine + offset
+                    val prefix = if (lineNumber == result.lineNumber) ">" else " "
+                    appendLine("$prefix $lineNumber: $line")
+                }
+                appendLine("```")
+                appendLine()
+            }
+        }
+    }
+
+    /**
+     * A single matching line.
+     *
+     * @property lineNumber 1-based line number of the match
+     * @property context the matched line plus its surrounding lines
+     * @property contextStartLine 1-based line number of `context[0]`; the default assumes a
+     *   symmetric window and exists only for callers that do not supply it
+     */
+    data class SearchResult(
+        val file: String,
+        val lineNumber: Int,
+        val matchedLine: String,
+        val context: List<String>,
+        val contextStartLine: Int = (lineNumber - context.size / 2).coerceAtLeast(1),
+    )
+
+    companion object {
+        private val log = LoggerFactory.getLogger(SearchTask::class.java)
+    }
+}
\ No newline at end of file