Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
acharneski committed Dec 12, 2024
1 parent 302c6f5 commit 583970c
Show file tree
Hide file tree
Showing 30 changed files with 345 additions and 343 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import com.simiacryptus.jopenai.describe.Description
import com.simiacryptus.jopenai.models.ChatModel
import com.simiacryptus.skyenet.TabbedDisplay
import com.simiacryptus.skyenet.apps.plan.*
import com.simiacryptus.skyenet.apps.plan.file.FileModificationTask.FileModificationTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.file.FileModificationTask.FileModificationTaskConfigData
import com.simiacryptus.skyenet.core.actors.ParsedActor
import com.simiacryptus.skyenet.core.platform.Session
import com.simiacryptus.skyenet.core.platform.model.User
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import com.simiacryptus.jopenai.ChatClient
import com.simiacryptus.jopenai.OpenAIClient
import com.simiacryptus.jopenai.models.ChatModel
import com.simiacryptus.skyenet.apps.plan.*
import com.simiacryptus.skyenet.apps.plan.file.InquiryTask.InquiryTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.file.InquiryTask.InquiryTaskConfigData
import com.simiacryptus.skyenet.core.platform.Session
import com.simiacryptus.skyenet.core.platform.model.User
import com.simiacryptus.skyenet.util.MarkdownUtil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@ package com.simiacryptus.skyenet.apps.plan

import com.simiacryptus.jopenai.describe.AbbrevWhitelistYamlDescriber
import com.simiacryptus.jopenai.models.ChatModel
import com.simiacryptus.skyenet.apps.plan.CommandAutoFixTask.CommandAutoFixTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.CommandAutoFixTask.CommandAutoFixTaskConfigData
import com.simiacryptus.skyenet.apps.plan.PlanUtil.isWindows
import com.simiacryptus.skyenet.apps.plan.PlanningTask.PlanningTaskConfigData
import com.simiacryptus.skyenet.apps.plan.PlanningTask.TaskBreakdownResult
import com.simiacryptus.skyenet.apps.plan.tools.PlanningTask.PlanningTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.PlanningTask.TaskBreakdownResult
import com.simiacryptus.skyenet.apps.plan.TaskType.Companion.getAvailableTaskTypes
import com.simiacryptus.skyenet.apps.plan.TaskType.Companion.getImpl
import com.simiacryptus.skyenet.apps.plan.file.FileModificationTask.FileModificationTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.file.FileModificationTask.FileModificationTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.CommandAutoFixTask
import com.simiacryptus.skyenet.core.actors.ParsedActor


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,26 @@ package com.simiacryptus.skyenet.apps.plan

import com.fasterxml.jackson.databind.annotation.JsonDeserialize
import com.fasterxml.jackson.databind.annotation.JsonSerialize
import com.simiacryptus.skyenet.apps.plan.CommandAutoFixTask.CommandAutoFixTaskConfigData
import com.simiacryptus.skyenet.apps.plan.ForeachTask.ForeachTaskConfigData
import com.simiacryptus.skyenet.apps.plan.GoogleSearchTask.GoogleSearchTaskConfigData
import com.simiacryptus.skyenet.apps.plan.PlanningTask.PlanningTaskConfigData
import com.simiacryptus.skyenet.apps.plan.RunShellCommandTask.RunShellCommandTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.CommandAutoFixTask.CommandAutoFixTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.ForeachTask.ForeachTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.GoogleSearchTask.GoogleSearchTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.PlanningTask.PlanningTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.RunShellCommandTask.RunShellCommandTaskConfigData
import com.simiacryptus.skyenet.apps.plan.file.*
import com.simiacryptus.skyenet.apps.plan.file.CodeOptimizationTask.CodeOptimizationTaskConfigData
import com.simiacryptus.skyenet.apps.plan.file.CodeReviewTask.CodeReviewTaskConfigData
import com.simiacryptus.skyenet.apps.plan.file.DocumentationTask.DocumentationTaskConfigData
import com.simiacryptus.skyenet.apps.plan.file.FileModificationTask.FileModificationTaskConfigData
import com.simiacryptus.skyenet.apps.plan.file.InquiryTask.InquiryTaskConfigData
import com.simiacryptus.skyenet.apps.plan.file.PerformanceAnalysisTask.PerformanceAnalysisTaskConfigData
import com.simiacryptus.skyenet.apps.plan.file.RefactorTask.RefactorTaskConfigData
import com.simiacryptus.skyenet.apps.plan.file.SecurityAuditTask.SecurityAuditTaskConfigData
import com.simiacryptus.skyenet.apps.plan.file.TestGenerationTask.TestGenerationTaskConfigData
import com.simiacryptus.skyenet.apps.plan.knowledge.EmbeddingSearchTask
import com.simiacryptus.skyenet.apps.plan.knowledge.KnowledgeIndexingTask
import com.simiacryptus.skyenet.apps.plan.knowledge.WebSearchAndIndexTask
import com.simiacryptus.skyenet.apps.plan.tools.file.CodeOptimizationTask.CodeOptimizationTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.file.CodeReviewTask.CodeReviewTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.file.DocumentationTask.DocumentationTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.file.FileModificationTask.FileModificationTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.file.InquiryTask.InquiryTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.file.PerformanceAnalysisTask.PerformanceAnalysisTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.file.RefactorTask.RefactorTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.file.SecurityAuditTask.SecurityAuditTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.file.TestGenerationTask.TestGenerationTaskConfigData
import com.simiacryptus.skyenet.apps.plan.tools.knowledge.EmbeddingSearchTask
import com.simiacryptus.skyenet.apps.plan.tools.knowledge.KnowledgeIndexingTask
import com.simiacryptus.skyenet.apps.plan.tools.knowledge.WebSearchAndIndexTask
import com.simiacryptus.skyenet.apps.plan.tools.*
import com.simiacryptus.skyenet.apps.plan.tools.file.*
import com.simiacryptus.util.DynamicEnum
import com.simiacryptus.util.DynamicEnumDeserializer
import com.simiacryptus.util.DynamicEnumSerializer
Expand Down
Original file line number Diff line number Diff line change
@@ -1,150 +1,6 @@
package com.simiacryptus.skyenet.apps.plan

import com.simiacryptus.jopenai.API
import com.simiacryptus.jopenai.ChatClient
import com.simiacryptus.jopenai.OpenAIClient
import com.simiacryptus.jopenai.describe.Description
import com.simiacryptus.skyenet.core.actors.SimpleActor
import com.simiacryptus.skyenet.util.MarkdownUtil
import com.simiacryptus.skyenet.webui.session.SessionTask
import org.apache.hc.client5.http.classic.methods.HttpGet
import org.apache.hc.client5.http.impl.classic.HttpClients
import org.apache.hc.core5.http.io.entity.EntityUtils
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Node
import org.jsoup.select.NodeFilter.FilterResult
import org.slf4j.LoggerFactory

/**
 * Plan task that downloads a web page, reduces its HTML with [scrubHtml], and asks the
 * plan's default model to rewrite the reduced content toward a caller-supplied goal.
 *
 * The transformed text is both rendered into the session task and passed to the result callback.
 */
open class WebFetchAndTransformTask(
    planSettings: PlanSettings,
    planTask: WebFetchAndTransformTaskConfigData?
) : AbstractTask<WebFetchAndTransformTask.WebFetchAndTransformTaskConfigData>(planSettings, planTask) {

    /**
     * Configuration for one fetch-and-transform task.
     *
     * @property url address of the page to download
     * @property transformationGoal free-text description of the desired output format or focus
     */
    class WebFetchAndTransformTaskConfigData(
        @Description("The URL to fetch")
        val url: String,
        @Description("The desired format or focus for the transformation")
        val transformationGoal: String,
        task_description: String? = null,
        task_dependencies: List<String>? = null,
        state: TaskState? = null,
    ) : TaskConfigBase(
        task_type = TaskType.WebFetchAndTransform.name,
        task_description = task_description,
        task_dependencies = task_dependencies,
        state = state
    )

    /**
     * Short description of this task type for inclusion in the planning prompt.
     *
     * Uses `trimIndent()`: the text carries no `|` margin markers, so `trimMargin()` would leave
     * whatever source indentation the literal has inside the prompt.
     */
    override fun promptSegment() = """
        WebFetchAndTransform - Fetch a web page, strip HTML, and transform content
        ** Specify the URL to fetch
        ** Specify the desired format or focus for the transformation
    """.trimIndent()

    /**
     * Fetches the configured URL, scrubs the HTML, transforms it with the model, then renders the
     * result into [task] and hands it to [resultFn].
     *
     * A missing task configuration is treated as an empty URL and an empty transformation goal.
     */
    override fun run(
        agent: PlanCoordinator,
        messages: List<String>,
        task: SessionTask,
        api: ChatClient,
        resultFn: (String) -> Unit,
        api2: OpenAIClient,
        planSettings: PlanSettings
    ) {
        val fetchedContent = fetchAndStripHtml(taskConfig?.url ?: "")
        val transformedContent = transformContent(fetchedContent, taskConfig?.transformationGoal ?: "", api, planSettings)
        task.add(MarkdownUtil.renderMarkdown(transformedContent, ui = agent.ui))
        resultFn(transformedContent)
    }

    /**
     * Downloads [url] with a default HTTP client and returns the scrubbed response body.
     *
     * Both the client and the response are closed via `use`. The page URL is passed on as the
     * base URI so that relative links in the page can be resolved.
     */
    private fun fetchAndStripHtml(url: String): String =
        HttpClients.createDefault().use { httpClient ->
            httpClient.execute(HttpGet(url)).use { response ->
                scrubHtml(EntityUtils.toString(response.entity), baseUri = url)
            }
        }

    /**
     * Asks the plan's default model to transform [content] according to [transformationGoal].
     *
     * The prompt strings are assembled directly rather than with `trimIndent()`/`trimMargin()`:
     * those run after interpolation, so multi-line page content would take part in (and could be
     * altered by) the indentation/margin stripping.
     *
     * @return the model's answer
     */
    private fun transformContent(content: String, transformationGoal: String, api: API, planSettings: PlanSettings): String {
        val instruction = "Transform the following web content according to this goal: $transformationGoal"
        val prompt = "$instruction\nContent:\n$content\nTransformed content:"
        return SimpleActor(
            prompt = prompt,
            model = planSettings.defaultModel,
        ).answer(
            listOf("$instruction\n\n$content"),
            api
        )
    }

    companion object {
        private val log = LoggerFactory.getLogger(WebFetchAndTransformTask::class.java)

        /** Attributes kept on every element; all others are stripped. */
        private val IMPORTANT_ATTRIBUTES =
            setOf("href", "src", "alt", "title", "width", "height", "style", "class", "id", "name")

        /** Attributes whose values are URLs to be made absolute. */
        private val URL_ATTRIBUTES = listOf("href", "src")

        /**
         * Parses [str] as HTML, applies a fixed sequence of clean-up passes, and returns the body's
         * inner HTML cut to at most [maxLength] characters.
         *
         * Before each pass the current body HTML length is compared with [maxLength]; once it is
         * larger, the remaining passes are skipped and only the final truncation applies.
         * NOTE(review): this means a page that starts out over the limit is truncated without any
         * cleaning. Confirm whether the comparison was meant to be the other way round.
         *
         * @param str raw HTML
         * @param maxLength upper bound on the length of the returned string
         * @param baseUri base used to resolve relative `href`/`src` values; with the default empty
         *   value, relative URLs are left as they are
         * @return scrubbed (and possibly truncated) body HTML
         */
        fun scrubHtml(str: String, maxLength: Int = 100 * 1024, baseUri: String = ""): String {
            val document: Document = Jsoup.parse(str, baseUri)

            val passes = listOf<Document.() -> Unit>(
                {
                    // Remove unnecessary and potentially harmful tags, and switch to compact output
                    select("script, style, link, meta, iframe, noscript").remove()
                    outputSettings().prettyPrint(false)
                },
                {
                    // Remove comments. childNodes() is an unmodifiable list, so filter it into a
                    // new list and detach each comment through Node.remove().
                    select("*").forEach { element ->
                        element.childNodes()
                            .filter { it.nodeName() == "#comment" }
                            .forEach { it.remove() }
                    }
                },
                {
                    // Remove data-* attributes; [^data-] is jsoup's attribute-name-prefix selector
                    select("[^data-]").forEach { element ->
                        element.attributes().removeAll { attr -> attr.key.startsWith("data-") }
                    }
                },
                {
                    // Keep only the whitelisted attributes
                    select("*").forEach { element ->
                        element.attributes().removeAll { it.key !in IMPORTANT_ATTRIBUTES }
                    }
                },
                {
                    // Remove empty elements. text() is the Node.text() extension declared in this file.
                    // NOTE(review): hasAttr("img") can never be true once attributes() is empty;
                    // the intent may have been to keep <img> elements. Left as-is.
                    select("*").filter { node, _ ->
                        if (node.text().isBlank() && node.attributes().isEmpty() && !node.hasAttr("img")) FilterResult.REMOVE else FilterResult.CONTINUE
                    }
                },
                {
                    // Unwrap attribute-less elements whose only child is a text node
                    select("*").forEach { element ->
                        if (element.childNodes().size == 1 && element.childNodes()[0].nodeName() == "#text" && element.attributes().isEmpty()) {
                            element.unwrap()
                        }
                    }
                },
                {
                    // Convert relative URLs to absolute. Only attributes that are present are
                    // touched, and only when a non-empty absolute form is returned.
                    select("[href],[src]").forEach { element ->
                        for (key in URL_ATTRIBUTES) {
                            if (!element.hasAttr(key)) continue
                            val absolute = element.absUrl(key)
                            if (absolute.isNotEmpty()) element.attr(key, absolute)
                        }
                    }
                },
                {
                    // Remove empty attributes
                    select("*").forEach { element ->
                        element.attributes().removeAll { it.value.isBlank() }
                    }
                },
            )

            for (pass in passes) {
                if (document.body().html().length > maxLength) break
                pass(document)
            }

            // Truncate if necessary
            return document.body().html().take(maxLength)
        }
    }
}

private fun Node.text(): String {
return this.childNodes().joinToString("") { it.text() }
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.simiacryptus.skyenet.apps.plan
package com.simiacryptus.skyenet.apps.plan.tools

import com.simiacryptus.jopenai.ChatClient
import com.simiacryptus.jopenai.OpenAIClient
Expand All @@ -7,7 +7,7 @@ import com.simiacryptus.jopenai.models.ChatModel
import com.simiacryptus.skyenet.Retryable
import com.simiacryptus.skyenet.apps.general.CmdPatchApp
import com.simiacryptus.skyenet.apps.general.PatchApp
import com.simiacryptus.skyenet.apps.plan.CommandAutoFixTask.CommandAutoFixTaskConfigData
import com.simiacryptus.skyenet.apps.plan.*
import com.simiacryptus.skyenet.util.MarkdownUtil
import com.simiacryptus.skyenet.webui.session.SessionTask
import org.slf4j.LoggerFactory
Expand Down Expand Up @@ -146,7 +146,7 @@ ${settings.commandAutoFixCommands?.joinToString("\n") { " * ${File(it).name}"
) {
onComplete()
}
if(autoRetries-- > 0) retryable?.retry()
if (autoRetries-- > 0) retryable?.retry()
s
})
task.placeholder
Expand Down
Loading

0 comments on commit 583970c

Please sign in to comment.