strings

SimiaCryptus · Dec 14, 2024 · fcb98a2 · fcb98a2
1 parent 78d4343
commit fcb98a2
Show file tree

Hide file tree

Showing 59 changed files with 1,444 additions and 1,783 deletions.
diff --git a/core/src/main/kotlin/com/simiacryptus/skyenet/core/actors/CodingActor.kt b/core/src/main/kotlin/com/simiacryptus/skyenet/core/actors/CodingActor.kt
@@ -106,10 +106,7 @@ ${details ?: ""}
     get() = this.symbols.map { (name, utilityObj) ->
       val describe = this.describer.describe(utilityObj.javaClass)
       log.info("Describing $name (${utilityObj.javaClass}) in ${describe.length} characters")
-      """
- $name:
-     ${describe.indent("    ")}
- """.trimMargin().trim()
+      "$name:\n    ${describe.indent("    ")}"
     }.joinToString("\n")
 
 
@@ -163,8 +160,8 @@ ${details ?: ""}
       val blocks = extractTextBlocks(respondWithCode)
       val renderedResponse = getRenderedResponse(blocks)
       val codedInstruction = codeInterceptor(getCode(language, blocks))
-      log.debug("Response: \n\t${renderedResponse.replace("\n", "\n\t", false)}".trimMargin())
-      log.debug("New Code: \n\t${codedInstruction.replace("\n", "\n\t", false)}".trimMargin())
+      log.debug("Response: \n\t${renderedResponse.replace("\n", "\n\t", false)}")
+      log.debug("New Code: \n\t${codedInstruction.replace("\n", "\n\t", false)}")
       result = CodeResultImpl(
         *messages,
         input = input,
@@ -254,8 +251,8 @@ ${details ?: ""}
           val codeBlocks = extractTextBlocks(chat(api, request, model))
           val renderedResponse = getRenderedResponse(codeBlocks)
           val codedInstruction = codeInterceptor(getCode(language, codeBlocks))
-          log.debug("Response: \n\t${renderedResponse.replace("\n", "\n\t", false)}".trimMargin())
-          log.debug("New Code: \n\t${codedInstruction.replace("\n", "\n\t", false)}".trimMargin())
+          log.debug("Response: \n\t${renderedResponse.replace("\n", "\n\t", false)}")
+          log.debug("New Code: \n\t${codedInstruction.replace("\n", "\n\t", false)}")
           var workingCode = codedInstruction
           var workingRenderedResponse = renderedResponse
           for (fixAttempt in 0..input.fixIterations) {
@@ -286,16 +283,8 @@ ${TT}
               val codeBlocks = extractTextBlocks(respondWithCode)
               workingRenderedResponse = getRenderedResponse(codeBlocks)
               workingCode = codeInterceptor(getCode(language, codeBlocks))
-              log.debug(
-                "Response: \n\t${
-                  workingRenderedResponse.replace(
-                    "\n",
-                    "\n\t",
-                    false
-                  )
-                }".trimMargin()
-              )
-              log.debug("New Code: \n\t${workingCode.replace("\n", "\n\t", false)}".trimMargin())
+              log.debug("Response: \n\t" + workingRenderedResponse.replace("\n", "\n\t", false))
+              log.debug("New Code: \n\t${workingCode.replace("\n", "\n\t", false)}")
             }
           }
         } catch (ex: FailedToImplementException) {
@@ -424,7 +413,7 @@ Correct the code and try again.
       if (textSegments.size == 1) return textSegments.joinToString("\n") { it.second }
       return textSegments.joinToString("\n") {
         if (it.first.lowercase() == "code" || it.first.lowercase() == language.lowercase()) {
-          it.second.trimMargin().trim()
+          it.second
         } else {
           ""
         }
@@ -487,13 +476,11 @@ Correct the code and try again.
     }
 
     fun errorMessage(ex: ScriptException, code: String) = try {
-      """
-          |${TT}text
-          |${ex.message ?: ""} at line ${ex.lineNumber} column ${ex.columnNumber}
-          |  ${if (ex.lineNumber > 0) code.split("\n")[ex.lineNumber - 1] else ""}
-          |  ${if (ex.columnNumber > 0) " ".repeat(ex.columnNumber - 1) + "^" else ""}
-          |${TT}
-          """.trimMargin().trim()
+      "${TT}text\n${ex.message ?: ""} at line ${ex.lineNumber} column ${ex.columnNumber}\n  ${if (ex.lineNumber > 0) code.split("\n")[ex.lineNumber - 1] else ""}\n  ${
+        if (ex.columnNumber > 0) " ".repeat(
+          ex.columnNumber - 1
+        ) + "^" else ""
+      }\n${TT}".trim()
     } catch (_: Exception) {
       ex.message ?: ""
     }

diff --git a/core/src/main/kotlin/com/simiacryptus/skyenet/core/actors/LargeOutputActor.kt b/core/src/main/kotlin/com/simiacryptus/skyenet/core/actors/LargeOutputActor.kt
@@ -1,10 +1,13 @@
 package com.simiacryptus.skyenet.core.actors
 
+import com.google.common.base.Strings.commonPrefix
 import com.simiacryptus.jopenai.API
 import com.simiacryptus.jopenai.models.ApiModel
+import com.simiacryptus.jopenai.models.ApiModel.Role
 import com.simiacryptus.jopenai.models.ChatModel
 import com.simiacryptus.jopenai.models.OpenAIModels
 import com.simiacryptus.jopenai.models.TextModel
+import com.simiacryptus.jopenai.util.ClientUtil.toChatMessage
 import com.simiacryptus.jopenai.util.ClientUtil.toContentList
 
 /**
@@ -13,135 +16,127 @@ import com.simiacryptus.jopenai.util.ClientUtil.toContentList
  * then recursively expands the result by searching for the pattern and making additional LLM calls.
  */
 class LargeOutputActor(
-    prompt: String = """
-        When generating large responses, please:
-        1. Break down the content into logical sections
-        2. Use named ellipsis markers like '...sectionName...' to indicate where content needs expansion
-        3. Keep each section focused and concise
-        4. Use descriptive section names that reflect the content
-
-        ## Example format:
-        
-        ```markdown
-        # Topic Title
-        ## Overview
-        Here's an overview of the topic ...introduction...
-        ## Main Points
-        The first important aspect is ...mainPoints...
-        ## Technical Details
-        For technical details, ...technicalDetails...
-        ## Conclusion
-        To conclude, ...conclusion...
-        ```
-        
-        Note: Each '...sectionName...' will be expanded in subsequent iterations.
+  prompt: String = """
+        You are a long-form content writer. You have been tasked with writing a comprehensive guide on a topic.
+        1. Break down the content into logical sections using markdown formatting and headers.
+        2. To support large content generation, use markers to indicate where content needs expansion.
+        3. Expansion markers should use a line formatted like '...sectionName...' to indicate where detailed content should be inserted.
+        4. Use descriptive and unique section names that reflect the content expected in that section.
+        5. For the initial iteration, provide a high level document structure with a few expansion markers. Each '...sectionName...' will be expanded in subsequent iterations.
     """.trimIndent(),
-    name: String? = null,
-    model: TextModel = OpenAIModels.GPT4o,
-    temperature: Double = 0.3,
-    private val maxIterations: Int = 5,
-    private val namedEllipsisPattern: Regex = Regex("""\.\.\.(?<sectionName>[\w\s-]+?)\.\.\.""")
+  name: String? = null,
+  model: TextModel = OpenAIModels.GPT4o,
+  temperature: Double = 0.3, private val maxIterations: Int = 3, private val namedEllipsisPattern: Regex = Regex("""\.\.\.(?<sectionName>[\w\s-_]+?)\.\.\.""")
 ) : BaseActor<List<String>, String>(
-    prompt = prompt,
-    name = name,
-    model = model,
-    temperature = temperature
+  prompt = prompt, name = name, model = model, temperature = temperature
 ) {
 
-    override fun chatMessages(questions: List<String>): Array<ApiModel.ChatMessage> {
-        val systemMessage = ApiModel.ChatMessage(
-            role = ApiModel.Role.system,
-            content = prompt.toContentList()
+  /**
+   * Builds the conversation sent to the model: the actor's [prompt] as a single system
+   * message, followed by one user message per entry of [questions], in the given order.
+   */
+  override fun chatMessages(questions: List<String>): Array<ApiModel.ChatMessage> {
+    val conversation = mutableListOf(
+      ApiModel.ChatMessage(role = Role.system, content = prompt.toContentList())
+    )
+    questions.mapTo(conversation) { question ->
+      ApiModel.ChatMessage(role = Role.user, content = question.toContentList())
+    }
+    return conversation.toTypedArray()
+  }
+
+  /**
+   * Produces a long document by repeatedly expanding '...sectionName...' markers.
+   *
+   * The first pass asks the model for an outline using [messages]. Every pass then finds all
+   * markers matching [namedEllipsisPattern], requests an expansion for each one (sending up to
+   * 100 lines / 10000 characters of text before and after the marker as context), and splices
+   * the replies back in from the last marker to the first so earlier match ranges stay valid.
+   * The loop stops when no markers remain or after [maxIterations] passes; markers that are
+   * still present at that point are returned as-is.
+   *
+   * @param input the original questions (not read here; the conversation comes from [messages])
+   * @param api the API client used for every model call
+   * @param messages the initial conversation; the first entry is replaced by a section-writer
+   *   system prompt for expansion calls
+   * @return the accumulated document text
+   * @throws RuntimeException if the initial model reply carries no content
+   */
+  override fun respond(input: List<String>, api: API, vararg messages: ApiModel.ChatMessage): String {
+    var accumulatedResponse = ""
+    var iterations = 0
+    while (iterations < maxIterations) {
+      if (accumulatedResponse.isEmpty()) {
+        accumulatedResponse = response(*messages, api = api).choices.first().message?.content?.trim() ?: throw RuntimeException("No response from LLM")
+      }
+      val matches = namedEllipsisPattern.findAll(accumulatedResponse).toList()
+      if (matches.isEmpty()) break
+      val pairs = matches.mapNotNull { matchResult ->
+        val nextSection = matchResult.groups["sectionName"]?.value ?: return@mapNotNull null
+        val contextLines = 100
+        val contextChars = 10000
+        Pair(
+          matchResult, response(
+            *(listOf(
+              """
+              You are a long-form content writer. You have been tasked with writing a comprehensive guide on a topic by filling in a detail section.
+              1. Break down the content into logical sections using markdown formatting and headers.
+              2. To support large content generation, use markers to indicate where content needs expansion.
+              3. Expansion markers should use a line formatted like '...sectionName...' to indicate where detailed content should be inserted.
+              4. Use descriptive and unique section names that reflect the content expected in that section.
+              """.trimIndent().toChatMessage(Role.system)
+            ) + messages.toList().drop(1) + listOf(
+              ApiModel.ChatMessage(
+                // Context lines are joined with newlines and each fence sits on its own line;
+                // range.last is inclusive, so the following text starts at range.last + 1.
+                role = Role.user, content = ("""
+                  Previous context:
+                  
+                  ```
+                  """.trimIndent() + "\n" + accumulatedResponse.substring(0, matchResult.range.first).lines().takeLast(contextLines).joinToString("\n") { "  $it" }.takeLast(contextChars) + "\n" + """
+                  ```
+                  
+                  Continue the section '""".trimIndent() + nextSection + "'\n" + """
+                  Make sure the response flows naturally with the existing content.
+                  It should end so that it matches the next section, provided below:
+                  
+                  ```
+                  """.trimIndent() + "\n" + accumulatedResponse.substring(matchResult.range.last + 1).lines().take(contextLines).joinToString("\n") { "  $it" }.take(contextChars) + "\n" + """
+                  ```
+                  """.trimIndent()).toContentList()
+              )
+            )).toTypedArray(), api = api
+          )
+        )
-        val userMessages = questions.map {
-            ApiModel.ChatMessage(
-                role = ApiModel.Role.user,
-                content = it.toContentList()
-            )
+      }
+      accumulatedResponse = pairs.reversed().fold(accumulatedResponse) { acc, (match, response) ->
+        val original = response.choices.first().message?.content?.trim() ?: ""
+        var replacement = original
+        // Skip only this marker on an empty reply. A bare `return` here would be a non-local
+        // return out of respond() (fold is inline) and would drop the remaining expansions.
+        if (replacement.isEmpty()) return@fold acc
+        // A reply wrapped in a code fence is unwrapped: drop the opening fence line and
+        // everything from the last closing fence line onwards.
+        if (replacement.startsWith("```")) {
+          replacement = replacement.lines().drop(1).reversed().dropWhile { !it.startsWith("```") }.drop(1).reversed().joinToString("\n")
         }
-        return arrayOf(systemMessage) + userMessages
+        val prefix = acc.substring(0, match.range.first)
+        val suffix = acc.substring(match.range.last + 1)
+        val commonPrefix = commonPrefix(prefix, replacement)
+        if (commonPrefix.isNotBlank() && commonPrefix.contains('\n')) replacement = replacement.substring(commonPrefix.length)
+        // NOTE(review): any non-blank overlap with the following text, however short, cuts the
+        // reply at that overlap; confirm whether a minimum overlap (e.g. a full line) is intended.
+        val largestCommonSubstring = largestCommonSubstring(replacement, suffix)
+        if (largestCommonSubstring.isNotBlank()) replacement = replacement.substring(0, replacement.indexOf(largestCommonSubstring))
+        val replaceRange = acc.replaceRange(match.range, replacement)
+        replaceRange
+      }
+      iterations++
     }
+    return accumulatedResponse
+  }
 
-    override fun respond(input: List<String>, api: API, vararg messages: ApiModel.ChatMessage): String {
-        var accumulatedResponse = ""
-        var currentMessages = messages.toList()
-        var iterations = 0
-        var previousContext = ""
-        var processedSections = mutableSetOf<String>()
-
-        while (iterations < maxIterations) {
-            val response = response(*currentMessages.toTypedArray(), api = api).choices.first().message?.content
-                ?: throw RuntimeException("No response from LLM")
-
-            // Replace the ellipsis in the accumulated response with the new content
-            if (previousContext.isNotEmpty()) {
-                val lastEllipsis = namedEllipsisPattern.find(accumulatedResponse)
-                if (lastEllipsis != null) {
-                    accumulatedResponse = accumulatedResponse.replaceRange(
-                        lastEllipsis.range.first,
-                        lastEllipsis.range.last + 1,
-                        response.trim()
-                    )
-                }
-            } else {
-                accumulatedResponse = response.trim()
-            }
-
-            val matches = namedEllipsisPattern.findAll(response)
-                .mapNotNull { it.groups["sectionName"]?.value }
-                .filter { it !in processedSections }
-                .toList()
-
-            if (matches.isNotEmpty()) {
-                val nextSection = matches.first()
-                processedSections.add(nextSection)
+  /** Creates a new [LargeOutputActor] that uses [model] and carries over every other setting of this one. */
+  override fun withModel(model: ChatModel): LargeOutputActor = LargeOutputActor(
+    prompt = prompt,
+    name = name,
+    model = model,
+    temperature = temperature,
+    maxIterations = maxIterations,
+    namedEllipsisPattern = namedEllipsisPattern
+  )
+}
 
-                // Identify the pattern after the ellipsis to continue
-                val continuationRequest = """
-                    |Previous context:
-                    |$accumulatedResponse
-                    |
-                    |Continue the section '$nextSection' by expanding the ellipsis. 
-                    |Make sure the response flows naturally with the existing content.
-                    |Keep the response focused and avoid creating new ellipsis markers.
-                    """.trimMargin()
-                currentMessages = listOf(
-                    ApiModel.ChatMessage(
-                        role = ApiModel.Role.user,
-                        content = continuationRequest.toContentList()
-                    )
-                )
-                previousContext = accumulatedResponse
-                iterations++
-            } else {
-                break
-            }
+/**
+ * Returns the longest run of characters that appears contiguously in both [a] and [b].
+ *
+ * When several runs share the maximum length, the one that ends earliest in [a] wins.
+ * Returns an empty string when the inputs have no character in common or either is empty.
+ *
+ * Only two rows of the match-length table are kept, so memory grows with the length of [b]
+ * rather than with the product of both lengths.
+ */
+fun largestCommonSubstring(a: String, b: String): String {
+  if (a.isEmpty() || b.isEmpty()) return ""
+  // previousRow[j + 1] holds the length of the common run ending at a[i - 1] and b[j].
+  var previousRow = IntArray(b.length + 1)
+  var currentRow = IntArray(b.length + 1)
+  var bestLength = 0
+  var bestEnd = 0 // exclusive end index of the best run inside a
+  for (i in a.indices) {
+    for (j in b.indices) {
+      if (a[i] == b[j]) {
+        val runLength = previousRow[j] + 1
+        currentRow[j + 1] = runLength
+        if (runLength > bestLength) {
+          bestLength = runLength
+          bestEnd = i + 1
         }
-
-        if (iterations == maxIterations && namedEllipsisPattern.containsMatchIn(accumulatedResponse)) {
-            throw RuntimeException("""
-                |Maximum iterations ($maxIterations) reached. Output may be incomplete.
-                |Processed sections: ${processedSections.joinToString(", ")}
-                |Remaining ellipsis markers: ${
-                    namedEllipsisPattern.findAll(accumulatedResponse)
-                        .mapNotNull { it.groups["sectionName"]?.value }
-                        .joinToString(", ")
-                }
-                |Current length: ${accumulatedResponse.length}
-            """.trimMargin())
-        }
-
-        return accumulatedResponse
-    }
-
-    override fun withModel(model: ChatModel): LargeOutputActor {
-        return LargeOutputActor(
-            prompt = this.prompt,
-            name = this.name,
-            model = model,
-            temperature = this.temperature,
-            maxIterations = this.maxIterations,
-            namedEllipsisPattern = this.namedEllipsisPattern
-        )
+      } else {
+        // The row is reused, so a mismatch must reset the stale value explicitly.
+        currentRow[j + 1] = 0
+      }
     }
+    val recycledRow = previousRow
+    previousRow = currentRow
+    currentRow = recycledRow
+  }
+  return a.substring(bestEnd - bestLength, bestEnd)
 }
diff --git a/core/src/main/kotlin/com/simiacryptus/skyenet/core/actors/ParsedActor.kt b/core/src/main/kotlin/com/simiacryptus/skyenet/core/actors/ParsedActor.kt
@@ -66,19 +66,17 @@ open class ParsedActor<T : Any>(
     val describe = resultClass?.let { describer.describe(it) } ?: ""
     val exceptions = mutableListOf<Exception>()
     val prompt = """
-            |Parse the user's message into a json object described by:
-            |
-            |```yaml
-            |${describe.replace("\n", "\n  ")}
-            |```
-            |
-            |This is an example output:
-            |```json
-            |${JsonUtil.toJson(exampleInstance!!)/*.indent("  ")*/}
-            |```
-            |${promptSuffix?.let { "\n$it" } ?: ""}
-            |
-          """.trimMargin()
+              Parse the user's message into a json object described by:
+              
+              ```yaml
+              """.trimIndent() + describe.replace("\n", "\n  ") + """
+              ```
+              
+              This is an example output:
+              ```json
+              """ + JsonUtil.toJson(exampleInstance!!) + """
+              ```
+              """.trimIndent() + (promptSuffix?.let { "\n$it" } ?: "")
     for (i in 0 until deserializerRetries) {
       try {
         val content = (api as ChatClient).chat(