Skip to content

Commit

Permalink
strings
Browse files Browse the repository at this point in the history
  • Loading branch information
acharneski committed Dec 14, 2024
1 parent 78d4343 commit fcb98a2
Show file tree
Hide file tree
Showing 59 changed files with 1,444 additions and 1,783 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,7 @@ ${details ?: ""}
get() = this.symbols.map { (name, utilityObj) ->
val describe = this.describer.describe(utilityObj.javaClass)
log.info("Describing $name (${utilityObj.javaClass}) in ${describe.length} characters")
"""
$name:
${describe.indent(" ")}
""".trimMargin().trim()
"$name:\n ${describe.indent(" ")}"
}.joinToString("\n")


Expand Down Expand Up @@ -163,8 +160,8 @@ ${details ?: ""}
val blocks = extractTextBlocks(respondWithCode)
val renderedResponse = getRenderedResponse(blocks)
val codedInstruction = codeInterceptor(getCode(language, blocks))
log.debug("Response: \n\t${renderedResponse.replace("\n", "\n\t", false)}".trimMargin())
log.debug("New Code: \n\t${codedInstruction.replace("\n", "\n\t", false)}".trimMargin())
log.debug("Response: \n\t${renderedResponse.replace("\n", "\n\t", false)}")
log.debug("New Code: \n\t${codedInstruction.replace("\n", "\n\t", false)}")
result = CodeResultImpl(
*messages,
input = input,
Expand Down Expand Up @@ -254,8 +251,8 @@ ${details ?: ""}
val codeBlocks = extractTextBlocks(chat(api, request, model))
val renderedResponse = getRenderedResponse(codeBlocks)
val codedInstruction = codeInterceptor(getCode(language, codeBlocks))
log.debug("Response: \n\t${renderedResponse.replace("\n", "\n\t", false)}".trimMargin())
log.debug("New Code: \n\t${codedInstruction.replace("\n", "\n\t", false)}".trimMargin())
log.debug("Response: \n\t${renderedResponse.replace("\n", "\n\t", false)}")
log.debug("New Code: \n\t${codedInstruction.replace("\n", "\n\t", false)}")
var workingCode = codedInstruction
var workingRenderedResponse = renderedResponse
for (fixAttempt in 0..input.fixIterations) {
Expand Down Expand Up @@ -286,16 +283,8 @@ ${TT}
val codeBlocks = extractTextBlocks(respondWithCode)
workingRenderedResponse = getRenderedResponse(codeBlocks)
workingCode = codeInterceptor(getCode(language, codeBlocks))
log.debug(
"Response: \n\t${
workingRenderedResponse.replace(
"\n",
"\n\t",
false
)
}".trimMargin()
)
log.debug("New Code: \n\t${workingCode.replace("\n", "\n\t", false)}".trimMargin())
log.debug("Response: \n\t" + workingRenderedResponse.replace("\n", "\n\t", false))
log.debug("New Code: \n\t${workingCode.replace("\n", "\n\t", false)}")
}
}
} catch (ex: FailedToImplementException) {
Expand Down Expand Up @@ -424,7 +413,7 @@ Correct the code and try again.
if (textSegments.size == 1) return textSegments.joinToString("\n") { it.second }
return textSegments.joinToString("\n") {
if (it.first.lowercase() == "code" || it.first.lowercase() == language.lowercase()) {
it.second.trimMargin().trim()
it.second
} else {
""
}
Expand Down Expand Up @@ -487,13 +476,11 @@ Correct the code and try again.
}

/**
 * Renders a [ScriptException] as a `text` block delimited by `TT`
 * (presumably the markdown code-fence token declared elsewhere in this file — confirm).
 *
 * The block contains the exception message with its line/column, the offending
 * source line (when `ex.lineNumber` is positive) and a `^` caret placed under the
 * reported column (when `ex.columnNumber` is positive).
 *
 * @param ex the script failure to describe
 * @param code the source text that was being evaluated; split on `\n` to locate the failing line
 * @return the formatted block, or just `ex.message` (empty string when null) if formatting fails,
 *         e.g. because the reported line number lies beyond the end of [code]
 */
fun errorMessage(ex: ScriptException, code: String) = try {
    // Indexing past the last line throws; the catch below turns that into the plain message.
    val offendingLine = if (ex.lineNumber > 0) code.split("\n")[ex.lineNumber - 1] else ""
    val caret = if (ex.columnNumber > 0) " ".repeat(ex.columnNumber - 1) + "^" else ""
    // Built with explicit "\n" rather than trimMargin(): interpolated text may itself contain
    // margin characters or leading whitespace that a post-hoc trim would corrupt.
    "${TT}text\n${ex.message ?: ""} at line ${ex.lineNumber} column ${ex.columnNumber}\n $offendingLine\n $caret\n${TT}".trim()
} catch (_: Exception) {
    ex.message ?: ""
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package com.simiacryptus.skyenet.core.actors

import com.google.common.base.Strings.commonPrefix
import com.simiacryptus.jopenai.API
import com.simiacryptus.jopenai.models.ApiModel
import com.simiacryptus.jopenai.models.ApiModel.Role
import com.simiacryptus.jopenai.models.ChatModel
import com.simiacryptus.jopenai.models.OpenAIModels
import com.simiacryptus.jopenai.models.TextModel
import com.simiacryptus.jopenai.util.ClientUtil.toChatMessage
import com.simiacryptus.jopenai.util.ClientUtil.toContentList

/**
Expand All @@ -13,135 +16,127 @@ import com.simiacryptus.jopenai.util.ClientUtil.toContentList
* then recursively expands the result by searching for the pattern and making additional LLM calls.
*/
class LargeOutputActor(
    prompt: String = """
        You are a long-form content writer. You have been tasked with writing a comprehensive guide on a topic.
        1. Break down the content into logical sections using markdown formatting and headers.
        2. To support large content generation, use markers to indicate where content needs expansion.
        3. Expansion markers should use a line formatted like '...sectionName...' to indicate where detailed content should be inserted.
        4. Use descriptive and unique section names that reflect the content expected in that section.
        5. For the initial iteration, provide a high level document structure with a few expansion markers. Each '...sectionName...' will be expanded in subsequent iterations.
    """.trimIndent(),
    name: String? = null,
    model: TextModel = OpenAIModels.GPT4o,
    temperature: Double = 0.3,
    // Upper bound on expansion passes; each pass expands every marker currently in the document.
    private val maxIterations: Int = 3,
    // Matches '...sectionName...' markers and captures the name in the `sectionName` group.
    private val namedEllipsisPattern: Regex = Regex("""\.\.\.(?<sectionName>[\w\s-_]+?)\.\.\.""")
) : BaseActor<List<String>, String>(
    prompt = prompt, name = name, model = model, temperature = temperature
) {

    /**
     * Builds the initial conversation: the actor prompt as a system message followed by
     * one user message per entry in [questions].
     */
    override fun chatMessages(questions: List<String>): Array<ApiModel.ChatMessage> {
        val systemMessage = ApiModel.ChatMessage(
            role = Role.system, content = prompt.toContentList()
        )
        val userMessages = questions.map {
            ApiModel.ChatMessage(
                role = Role.user, content = it.toContentList()
            )
        }
        return arrayOf(systemMessage) + userMessages
    }

    /**
     * Requests an outline from the model, then repeatedly replaces every
     * '...sectionName...' marker with a dedicated model response, for at most
     * [maxIterations] passes or until no marker remains.
     *
     * @return the document after the last pass; markers may remain if the pass limit was reached
     * @throws RuntimeException if the initial response carries no content
     */
    override fun respond(input: List<String>, api: API, vararg messages: ApiModel.ChatMessage): String {
        var accumulatedResponse = ""
        for (iteration in 0 until maxIterations) {
            if (accumulatedResponse.isEmpty()) {
                accumulatedResponse = response(*messages, api = api).choices.first().message?.content?.trim()
                    ?: throw RuntimeException("No response from LLM")
            }
            val markers = namedEllipsisPattern.findAll(accumulatedResponse).toList()
            if (markers.isEmpty()) break
            val expansions = markers.mapNotNull { marker ->
                val sectionName = marker.groups["sectionName"]?.value ?: return@mapNotNull null
                marker to response(*expansionMessages(accumulatedResponse, marker, sectionName, messages), api = api)
            }
            // Splice from the last marker to the first so the ranges of earlier markers stay valid.
            accumulatedResponse = expansions.reversed().fold(accumulatedResponse) { acc, (marker, expansion) ->
                spliceExpansion(acc, marker, expansion.choices.first().message?.content?.trim() ?: "")
            }
        }
        return accumulatedResponse
    }

    /**
     * Builds the conversation used to expand one marker: a section-writer system prompt,
     * the caller's messages minus the first one (the original system message), and a user
     * request quoting the text around the marker.
     */
    private fun expansionMessages(
        document: String,
        marker: MatchResult,
        sectionName: String,
        messages: Array<out ApiModel.ChatMessage>
    ): Array<ApiModel.ChatMessage> {
        val contextLines = 100
        val contextChars = 10000
        // Lines are joined with "\n"; the default joinToString separator is ", ".
        val before = document.substring(0, marker.range.first)
            .lines().takeLast(contextLines).joinToString("\n") { " $it" }.takeLast(contextChars)
        // range.last is inclusive, so the text after the marker starts at last + 1.
        val after = document.substring(marker.range.last + 1)
            .lines().take(contextLines).joinToString("\n") { " $it" }.take(contextChars)
        val systemMessage = """
            You are a long-form content writer. You have been tasked with writing a comprehensive guide on a topic by filling in a detail section.
            1. Break down the content into logical sections using markdown formatting and headers.
            2. To support large content generation, use markers to indicate where content needs expansion.
            3. Expansion markers should use a line formatted like '...sectionName...' to indicate where detailed content should be inserted.
            4. Use descriptive and unique section names that reflect the content expected in that section.
        """.trimIndent().toChatMessage(Role.system)
        // Assembled line by line so the quoted context sits on its own lines between the fences
        // and is never subject to indent trimming.
        val request = listOf(
            "Previous context:",
            "```",
            before,
            "```",
            "Continue the section '$sectionName'",
            "Make sure the response flows naturally with the existing content.",
            "It should end so that it matches the next section, provided below:",
            "```",
            after,
            "```"
        ).joinToString("\n")
        val userMessage = ApiModel.ChatMessage(
            role = Role.user, content = request.toContentList()
        )
        return (listOf(systemMessage) + messages.toList().drop(1) + listOf(userMessage)).toTypedArray()
    }

    /**
     * Replaces [marker] inside [acc] with [rawExpansion] after light clean-up. An empty
     * expansion leaves [acc] untouched, so the marker survives for a later pass.
     */
    private fun spliceExpansion(acc: String, marker: MatchResult, rawExpansion: String): String {
        if (rawExpansion.isEmpty()) return acc
        var replacement = rawExpansion
        if (replacement.startsWith("```")) {
            // Unwrap a fenced answer: drop the opening fence line and everything from the closing fence on.
            replacement = replacement.lines().drop(1)
                .dropLastWhile { !it.startsWith("```") }.dropLast(1)
                .joinToString("\n")
        }
        val before = acc.substring(0, marker.range.first)
        val after = acc.substring(marker.range.last + 1)
        // Drop a leading echo of the document start, but only when it spans at least one line break.
        val echoedLead = commonPrefix(before, replacement)
        if (echoedLead.isNotBlank() && echoedLead.contains('\n')) {
            replacement = replacement.substring(echoedLead.length)
        }
        // NOTE(review): this cuts the expansion at the first occurrence of the longest substring it
        // shares with the rest of the document, however short that substring is — presumably meant
        // to remove a repeated start of the next section; confirm a minimum overlap is not wanted.
        val overlap = largestCommonSubstring(replacement, after)
        if (overlap.isNotBlank()) {
            replacement = replacement.substring(0, replacement.indexOf(overlap))
        }
        return acc.replaceRange(marker.range, replacement)
    }

    /** Returns a copy of this actor that uses [model], keeping every other setting. */
    override fun withModel(model: ChatModel): LargeOutputActor {
        return LargeOutputActor(
            prompt = this.prompt,
            name = this.name,
            model = model,
            temperature = this.temperature,
            maxIterations = this.maxIterations,
            namedEllipsisPattern = this.namedEllipsisPattern
        )
    }
}
/**
 * Returns the longest contiguous run of characters that occurs in both [a] and [b].
 *
 * When several runs share the maximum length, the one that ends earliest in [a] is
 * returned. The result is empty when either input is empty or the strings share no
 * character.
 *
 * Uses the classic dynamic-programming recurrence (length of the common suffix of
 * `a[0..i]` and `b[0..j]`), keeping only the previous and current rows of the table.
 */
fun largestCommonSubstring(a: String, b: String): String {
    if (a.isEmpty() || b.isEmpty()) return ""
    // previous[j + 1] = length of the common suffix ending at a[i - 1] and b[j].
    var previous = IntArray(b.length + 1)
    var current = IntArray(b.length + 1)
    var bestLength = 0
    var bestEnd = 0 // exclusive end index in `a` of the best run found so far
    for (i in a.indices) {
        for (j in b.indices) {
            current[j + 1] = if (a[i] == b[j]) previous[j] + 1 else 0
            // Strict comparison keeps the first run of maximal length.
            if (current[j + 1] > bestLength) {
                bestLength = current[j + 1]
                bestEnd = i + 1
            }
        }
        // Reuse the two rows; every cell of `current` except index 0 is rewritten next round.
        val swap = previous
        previous = current
        current = swap
    }
    return a.substring(bestEnd - bestLength, bestEnd)
}
Original file line number Diff line number Diff line change
Expand Up @@ -66,19 +66,17 @@ open class ParsedActor<T : Any>(
val describe = resultClass?.let { describer.describe(it) } ?: ""
val exceptions = mutableListOf<Exception>()
val prompt = """
|Parse the user's message into a json object described by:
|
|```yaml
|${describe.replace("\n", "\n ")}
|```
|
|This is an example output:
|```json
|${JsonUtil.toJson(exampleInstance!!)/*.indent(" ")*/}
|```
|${promptSuffix?.let { "\n$it" } ?: ""}
|
""".trimMargin()
Parse the user's message into a json object described by:
```yaml
""".trimIndent() + describe.replace("\n", "\n ") + """
```
This is an example output:
```json
""" + JsonUtil.toJson(exampleInstance!!) + """
```
""".trimIndent() + (promptSuffix?.let { "\n$it" } ?: "")
for (i in 0 until deserializerRetries) {
try {
val content = (api as ChatClient).chat(
Expand Down
Loading

0 comments on commit fcb98a2

Please sign in to comment.