Skip to content

Commit

Permalink
1.2.17 2 (#118)
Browse files Browse the repository at this point in the history
* 1.2.17

* 1.2.17

* fix

* 1.2.17-2

* 1.2.17

* Update MetadataStorageInterfaceTest.kt

* fix

* Update HSQLMetadataStorage.kt

* Update ActorTestAppServer.kt
  • Loading branch information
acharneski authored Nov 15, 2024
1 parent b4feb03 commit 59f93bd
Show file tree
Hide file tree
Showing 26 changed files with 1,050 additions and 86 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ openai.key
*.log.*
client_secret_google_oauth.json
settings.gradle.kts
*.data
*.parsed.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package com.simiacryptus.skyenet.core.actors

import com.simiacryptus.jopenai.API
import com.simiacryptus.jopenai.models.ApiModel
import com.simiacryptus.jopenai.models.ChatModel
import com.simiacryptus.jopenai.models.OpenAIModels
import com.simiacryptus.jopenai.models.TextModel
import com.simiacryptus.jopenai.util.ClientUtil.toContentList

/**
 * An actor that produces large outputs over several LLM round-trips.
 *
 * The system [prompt] instructs the model to leave named ellipsis markers
 * (`...sectionName...`) wherever content still needs to be written. After each
 * reply, [respond] looks for those markers and, if any are present, asks the
 * model to continue each named section. The trimmed replies are concatenated
 * in the order they were received.
 *
 * @param prompt system prompt describing the marker convention to the model
 * @param name optional actor name, forwarded to [BaseActor]
 * @param model model used for every call made by this actor
 * @param temperature sampling temperature, forwarded to [BaseActor]
 * @param maxIterations maximum number of continuation rounds before [respond] gives up
 * @param ellipsisPattern pattern for a bare ellipsis; currently not consulted by this class
 *   and retained only so existing callers keep compiling
 * @param namedEllipsisPattern pattern locating markers; it must define a named group
 *   `sectionName`, which is read from every match
 */
class LargeOutputActor(
    prompt: String = """
        When generating large responses, please:
        1. Break down the content into logical sections
        2. Use named ellipsis markers like '...sectionName...' to indicate where content needs expansion
        3. Keep each section focused and concise
        4. Use descriptive section names that reflect the content
        ## Example format:
        ```markdown
        # Topic Title
        ## Overview
        Here's an overview of the topic ...introduction...
        ## Main Points
        The first important aspect is ...mainPoints...
        ## Technical Details
        For technical details, ...technicalDetails...
        ## Conclusion
        To conclude, ...conclusion...
        ```
        Note: Each '...sectionName...' will be expanded in subsequent iterations.
    """.trimIndent(),
    name: String? = null,
    model: TextModel = OpenAIModels.GPT4o,
    temperature: Double = 0.3,
    private val maxIterations: Int = 5,
    private val ellipsisPattern: Regex = Regex("\\.\\.\\."),
    private val namedEllipsisPattern: Regex = Regex("""\.\.\.(?<sectionName>[\w\s]+)\.\.\.""")
) : BaseActor<List<String>, String>(
    prompt = prompt,
    name = name,
    model = model,
    temperature = temperature
) {

    /**
     * Builds the initial conversation: the system [prompt] followed by one user
     * message per entry in [questions], in order.
     */
    override fun chatMessages(questions: List<String>): Array<ApiModel.ChatMessage> {
        val systemMessage = ApiModel.ChatMessage(
            role = ApiModel.Role.system,
            content = prompt.toContentList()
        )
        val userMessages = questions.map {
            ApiModel.ChatMessage(
                role = ApiModel.Role.user,
                content = it.toContentList()
            )
        }
        return arrayOf(systemMessage) + userMessages
    }

    /**
     * Runs the expansion loop and returns the concatenation of all trimmed replies.
     *
     * Each round sends the conversation so far, appends the reply to the result, and
     * stops as soon as a reply contains no named ellipsis marker. Otherwise the reply
     * and one continuation request per marker are appended to the conversation for the
     * next round.
     *
     * @throws RuntimeException if the model returns no content, or if [maxIterations]
     *   continuation rounds were used and the accumulated text still contains markers
     */
    override fun respond(input: List<String>, api: API, vararg messages: ApiModel.ChatMessage): String {
        val accumulatedResponse = StringBuilder()
        var conversation = messages.toList()
        var iterations = 0

        while (iterations < maxIterations) {
            // firstOrNull: an empty choice list is reported with the same error as a
            // missing message instead of escaping as a bare NoSuchElementException.
            val reply = response(*conversation.toTypedArray(), api = api)
                .choices.firstOrNull()?.message?.content
                ?: throw RuntimeException("No response from LLM")

            accumulatedResponse.append(reply.trim())

            val sectionNames = namedEllipsisPattern.findAll(reply)
                .mapNotNull { it.groups["sectionName"]?.value }
                .toList()
            if (sectionNames.isEmpty()) break

            // Keep the full history (system prompt, questions, and the draft just
            // produced) so the continuation requests refer to something the model
            // can actually see; previously only the bare requests were sent.
            val draft = ApiModel.ChatMessage(
                role = ApiModel.Role.assistant,
                content = reply.toContentList()
            )
            val continuationRequests = sectionNames.map { sectionName ->
                ApiModel.ChatMessage(
                    role = ApiModel.Role.user,
                    content = "Continue the section '$sectionName' by expanding the ellipsis.".toContentList()
                )
            }
            conversation = conversation + draft + continuationRequests
            iterations++
        }

        if (iterations == maxIterations && namedEllipsisPattern.containsMatchIn(accumulatedResponse)) {
            throw RuntimeException("Maximum iterations reached. Output may be incomplete.")
        }

        return accumulatedResponse.toString()
    }

    /**
     * Returns a copy of this actor that uses [model]; every other setting,
     * including both ellipsis patterns, is carried over unchanged.
     */
    override fun withModel(model: ChatModel): LargeOutputActor {
        return LargeOutputActor(
            prompt = this.prompt,
            name = this.name,
            model = model,
            temperature = this.temperature,
            maxIterations = this.maxIterations,
            ellipsisPattern = this.ellipsisPattern,
            // Was previously omitted, silently resetting a custom pattern to the default.
            namedEllipsisPattern = this.namedEllipsisPattern
        )
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,14 @@ open class ParsedActor<T : Any>(
}

contentUnwrapped.let {
return@Function JsonUtil.fromJson<T>(
it, resultClass
?: throw RuntimeException("Result class undefined")
)
try {
return@Function JsonUtil.fromJson<T>(
it, resultClass
?: throw RuntimeException("Result class undefined")
)
} catch (e: Exception) {
throw RuntimeException("Failed to parse response: ${it.replace("\n", "\n ")}", e)
}
}
} catch (e: Exception) {
log.info("Failed to parse response", e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,11 @@ object ApplicationServices {
require(!isLocked) { "ApplicationServices is locked" }
field = value
}

var cloud: CloudPlatformInterface? = AwsPlatform.get()
set(value) {
require(!isLocked) { "ApplicationServices is locked" }
field = value
}


var seleniumFactory: ((ThreadPoolExecutor, Array<out jakarta.servlet.http.Cookie>?) -> Selenium)? = null
set(value) {
require(!isLocked) { "ApplicationServices is locked" }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package com.simiacryptus.skyenet.core.platform

import com.simiacryptus.skyenet.core.platform.model.CloudPlatformInterface
import org.slf4j.LoggerFactory
import software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain
import software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider
import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider
import software.amazon.awssdk.core.SdkBytes
import software.amazon.awssdk.core.sync.RequestBody
Expand All @@ -19,9 +21,20 @@ open class AwsPlatform(
private val bucket: String = System.getProperty("share_bucket", "share.simiacrypt.us"),
override val shareBase: String = System.getProperty("share_base", "https://" + bucket),
private val region: Region? = Region.US_EAST_1,
private val profileName: String = "default",
profileName: String? = System.getProperty("aws.profile", "default").let { if (it.isBlank()) null else it },
) : CloudPlatformInterface {
open val credentialsProvider: ProfileCredentialsProvider? = ProfileCredentialsProvider.create(profileName)

open val credentialsProvider = AwsCredentialsProviderChain.builder()
.credentialsProviders(
// Try EC2 instance profile credentials first
InstanceProfileCredentialsProvider.create(),
// Then try profile credentials if profile name is provided
profileName?.let {
ProfileCredentialsProvider.create(it)
} ?: ProfileCredentialsProvider.create()
)
.build()

private val log = LoggerFactory.getLogger(AwsPlatform::class.java)

protected open val kmsClient: KmsClient by lazy {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,24 +162,29 @@ open class DataStorage(
ApplicationServices.metadataStorageFactory(dataDir).deleteSession(user, session)
sessionDir.deleteRecursively()
}
@Deprecated("Use metadataStorage instead")

override fun listSessions(dir: File, path: String): List<String> = ApplicationServices.metadataStorageFactory(dataDir).listSessions(path)
@Deprecated("Use metadataStorage instead")

override fun getSessionName(
user: User?,
session: Session
): String = ApplicationServices.metadataStorageFactory(dataDir).getSessionName(user, session)
@Deprecated("Use metadataStorage instead")

override fun getMessageIds(
user: User?,
session: Session
): List<String> = ApplicationServices.metadataStorageFactory(dataDir).getMessageIds(user, session)
@Deprecated("Use metadataStorage instead")

override fun setMessageIds(
user: User?,
session: Session,
ids: List<String>
) = ApplicationServices.metadataStorageFactory(dataDir).setMessageIds(user, session, ids)
@Deprecated("Use metadataStorage instead")

override fun getSessionTime(
user: User?,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class HSQLMetadataStorage(private val dbFile: File) : MetadataStorageInterface {
private val log = LoggerFactory.getLogger(javaClass)

private val connection: Connection by lazy {
require(dbFile.absoluteFile.exists() || dbFile.absoluteFile.mkdirs()) { "Unable to create database directory: ${dbFile.absolutePath}" }
log.info("Initializing HSQLMetadataStorage with database file: ${dbFile.absolutePath}")
Class.forName("org.hsqldb.jdbc.JDBCDriver")
val connection = DriverManager.getConnection("jdbc:hsqldb:file:${dbFile.absolutePath}/metadata;shutdown=true", "SA", "")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,22 @@ package com.simiacryptus.skyenet.core.util
import java.net.URL

interface Selenium : AutoCloseable {
fun navigate(url: String)
fun getPageSource(): String
fun getCurrentUrl(): String
fun executeScript(script: String): Any?
fun quit()

fun save(
url: URL,
currentFilename: String?,
saveRoot: String
)

abstract fun setScriptTimeout(timeout: Long)
abstract fun getBrowserInfo(): String
fun forceQuit()
abstract fun isAlive(): Boolean
//
// open fun setCookies(
// driver: WebDriver,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ package com.simiacryptus.skyenet.core.platform.hsql
import com.simiacryptus.skyenet.core.platform.test.MetadataStorageInterfaceTest
import java.nio.file.Files

class HSQLMetadataStorageTest : MetadataStorageInterfaceTest(HSQLMetadataStorage(Files.createTempDirectory("metadataStorage").toFile()))
//class HSQLMetadataStorageTest : MetadataStorageInterfaceTest(HSQLMetadataStorage(Files.createTempDirectory("metadataStorage").toFile()))
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Gradle Releases -> https://github.com/gradle/gradle/releases
libraryGroup=com.simiacryptus.skyenet
libraryVersion=1.2.16
libraryVersion=1.2.17
gradleVersion=7.6.1
kotlin.daemon.jvmargs=-Xmx4g
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,12 @@ import com.simiacryptus.skyenet.core.actors.ParsedActor
open class CodeParsingModel(
private val parsingModel: ChatModel,
private val temperature: Double
) : ParsingModel {
) : ParsingModel<CodeParsingModel.CodeData> {

override fun merge(
runningDocument: ParsingModel.DocumentData,
newData: ParsingModel.DocumentData
): ParsingModel.DocumentData {
val runningDocument = runningDocument as CodeData
val newData = newData as CodeData
runningDocument: CodeData,
newData: CodeData
): CodeData {
return CodeData(
id = newData.id ?: runningDocument.id,
content_list = mergeContent(runningDocument.content_list, newData.content_list).takeIf { it.isNotEmpty() },
Expand All @@ -40,20 +38,25 @@ open class CodeParsingModel(

protected open fun mergeContentData(existing: CodeContent, new: CodeContent) = existing.copy(
content_list = mergeContent(existing.content_list, new.content_list).takeIf { it.isNotEmpty() },
tags = ((existing.tags ?: emptyList()) + (new.tags ?: emptyList())).distinct().takeIf { it.isNotEmpty() }
tags = ((existing.tags ?: emptyList()) + (new.tags ?: emptyList())).distinct().takeIf { it.isNotEmpty() },
startLine = new.startLine ?: existing.startLine,
endLine = new.endLine ?: existing.endLine,
startPos = new.startPos ?: existing.startPos,
endPos = new.endPos ?: existing.endPos
)

open val promptSuffix = """
Parse the code into a structured format that describes its components:
1. Separate the content into sections, paragraphs, statements, etc.
2. All source content should be included in the output, with paraphrasing, corrections, and context as needed
3. Each content leaf node text should be simple and self-contained
4. Assign relevant tags to each node to improve searchability and categorization.
4. Assign relevant tags to each node to improve searchability and categorization
5. Track line numbers and character positions for each content node when possible
""".trimMargin()

open val exampleInstance = CodeData()

override fun getParser(api: API): (String) -> CodeData {
override fun getFastParser(api: API): (String) -> CodeData {
val parser = ParsedActor(
resultClass = CodeData::class.java,
exampleInstance = exampleInstance,
Expand All @@ -77,7 +80,11 @@ Parse the code into a structured format that describes its components:
@Description("Content type, e.g. function, class, comment") override val type: String = "",
@Description("Brief, self-contained text either copied, paraphrased, or summarized") override val text: String? = null,
@Description("Sub-elements") override val content_list: List<CodeContent>? = null,
@Description("Tags - related topics and non-entity indexing") override val tags: List<String>? = null
@Description("Tags - related topics and non-entity indexing") override val tags: List<String>? = null,
@Description("Starting line number in source") val startLine: Int? = null,
@Description("Ending line number in source") val endLine: Int? = null,
@Description("Starting character position") val startPos: Int? = null,
@Description("Ending character position") val endPos: Int? = null
) : ParsingModel.ContentData

companion object {
Expand Down
Loading

0 comments on commit 59f93bd

Please sign in to comment.