Skip to content

Commit

Permalink
1.2.8 (#108)
Browse files Browse the repository at this point in the history
* 1.2.8

* fix
  • Loading branch information
acharneski authored Oct 10, 2024
1 parent 6acd6e1 commit 42c3875
Show file tree
Hide file tree
Showing 13 changed files with 247 additions and 77 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,18 +76,18 @@ Maven:
<dependency>
<groupId>com.simiacryptus</groupId>
<artifactId>skyenet-webui</artifactId>
<version>1.1.6</version>
<version>1.1.7</version>
</dependency>
```

Gradle:

```groovy
implementation group: 'com.simiacryptus', name: 'skyenet', version: '1.1.6'
implementation group: 'com.simiacryptus', name: 'skyenet', version: '1.1.7'
```

```kotlin
implementation("com.simiacryptus:skyenet:1.1.6")
implementation("com.simiacryptus:skyenet:1.1.7")
```

### 🌟 To Use
Expand Down
2 changes: 1 addition & 1 deletion core/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ val hsqldb_version = "2.7.2"

dependencies {

implementation(group = "com.simiacryptus", name = "jo-penai", version = "1.1.6")
implementation(group = "com.simiacryptus", name = "jo-penai", version = "1.1.7")
implementation(group = "org.hsqldb", name = "hsqldb", version = hsqldb_version)

implementation("org.apache.commons:commons-text:1.11.0")
Expand Down
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Gradle Releases -> https://github.com/gradle/gradle/releases
libraryGroup = com.simiacryptus.skyenet
libraryVersion = 1.2.7
libraryVersion = 1.2.8
gradleVersion = 7.6.1
kotlin.daemon.jvmargs=-Xmx4g
2 changes: 1 addition & 1 deletion webui/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ val jackson_version = "2.17.2"

dependencies {

implementation(group = "com.simiacryptus", name = "jo-penai", version = "1.1.6") {
implementation(group = "com.simiacryptus", name = "jo-penai", version = "1.1.7") {
exclude(group = "org.slf4j")
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package com.simiacryptus.skyenet.apps.parsers

import com.simiacryptus.jopenai.API
import com.simiacryptus.jopenai.describe.Description
import com.simiacryptus.jopenai.models.ChatModels
import com.simiacryptus.skyenet.core.actors.ParsedActor

/**
 * [ParsingModel] implementation for source code.
 *
 * Defines the [CodeData] document structure produced by the parser and the rules used to
 * fold a newly parsed [CodeData] fragment into an accumulated ("running") one.
 *
 * @property parsingModel chat model handed to the [ParsedActor] built in [getParser]
 * @property temperature sampling temperature handed to the [ParsedActor] built in [getParser]
 */
open class CodeParsingModel(
    private val parsingModel: ChatModels,
    private val temperature: Double
) : ParsingModel {

    /**
     * Combines the accumulated document with a newly parsed fragment.
     *
     * The id of [newData] wins when present; content, entities and metadata are merged via
     * [mergeContent], [mergeEntities] and [mergeMetadata]. Empty content/entities collapse to `null`.
     *
     * @throws ClassCastException if either argument is not a [CodeData]
     */
    override fun merge(
        runningDocument: ParsingModel.DocumentData,
        newData: ParsingModel.DocumentData
    ): ParsingModel.DocumentData {
        // Distinct local names: the original re-declared the parameter names, shadowing them.
        val running = runningDocument as CodeData
        val incoming = newData as CodeData
        return CodeData(
            id = incoming.id ?: running.id,
            content = mergeContent(running.content, incoming.content).takeIf { it.isNotEmpty() },
            entities = mergeEntities(running.entities, incoming.entities).takeIf { it.isNotEmpty() },
            metadata = mergeMetadata(running.metadata, incoming.metadata)
        )
    }

    /**
     * Merges two metadata records. The new language wins when present, libraries are
     * concatenated and de-duplicated, and properties are overlaid (new values replace
     * existing ones for the same key).
     *
     * Note: `libraries` is always a (possibly empty) list, whereas empty `properties`
     * collapse to `null`.
     */
    protected open fun mergeMetadata(existing: CodeMetadata?, new: CodeMetadata?): CodeMetadata {
        return CodeMetadata(
            language = new?.language ?: existing?.language,
            libraries = (existing?.libraries.orEmpty() + new?.libraries.orEmpty()).distinct(),
            properties = (existing?.properties.orEmpty() + new?.properties.orEmpty()).takeIf { it.isNotEmpty() }
        )
    }

    /**
     * Merges two content lists. A new item whose type and trimmed text match an item already
     * in the merged list is folded into that item via [mergeContentData]; otherwise it is appended.
     *
     * Matching is performed against the list as it grows, so duplicates within [newContent]
     * are folded together as well.
     */
    protected open fun mergeContent(
        existingContent: List<CodeContent>?,
        newContent: List<CodeContent>?
    ): List<CodeContent> {
        val mergedContent = existingContent.orEmpty().toMutableList()
        newContent.orEmpty().forEach { newItem ->
            val newText = newItem.text?.trim()
            val existingIndex = mergedContent.indexOfFirst { it.type == newItem.type && it.text?.trim() == newText }
            if (existingIndex != -1) {
                mergedContent[existingIndex] = mergeContentData(mergedContent[existingIndex], newItem)
            } else {
                mergedContent.add(newItem)
            }
        }
        return mergedContent
    }

    /**
     * Folds [new] into [existing]: sub-content is merged recursively, entity references and
     * tags are concatenated and de-duplicated. Type and text are kept from [existing].
     */
    protected open fun mergeContentData(existing: CodeContent, new: CodeContent): CodeContent = existing.copy(
        content = mergeContent(existing.content, new.content).takeIf { it.isNotEmpty() },
        entities = (existing.entities.orEmpty() + new.entities.orEmpty()).distinct().takeIf { it.isNotEmpty() },
        tags = (existing.tags.orEmpty() + new.tags.orEmpty()).distinct().takeIf { it.isNotEmpty() }
    )

    /**
     * Merges two entity maps by key. Entities present in only one map are kept as-is;
     * entities present in both are combined via [mergeEntityData].
     */
    protected open fun mergeEntities(
        existingEntities: Map<String, CodeEntity>?,
        newEntities: Map<String, CodeEntity>?
    ): Map<String, CodeEntity> {
        val existingMap = existingEntities.orEmpty()
        val newMap = newEntities.orEmpty()
        return (existingMap.keys + newMap.keys).associateWith { key ->
            val existing = existingMap[key]
            val new = newMap[key]
            if (existing != null && new != null) {
                mergeEntityData(existing, new)
            } else {
                // The key came from the union of both key sets, so one side must hold it.
                checkNotNull(existing ?: new) { "Entity '$key' is missing from both maps" }
            }
        }
    }

    /**
     * Folds [new] into [existing]: aliases are concatenated and de-duplicated, properties and
     * relations are overlaid (new values replace existing ones per key), and the new type
     * wins when present.
     */
    protected open fun mergeEntityData(existing: CodeEntity, new: CodeEntity): CodeEntity = existing.copy(
        aliases = (existing.aliases.orEmpty() + new.aliases.orEmpty()).distinct().takeIf { it.isNotEmpty() },
        properties = (existing.properties.orEmpty() + new.properties.orEmpty()).takeIf { it.isNotEmpty() },
        relations = (existing.relations.orEmpty() + new.relations.orEmpty()).takeIf { it.isNotEmpty() },
        type = new.type ?: existing.type
    )

    /**
     * Instruction text passed as `promptSuffix` to the parser built in [getParser].
     *
     * Uses `trimIndent()` because the lines carry no `|` margin marker; `trimMargin()` would
     * leave any source indentation in the prompt.
     */
    open val promptSuffix = """
        Parse the code into a structured format that describes its components:
        1. Identify functions, classes, and other code structures.
        2. Extract comments and document them with their associated code.
        3. Capture any dependencies or libraries used in the code.
        4. Extract metadata such as programming language and version if available.
        5. Assign relevant tags to each code section to improve searchability and categorization.
        6. Do not copy data from the accumulated code JSON to your response; it is provided for context only.
    """.trimIndent()

    /** Example instance handed to [ParsedActor]; an empty [CodeData] by default. */
    open val exampleInstance = CodeData()

    /**
     * Builds a function that parses raw text into [CodeData] by delegating to a [ParsedActor]
     * configured with [parsingModel], [temperature], [exampleInstance] and [promptSuffix].
     */
    override fun getParser(api: API): (String) -> CodeData {
        val parser = ParsedActor(
            resultClass = CodeData::class.java,
            exampleInstance = exampleInstance,
            prompt = "",
            parsingModel = parsingModel,
            temperature = temperature
        ).getParser(
            api, promptSuffix = promptSuffix
        )
        return { text -> parser.apply(text) }
    }

    /** Creates the empty document used as the starting point for merging. */
    override fun newDocument() = CodeData()

    /** Root of a parsed code document. */
    data class CodeData(
        @Description("Code identifier") override val id: String? = null,
        @Description("Entities extracted") val entities: Map<String, CodeEntity>? = null,
        @Description("Hierarchical structure and data") override val content: List<CodeContent>? = null,
        @Description("Code metadata") override val metadata: CodeMetadata? = null
    ) : ParsingModel.DocumentData

    /** A named entity extracted from the code, stored in [CodeData.entities]. */
    data class CodeEntity(
        @Description("Aliases for the entity") val aliases: List<String>? = null,
        @Description("Entity attributes extracted from the code") val properties: Map<String, Any>? = null,
        @Description("Entity relationships extracted from the code") val relations: Map<String, String>? = null,
        @Description("Entity type (e.g., function, class, variable)") val type: String? = null
    )

    /** A node in the hierarchical content tree of a [CodeData]. */
    data class CodeContent(
        @Description("Content type, e.g. function, class, comment") override val type: String = "",
        @Description("Brief, self-contained text either copied, paraphrased, or summarized") override val text: String? = null,
        @Description("Sub-elements") override val content: List<CodeContent>? = null,
        @Description("Related entities by ID") val entities: List<String>? = null,
        @Description("Tags - related topics and non-entity indexing") override val tags: List<String>? = null
    ) : ParsingModel.ContentData

    /** Document-level metadata of a [CodeData]. */
    data class CodeMetadata(
        @Description("Programming language") val language: String? = null,
        @Description("Libraries or dependencies associated with the code") val libraries: List<String>? = null,
        @Description("Other metadata") val properties: Map<String, Any>? = null,
    ) : ParsingModel.DocumentMetadata

    companion object {
        val log = org.slf4j.LoggerFactory.getLogger(CodeParsingModel::class.java)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package com.simiacryptus.skyenet.apps.parsers

import com.simiacryptus.jopenai.API
import com.simiacryptus.jopenai.ChatClient
import com.simiacryptus.jopenai.models.AnthropicModels
import com.simiacryptus.util.JsonUtil
import com.simiacryptus.skyenet.TabbedDisplay
import com.simiacryptus.skyenet.core.platform.Session
Expand All @@ -26,14 +25,11 @@ open class DocumentParserApp(
applicationName: String = "Document Extractor",
path: String = "/pdfExtractor",
val api: API = ChatClient(),
val parsingModel: ParsingModel = DefaultParsingModel(AnthropicModels.Claude35Sonnet, 0.1),
val parsingModel: ParsingModel,
val reader: (File) -> DocumentReader = {
when {
it.name.endsWith(".pdf", ignoreCase = true) -> PDFReader(it)
it.name.endsWith(".txt", ignoreCase = true) -> TextReader(it)
it.name.endsWith(".md", ignoreCase = true) -> TextReader(it)
it.name.endsWith(".html", ignoreCase = true) -> TextReader(it)
else -> throw IllegalArgumentException("Unsupported file type")
else -> TextReader(it)
}
},
val fileInput: Path? = null,
Expand Down Expand Up @@ -262,9 +258,9 @@ open class DocumentParserApp(
val fileInput: String? = "",
val showImages: Boolean = true,
val pagesPerBatch: Int = 1,
val saveImageFiles: Boolean = true,
val saveTextFiles: Boolean = true,
val saveFinalJson: Boolean = false
val saveImageFiles: Boolean = false,
val saveTextFiles: Boolean = false,
val saveFinalJson: Boolean = true
)

override val settingsClass: Class<*> get() = Settings::class.java
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ import com.simiacryptus.jopenai.models.ChatModels
import com.simiacryptus.skyenet.core.actors.ParsedActor


open class DefaultParsingModel(
open class DocumentParsingModel(
private val parsingModel: ChatModels,
private val temperature: Double
) : ParsingModel {

override fun merge(
runningDocument: ParsingModel.DocumentData,
newData: ParsingModel.DocumentData
) : ParsingModel.DocumentData {
): ParsingModel.DocumentData {
val runningDocument = runningDocument as DocumentData
val newData = newData as DocumentData
return DocumentData(
Expand Down Expand Up @@ -87,7 +87,9 @@ open class DefaultParsingModel(
|6. Assign relevant tags to each content section to improve searchability and categorization.
|7. Do not copy data from the accumulated document JSON to your response; it is provided for context only.
""".trimMargin()

open val exampleInstance = DocumentData()

override fun getParser(api: API): (String) -> DocumentData {
val parser = ParsedActor(
resultClass = DocumentData::class.java,
Expand All @@ -104,10 +106,10 @@ open class DefaultParsingModel(
override fun newDocument() = DocumentData()

data class DocumentData(
@Description("Document/Page identifier") val id: String? = null,
@Description("Document/Page identifier") override val id: String? = null,
@Description("Entities extracted") val entities: Map<String, EntityData>? = null,
@Description("Hierarchical structure and data") val content: List<ContentData>? = null,
@Description("Document metadata") val metadata: DocumentMetadata? = null
@Description("Hierarchical structure and data") override val content: List<ContentData>? = null,
@Description("Document metadata") override val metadata: DocumentMetadata? = null
) : ParsingModel.DocumentData

data class EntityData(
Expand All @@ -118,20 +120,21 @@ open class DefaultParsingModel(
)

data class ContentData(
@Description("Content type, e.g. heading, paragraph, statement, list") val type: String = "",
@Description("Brief, self-contained text either copied, paraphrased, or summarized") val text: String? = null,
@Description("Sub-elements") val content: List<ContentData>? = null,
@Description("Content type, e.g. heading, paragraph, statement, list") override val type: String = "",
@Description("Brief, self-contained text either copied, paraphrased, or summarized") override val text: String? = null,
@Description("Sub-elements") override val content: List<ContentData>? = null,
@Description("Related entities by ID") val entities: List<String>? = null,
@Description("Tags - related topics and non-entity indexing") val tags: List<String>? = null
)
@Description("Tags - related topics and non-entity indexing") override val tags: List<String>? = null
) : ParsingModel.ContentData

data class DocumentMetadata(
@Description("Document title") val title: String? = null,
@Description("Keywords or tags associated with the document") val keywords: List<String>? = null,
@Description("Other metadata") val properties: Map<String, Any>? = null,
)
) : ParsingModel.DocumentMetadata

companion object {
val log = org.slf4j.LoggerFactory.getLogger(DefaultParsingModel::class.java)
val log = org.slf4j.LoggerFactory.getLogger(DocumentParsingModel::class.java)

}

Expand Down
Loading

0 comments on commit 42c3875

Please sign in to comment.