Skip to content

Commit

Permalink
feat(comment): make kotlin comment support works #1
Browse files Browse the repository at this point in the history
  • Loading branch information
phodal committed Dec 29, 2023
1 parent 61f99ab commit 1e411a7
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class SingleProjectCodePicker(private val config: InsPickerOption) {

val workerManager = WorkerManager(
WorkerContext(
config.codeContextStrategies,
config.buildPlan,
config.codeQualityTypes,
config.insOutputConfig,
pureDataFileName = config.pureDataFileName(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@ class KotlinCommentBuilder : CommentBuilder {
override val docInstruction: DocInstruction = DocInstruction.KOTLIN

override fun build(code: String, container: CodeContainer): List<TypedCommentIns> {
val posComments = extractKdocComments(code)
val posComments = try {
extractKdocComments(code)
} catch (e: Exception) {
emptyList()
}

val startLineCommentMap: Map<Int, CodeComment> = posComments.associateBy {
it.position.StopLine
}
Expand All @@ -25,60 +30,91 @@ class KotlinCommentBuilder : CommentBuilder {

container.DataStructures.forEach { dataStruct ->
val classComment = startLineCommentMap[dataStruct.Position.StartLine - 1]
classComment?.let { comments.add(ClassCommentIns(dataStruct, it)) }
classComment?.let { comments.add(ClassCommentIns(dataStruct, it, language = "kotlin")) }

dataStruct.Functions
.filter { it.Name != "constructor" && it.Name != "PrimaryConstructor" }
.forEach { function ->
val functionComment = startLineCommentMap[function.Position.StartLine - 1]
functionComment?.let { comments.add(MethodCommentIns(function, it, dataStruct)) }
functionComment?.let {
comments.add(
MethodCommentIns(
function,
it,
dataStruct,
language = "kotlin"
)
)
}
}
}

return comments
}

companion object {
private val commentPattern = Regex("""\s+/\*\*([^*]|(\*+[^*/]))*\*+/""")

/**
* Extracts the Kotlin documentation comments (KDoc) from the given code.
*
* @param code the Kotlin code from which to extract the KDoc comments
* @return a list of pairs, where each pair contains the line number and the extracted KDoc comment
*/
fun extractKdocComments(code: String): List<CodeComment> {
val matches = commentPattern.findAll(code)

val comments = mutableListOf<CodeComment>()

for (match in matches) {
val commentContent = match.value.trimIndent()
val startLine = code.substring(0, match.range.first).count { it == '\n' } + 1
val stopLine = code.substring(0, match.range.last).count { it == '\n' } + 1
val startLinePosition = match.range.first - code.lastIndexOf('\n', match.range.first) - 1
val stopLinePosition = match.range.last - code.lastIndexOf('\n', match.range.last) - 1

val position = CodePosition(startLine, startLinePosition, stopLine, stopLinePosition)
val comment = CodeComment(commentContent, position)
comments.add(comment)
}
/**
 * Extracts the Kotlin documentation comments (KDoc) from the given code.
 *
 * Every block comment opened with `/**` is matched. For each match the text is passed through
 * `trimIndent()` and `reIndentComment`, and its location is recorded as a [CodePosition] built from:
 * - the 1-based line of the opening `/` and of the closing `/`;
 * - the 0-based offset of each of those characters within its own line.
 *
 * @param code the Kotlin code from which to extract the KDoc comments
 * @return one [CodeComment] per KDoc block, in the order the blocks appear in [code]
 */
fun extractKdocComments(code: String): List<CodeComment> {
    val pattern = Regex("""/\*\*[^*]*\*+([^/*][^*]*\*+)*/""")

    val comments = mutableListOf<CodeComment>()

    // Matches are produced in ascending offset order, so newlines can be counted incrementally
    // instead of rescanning the text from offset 0 for every comment.
    var scannedUpTo = 0
    var currentLine = 1
    fun lineOf(offset: Int): Int {
        while (scannedUpTo < offset) {
            if (code[scannedUpTo] == '\n') currentLine++
            scannedUpTo++
        }
        return currentLine
    }

    for (match in pattern.findAll(code)) {
        val first = match.range.first
        val last = match.range.last

        val startLine = lineOf(first)
        val stopLine = lineOf(last)
        // lastIndexOf returns -1 on the first line, which makes the offset equal to the index itself.
        val startLinePosition = first - code.lastIndexOf('\n', first) - 1
        val stopLinePosition = last - code.lastIndexOf('\n', last) - 1

        val position = CodePosition(startLine, startLinePosition, stopLine, stopLinePosition)
        val content = reIndentComment(match.value.trimIndent())

        comments.add(CodeComment(content, position))
    }

    return comments
}

/**
 * Re-indents a comment so that it no longer carries the indentation of the code it was found in.
 *
 * The run of leading spaces on the second line is taken as the indentation to strip. That prefix is
 * removed from every line that starts with it, and every line except the first is then prefixed
 * with a single space so the `*` gutter lines up under the opening `/**`.
 *
 * @param content the comment text, lines separated by `\n`
 * @return the re-indented comment; a comment with fewer than two lines is returned unchanged
 */
private fun reIndentComment(content: String): String {
    val lines = content.split("\n")

    // A one-line comment (`/** ... */`) has no continuation line to measure the indent from;
    // indexing lines[1] would throw IndexOutOfBoundsException.
    if (lines.size < 2) return content

    val indent = lines[1].takeWhile { it == ' ' }

    return lines
        .mapIndexed { index, line ->
            val stripped = line.removePrefix(indent)
            // except the first line, every line should have one leading space
            if (index == 0) stripped else " $stripped"
        }
        .joinToString("\n")
}
}

@Serializable
data class ClassCommentIns(
val dataStructure: CodeDataStruct,
val comment: CodeComment,
val language: String,
) : TypedCommentIns() {
override val builderLevel: CommentBuilderType = CommentBuilderType.CLASS_LEVEL

override fun unique(): Instruction {
val instruction = "Write documentation for given class " + dataStructure.NodeName + " ."
val input = dataStructure.Content
val instruction = "Write documentation for given class " + dataStructure.NodeName + " .\n"
val input = "Code:\n```$language\n" + dataStructure.Content + "\n```"
val output = comment.content

return Instruction(instruction, input, output)
Expand All @@ -90,12 +126,13 @@ data class MethodCommentIns(
val function: CodeFunction,
val comment: CodeComment,
val currentDataStruct: CodeDataStruct,
val language: String,
) : TypedCommentIns() {
override val builderLevel: CommentBuilderType = CommentBuilderType.METHOD_LEVEL

override fun unique(): Instruction {
val instruction = "Write documentation for given method" + function.Name + " ."
val input = "\n### Current class:\n" + currentDataStruct.toUml() + "\n###" + function.Content
val instruction = "Write documentation for given method " + function.Name + " .\n"
val input = "### Current class:\n" + currentDataStruct.toUml() + "\n###\n" + "Code:\n```$language\n" + currentDataStruct.Content + "\n```"
val output = comment.content

return Instruction(instruction, input, output)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package cc.unitmesh.pick.option

import cc.unitmesh.pick.strategy.BizCodeContextStrategy
import cc.unitmesh.pick.strategy.BuildPlanType
import cc.unitmesh.core.completion.CompletionBuilderType
import cc.unitmesh.pick.threshold.InsQualityThreshold
import cc.unitmesh.quality.CodeQualityType
Expand All @@ -25,7 +25,7 @@ const val MAX_COMPLETION_EACH_FILE = 10
* @property branch The branch of the repository. Default value is "master".
* @property language The programming language of the code in the repository. Default value is "java".
* @property baseDir The base directory where the datasets are stored. Default value is "datasets".
* @property codeContextStrategies The strategies to determine the code context. Default value is [BizCodeContextStrategy.RELATED_CODE].
* @property buildPlan The strategies to determine the code context. Default value is a list containing only [BuildPlanType.COMMENT].
* Possible values are:
* - [BuildPlanType.SIMILAR_CHUNKS]: Determines the code context based on similar code chunks.
* - [BuildPlanType.RELATED_CODE]: Determines the code context based on related code.
Expand Down Expand Up @@ -53,15 +53,16 @@ data class InsPickerOption(
val language: String = "java",
val baseDir: String = "datasets",
/**
* The [BizCodeContextStrategy], suggest to be one of:.
* The [BuildPlanType] values to apply; suggested to be one or more of:
*
* - [BizCodeContextStrategy.SIMILAR_CHUNKS]
* - [BizCodeContextStrategy.RELATED_CODE]
* - [BuildPlanType.SIMILAR_CHUNKS]
* - [BuildPlanType.RELATED_CODE]
*
*/
val codeContextStrategies: List<BizCodeContextStrategy> = listOf(
BizCodeContextStrategy.RELATED_CODE,
BizCodeContextStrategy.SIMILAR_CHUNKS,
val buildPlan: List<BuildPlanType> = listOf(
// BuildPlanType.RELATED_CODE,
// BuildPlanType.SIMILAR_CHUNKS,
BuildPlanType.COMMENT,
),
/**
* The [CompletionBuilderType], which will according you IDE strategy to generate the type.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@ import cc.unitmesh.pick.worker.job.JobContext
import kotlinx.serialization.SerializationException

/**
* The `CodeContextStrategy` enum class represents different strategies for generating code context in AutoDev.
* The `BuildPlanType` enum class represents different strategies for generating code context in AutoDev.
*
* There are two available strategies:
* 1. [BizCodeContextStrategy.SIMILAR_CHUNKS]: This prompt is used with pre-built context for unsupported languages.
* 1. [BuildPlanType.SIMILAR_CHUNKS]: This prompt is used with pre-built context for unsupported languages.
* It allows AutoDev to generate code context with similar code chunks builder.
* 2. [BizCodeContextStrategy.RELATED_CODE]: This prompt is used with pre-built context. It allows AutoDev to
* 2. [BuildPlanType.RELATED_CODE]: This prompt is used with pre-built context. It allows AutoDev to
* generate code context with similar code builder.
*
* The strategies are used through the `builder` function, which takes an `InstructionContext` parameter and returns an `InstructionBuilder` object.
*
* Note that the `builder` function throws a `SerializationException` if the prompt is unknown.
*/
enum class BizCodeContextStrategy {
enum class BuildPlanType {
/**
* the AutoDev with pre-build context for un-support language
*/
Expand All @@ -34,14 +34,14 @@ enum class BizCodeContextStrategy {
/**
* Builds instructions for capabilities that need no code context, such as comment generation.
*/
EMPTY_CONTEXT
COMMENT
;

/**
 * Creates the [CodeStrategyBuilder] that corresponds to this entry.
 *
 * Only the selected builder is constructed; the other strategy builders are not instantiated.
 *
 * @param context the job context passed to the selected builder's constructor
 * @return the strategy builder for this entry
 * @throws SerializationException if this entry has no builder associated with it
 */
fun builder(context: JobContext): CodeStrategyBuilder {
    return when (this) {
        SIMILAR_CHUNKS -> SimilarChunksStrategyBuilder(context)
        RELATED_CODE -> RelatedCodeStrategyBuilder(context)
        COMMENT -> CommentsStrategyBuilder(context)
        // Fallback for any entry without a dedicated builder; keeps the documented exception contract.
        else -> throw SerializationException("Unknown message type: $this")
    }
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,27 @@
package cc.unitmesh.pick.strategy.bizcode

import cc.unitmesh.core.SupportedLang
import cc.unitmesh.core.completion.TypedIns
import cc.unitmesh.pick.builder.comment.KotlinCommentBuilder
import cc.unitmesh.pick.strategy.base.CodeStrategyBuilder
import cc.unitmesh.pick.worker.job.JobContext

/**
 * Strategy for AI capabilities that do not need any surrounding code context, such as
 * comment generation.
 */
class CommentsStrategyBuilder(val context: JobContext) : CodeStrategyBuilder {
    private val kotlinCommentBuilder = KotlinCommentBuilder()

    /**
     * Builds the comment instructions for the current job.
     *
     * @return the instructions produced by the comment builder for the project's language, or an
     * empty list when the job has no parsed container or the language has no comment builder yet
     */
    override fun build(): List<TypedIns> {
        val container = context.job.container ?: return emptyList()

        return when (context.project.language) {
            // NOTE(review): Java is routed through the Kotlin comment builder, as in the original
            // dispatch — confirm that this is intended.
            SupportedLang.JAVA,
            SupportedLang.KOTLIN -> kotlinCommentBuilder.build(context.job.code, container)

            // No TypeScript comment builder exists yet; produce nothing instead of aborting the
            // job with NotImplementedError.
            SupportedLang.TYPESCRIPT -> emptyList()
        }
    }
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package cc.unitmesh.pick.worker

import cc.unitmesh.pick.option.InsOutputConfig
import cc.unitmesh.pick.strategy.BizCodeContextStrategy
import cc.unitmesh.pick.strategy.BuildPlanType
import cc.unitmesh.core.completion.CompletionBuilderType
import cc.unitmesh.pick.project.ProjectContext
import cc.unitmesh.pick.threshold.InsQualityThreshold
Expand All @@ -12,7 +12,7 @@ import org.jetbrains.annotations.TestOnly

@Serializable
data class WorkerContext(
val codeContextStrategies: List<BizCodeContextStrategy>,
val codeContextStrategies: List<BuildPlanType>,
val qualityTypes: List<CodeQualityType>,
val insOutputConfig: InsOutputConfig,
val pureDataFileName: String,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cc.unitmesh.pick
import cc.unitmesh.core.Instruction
import cc.unitmesh.core.completion.CompletionBuilderType
import cc.unitmesh.pick.option.InsPickerOption
import cc.unitmesh.pick.strategy.BuildPlanType
import kotlinx.coroutines.runBlocking
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json
Expand Down Expand Up @@ -38,8 +39,9 @@ class SingleProjectCodePickerTest {
language = "kotlin",
url = root,
maxTokenLength = 8192,
buildPlan = listOf(BuildPlanType.COMMENT),
completionTypes = listOf(
CompletionBuilderType.TEST_CODE_GEN
CompletionBuilderType.DOCUMENTATION
),
)
)
Expand All @@ -52,4 +54,4 @@ class SingleProjectCodePickerTest {
})
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Group<T>(val name: String) {
@Test
fun `should extract KDoc comments when valid code provided`() {
// When
val result = KotlinCommentBuilder.extractKdocComments(kotlinCode)
val result = KotlinCommentBuilder().extractKdocComments(kotlinCode)

// Then
result.size shouldBe 3
Expand Down Expand Up @@ -79,5 +79,12 @@ class Group<T>(val name: String) {
* @constructor Creates an empty group.
*/
""".trimIndent()

result[1].unique().output shouldBe """
/**
* Adds a [member] to this group.
* @return the new size of the group.
*/
""".trimIndent()
}
}

0 comments on commit 1e411a7

Please sign in to comment.