From 1e411a71e7965e6754bf5300cfa98c12069796b0 Mon Sep 17 00:00:00 2001 From: Phodal Huang Date: Fri, 29 Dec 2023 16:37:25 +0800 Subject: [PATCH] feat(comment): make kotlin comment support works #1 --- .../unitmesh/pick/SingleProjectCodePicker.kt | 2 +- .../builder/comment/KotlinCommentBuilder.kt | 103 ++++++++++++------ .../unitmesh/pick/option/InsPickerOption.kt | 17 +-- ...odeContextStrategy.kt => BuildPlanType.kt} | 12 +- .../bizcode/CommentsStrategyBuilder.kt | 14 ++- .../cc/unitmesh/pick/worker/WorkerContext.kt | 4 +- .../pick/SingleProjectCodePickerTest.kt | 6 +- .../comment/KotlinCommentBuilderTest.kt | 9 +- 8 files changed, 112 insertions(+), 55 deletions(-) rename unit-picker/src/main/kotlin/cc/unitmesh/pick/strategy/{BizCodeContextStrategy.kt => BuildPlanType.kt} (75%) diff --git a/unit-picker/src/main/kotlin/cc/unitmesh/pick/SingleProjectCodePicker.kt b/unit-picker/src/main/kotlin/cc/unitmesh/pick/SingleProjectCodePicker.kt index 1a786600..1b9664e6 100644 --- a/unit-picker/src/main/kotlin/cc/unitmesh/pick/SingleProjectCodePicker.kt +++ b/unit-picker/src/main/kotlin/cc/unitmesh/pick/SingleProjectCodePicker.kt @@ -72,7 +72,7 @@ class SingleProjectCodePicker(private val config: InsPickerOption) { val workerManager = WorkerManager( WorkerContext( - config.codeContextStrategies, + config.buildPlan, config.codeQualityTypes, config.insOutputConfig, pureDataFileName = config.pureDataFileName(), diff --git a/unit-picker/src/main/kotlin/cc/unitmesh/pick/builder/comment/KotlinCommentBuilder.kt b/unit-picker/src/main/kotlin/cc/unitmesh/pick/builder/comment/KotlinCommentBuilder.kt index 813d46f5..f5ba0015 100644 --- a/unit-picker/src/main/kotlin/cc/unitmesh/pick/builder/comment/KotlinCommentBuilder.kt +++ b/unit-picker/src/main/kotlin/cc/unitmesh/pick/builder/comment/KotlinCommentBuilder.kt @@ -15,7 +15,12 @@ class KotlinCommentBuilder : CommentBuilder { override val docInstruction: DocInstruction = DocInstruction.KOTLIN override fun build(code: String, container: CodeContainer): List { - val posComments = extractKdocComments(code) + val posComments = try { + extractKdocComments(code) + } catch (e: Exception) { + emptyList() + } + val startLineCommentMap: Map = posComments.associateBy { it.position.StopLine } @@ -25,47 +30,77 @@ class KotlinCommentBuilder : CommentBuilder { container.DataStructures.forEach { dataStruct -> val classComment = startLineCommentMap[dataStruct.Position.StartLine - 1] - classComment?.let { comments.add(ClassCommentIns(dataStruct, it)) } + classComment?.let { comments.add(ClassCommentIns(dataStruct, it, language = "kotlin")) } dataStruct.Functions .filter { it.Name != "constructor" && it.Name != "PrimaryConstructor" } .forEach { function -> val functionComment = startLineCommentMap[function.Position.StartLine - 1] - functionComment?.let { comments.add(MethodCommentIns(function, it, dataStruct)) } + functionComment?.let { + comments.add( + MethodCommentIns( + function, + it, + dataStruct, + language = "kotlin" + ) + ) + } } } return comments } - companion object { - private val commentPattern = Regex("""\s+/\*\*([^*]|(\*+[^*/]))*\*+/""") - - /** - * Extracts the Kotlin documentation comments (KDoc) from the given code. - * - * @param code the Kotlin code from which to extract the KDoc comments - * @return a list of pairs, where each pair contains the line number and the extracted KDoc comment - */ - fun extractKdocComments(code: String): List { - val matches = commentPattern.findAll(code) - - val comments = mutableListOf() - - for (match in matches) { - val commentContent = match.value.trimIndent() - val startLine = code.substring(0, match.range.first).count { it == '\n' } + 1 - val stopLine = code.substring(0, match.range.last).count { it == '\n' } + 1 - val startLinePosition = match.range.first - code.lastIndexOf('\n', match.range.first) - 1 - val stopLinePosition = match.range.last - code.lastIndexOf('\n', match.range.last) - 1 - - val position = CodePosition(startLine, startLinePosition, stopLine, stopLinePosition) - val comment = CodeComment(commentContent, position) - comments.add(comment) - } + /** + * Extracts the Kotlin documentation comments (KDoc) from the given code. + * + * @param code the Kotlin code from which to extract the KDoc comments + * @return a list of pairs, where each pair contains the line number and the extracted KDoc comment + */ + fun extractKdocComments(code: String): List { + val pattern = Regex("""/\*\*[^*]*\*+([^/*][^*]*\*+)*/""") + + val matches = pattern.findAll(code) + + val comments = mutableListOf() + + for (match in matches) { + val commentContent = match.value.trimIndent() + val startLine = code.substring(0, match.range.first).count { it == '\n' } + 1 + val stopLine = code.substring(0, match.range.last).count { it == '\n' } + 1 + val startLinePosition = match.range.first - code.lastIndexOf('\n', match.range.first) - 1 + val stopLinePosition = match.range.last - code.lastIndexOf('\n', match.range.last) - 1 + + val position = CodePosition(startLine, startLinePosition, stopLine, stopLinePosition) + val content = reIndentComment(commentContent) + - return comments + val comment = CodeComment(content, position) + comments.add(comment) } + + return comments + } + + /// Re-indent the comment to remove the leading spaces. + private fun reIndentComment(content: String): String { + val lines = content.split("\n") + val indent = lines[1].takeWhile { it == ' ' } + val linesWithoutIndent = lines + .map { it.removePrefix(indent) } + + // except the first line, every line should have one leading space + val linesWithLeadingSpace = linesWithoutIndent + .mapIndexed { index, line -> + if (index == 0) { + line + } else { + " $line" + } + } + + return linesWithLeadingSpace.joinToString("\n") } } @@ -73,12 +108,13 @@ class KotlinCommentBuilder : CommentBuilder { data class ClassCommentIns( val dataStructure: CodeDataStruct, val comment: CodeComment, + val language: String, ) : TypedCommentIns() { override val builderLevel: CommentBuilderType = CommentBuilderType.CLASS_LEVEL override fun unique(): Instruction { - val instruction = "Write documentation for given class " + dataStructure.NodeName + " ." - val input = dataStructure.Content + val instruction = "Write documentation for given class " + dataStructure.NodeName + " .\n" + val input = "Code:\n```$language\n" + dataStructure.Content + "\n```" val output = comment.content return Instruction(instruction, input, output) @@ -90,12 +126,13 @@ data class MethodCommentIns( val function: CodeFunction, val comment: CodeComment, val currentDataStruct: CodeDataStruct, + val language: String, ) : TypedCommentIns() { override val builderLevel: CommentBuilderType = CommentBuilderType.METHOD_LEVEL override fun unique(): Instruction { - val instruction = "Write documentation for given method" + function.Name + " ." - val input = "\n### Current class:\n" + currentDataStruct.toUml() + "\n###" + function.Content + val instruction = "Write documentation for given method " + function.Name + " .\n" + val input = "### Current class:\n" + currentDataStruct.toUml() + "\n###\n" + "Code:\n```$language\n" + currentDataStruct.Content + "\n```" val output = comment.content return Instruction(instruction, input, output) diff --git a/unit-picker/src/main/kotlin/cc/unitmesh/pick/option/InsPickerOption.kt b/unit-picker/src/main/kotlin/cc/unitmesh/pick/option/InsPickerOption.kt index 6dc088bf..472a9585 100644 --- a/unit-picker/src/main/kotlin/cc/unitmesh/pick/option/InsPickerOption.kt +++ b/unit-picker/src/main/kotlin/cc/unitmesh/pick/option/InsPickerOption.kt @@ -1,6 +1,6 @@ package cc.unitmesh.pick.option -import cc.unitmesh.pick.strategy.BizCodeContextStrategy +import cc.unitmesh.pick.strategy.BuildPlanType import cc.unitmesh.core.completion.CompletionBuilderType import cc.unitmesh.pick.threshold.InsQualityThreshold import cc.unitmesh.quality.CodeQualityType @@ -25,7 +25,7 @@ const val MAX_COMPLETION_EACH_FILE = 10 * @property branch The branch of the repository. Default value is "master". * @property language The programming language of the code in the repository. Default value is "java". * @property baseDir The base directory where the datasets are stored. Default value is "datasets". - * @property codeContextStrategies The strategies to determine the code context. Default value is [BizCodeContextStrategy.RELATED_CODE]. + * @property buildPlan The strategies to determine the code context. Default value is [BuildPlanType.RELATED_CODE]. * Possible values are: * - [CodeContextStrategy.SIMILAR_CHUNKS]: Determines the code context based on similar code chunks. * - [CodeContextStrategy.RELATED_CODE]: Determines the code context based on related code. @@ -53,15 +53,16 @@ data class InsPickerOption( val language: String = "java", val baseDir: String = "datasets", /** - * The [BizCodeContextStrategy], suggest to be one of:. + * The [BuildPlanType], suggest to be one of:. * - * - [BizCodeContextStrategy.SIMILAR_CHUNKS] - * - [BizCodeContextStrategy.RELATED_CODE] + * - [BuildPlanType.SIMILAR_CHUNKS] + * - [BuildPlanType.RELATED_CODE] * */ - val codeContextStrategies: List = listOf( - BizCodeContextStrategy.RELATED_CODE, - BizCodeContextStrategy.SIMILAR_CHUNKS, + val buildPlan: List = listOf( +// BuildPlanType.RELATED_CODE, +// BuildPlanType.SIMILAR_CHUNKS, + BuildPlanType.COMMENT, ), /** * The [CompletionBuilderType], which will according you IDE strategy to generate the type. diff --git a/unit-picker/src/main/kotlin/cc/unitmesh/pick/strategy/BizCodeContextStrategy.kt b/unit-picker/src/main/kotlin/cc/unitmesh/pick/strategy/BuildPlanType.kt similarity index 75% rename from unit-picker/src/main/kotlin/cc/unitmesh/pick/strategy/BizCodeContextStrategy.kt rename to unit-picker/src/main/kotlin/cc/unitmesh/pick/strategy/BuildPlanType.kt index c5337020..80383c1f 100644 --- a/unit-picker/src/main/kotlin/cc/unitmesh/pick/strategy/BizCodeContextStrategy.kt +++ b/unit-picker/src/main/kotlin/cc/unitmesh/pick/strategy/BuildPlanType.kt @@ -8,19 +8,19 @@ import cc.unitmesh.pick.worker.job.JobContext import kotlinx.serialization.SerializationException /** - * The `CodeContextStrategy` enum class represents different strategies for generating code context in AutoDev. + * The `BuildPlanType` enum class represents different strategies for generating code context in AutoDev. * * There are two available strategies: - * 1. [BizCodeContextStrategy.SIMILAR_CHUNKS]: This prompt is used with pre-built context for unsupported languages. + * 1. [BuildPlanType.SIMILAR_CHUNKS]: This prompt is used with pre-built context for unsupported languages. * It allows AutoDev to generate code context with similar code chunks builder. - * 2. [BizCodeContextStrategy.RELATED_CODE]: This prompt is used with pre-built context. It allows AutoDev to + * 2. [BuildPlanType.RELATED_CODE]: This prompt is used with pre-built context. It allows AutoDev to * generate code context with similar code builder. * * The strategies are used through the `builder` function, which takes an `InstructionContext` parameter and returns an `InstructionBuilder` object. * * Note that the `builder` function throws a `SerializationException` if the prompt is unknown. */ -enum class BizCodeContextStrategy { +enum class BuildPlanType { /** * the AutoDev with pre-build context for un-support language */ @@ -34,14 +34,14 @@ enum class BizCodeContextStrategy { /** * the AutoDev with pre-build context */ - EMPTY_CONTEXT + COMMENT ; fun builder(context: JobContext): CodeStrategyBuilder { return mapOf( SIMILAR_CHUNKS to SimilarChunksStrategyBuilder(context), RELATED_CODE to RelatedCodeStrategyBuilder(context), - EMPTY_CONTEXT to CommentsStrategyBuilder(context), + COMMENT to CommentsStrategyBuilder(context), )[this] ?: throw SerializationException("Unknown message type: $this") } } diff --git a/unit-picker/src/main/kotlin/cc/unitmesh/pick/strategy/bizcode/CommentsStrategyBuilder.kt b/unit-picker/src/main/kotlin/cc/unitmesh/pick/strategy/bizcode/CommentsStrategyBuilder.kt index 580aa32f..1dec6ba7 100644 --- a/unit-picker/src/main/kotlin/cc/unitmesh/pick/strategy/bizcode/CommentsStrategyBuilder.kt +++ b/unit-picker/src/main/kotlin/cc/unitmesh/pick/strategy/bizcode/CommentsStrategyBuilder.kt @@ -1,6 +1,8 @@ package cc.unitmesh.pick.strategy.bizcode +import cc.unitmesh.core.SupportedLang import cc.unitmesh.core.completion.TypedIns +import cc.unitmesh.pick.builder.comment.KotlinCommentBuilder import cc.unitmesh.pick.strategy.base.CodeStrategyBuilder import cc.unitmesh.pick.worker.job.JobContext @@ -8,10 +10,18 @@ import cc.unitmesh.pick.worker.job.JobContext * 对于其它不需要上下文的 AI 能力,需要实现一个空的上下文策略,如注释生成。 */ class CommentsStrategyBuilder(val context: JobContext) : CodeStrategyBuilder { + private val kotlinCommentBuilder = KotlinCommentBuilder() + override fun build(): List { - val language = context.job.fileSummary.language.lowercase() val container = context.job.container ?: return emptyList() - return emptyList() + val language = context.project.language + return when (language) { + SupportedLang.JAVA -> kotlinCommentBuilder.build(context.job.code, container) + SupportedLang.TYPESCRIPT -> TODO() + SupportedLang.KOTLIN -> { + kotlinCommentBuilder.build(context.job.code, container) + } + } } } diff --git a/unit-picker/src/main/kotlin/cc/unitmesh/pick/worker/WorkerContext.kt b/unit-picker/src/main/kotlin/cc/unitmesh/pick/worker/WorkerContext.kt index 16e47867..9737f82b 100644 --- a/unit-picker/src/main/kotlin/cc/unitmesh/pick/worker/WorkerContext.kt +++ b/unit-picker/src/main/kotlin/cc/unitmesh/pick/worker/WorkerContext.kt @@ -1,7 +1,7 @@ package cc.unitmesh.pick.worker import cc.unitmesh.pick.option.InsOutputConfig -import cc.unitmesh.pick.strategy.BizCodeContextStrategy +import cc.unitmesh.pick.strategy.BuildPlanType import cc.unitmesh.core.completion.CompletionBuilderType import cc.unitmesh.pick.project.ProjectContext import cc.unitmesh.pick.threshold.InsQualityThreshold @@ -12,7 +12,7 @@ import org.jetbrains.annotations.TestOnly @Serializable data class WorkerContext( - val codeContextStrategies: List, + val codeContextStrategies: List, val qualityTypes: List, val insOutputConfig: InsOutputConfig, val pureDataFileName: String, diff --git a/unit-picker/src/test/kotlin/cc/unitmesh/pick/SingleProjectCodePickerTest.kt b/unit-picker/src/test/kotlin/cc/unitmesh/pick/SingleProjectCodePickerTest.kt index 4b1de65e..6e0ffe44 100644 --- a/unit-picker/src/test/kotlin/cc/unitmesh/pick/SingleProjectCodePickerTest.kt +++ b/unit-picker/src/test/kotlin/cc/unitmesh/pick/SingleProjectCodePickerTest.kt @@ -3,6 +3,7 @@ package cc.unitmesh.pick import cc.unitmesh.core.Instruction import cc.unitmesh.core.completion.CompletionBuilderType import cc.unitmesh.pick.option.InsPickerOption +import cc.unitmesh.pick.strategy.BuildPlanType import kotlinx.coroutines.runBlocking import kotlinx.serialization.encodeToString import kotlinx.serialization.json.Json @@ -38,8 +39,9 @@ class SingleProjectCodePickerTest { language = "kotlin", url = root, maxTokenLength = 8192, + buildPlan = listOf(BuildPlanType.COMMENT), completionTypes = listOf( - CompletionBuilderType.TEST_CODE_GEN + CompletionBuilderType.DOCUMENTATION ), ) ) @@ -52,4 +54,4 @@ class SingleProjectCodePickerTest { }) } } -} \ No newline at end of file +} diff --git a/unit-picker/src/test/kotlin/cc/unitmesh/pick/builder/comment/KotlinCommentBuilderTest.kt b/unit-picker/src/test/kotlin/cc/unitmesh/pick/builder/comment/KotlinCommentBuilderTest.kt index 29916b03..86dc3cc5 100644 --- a/unit-picker/src/test/kotlin/cc/unitmesh/pick/builder/comment/KotlinCommentBuilderTest.kt +++ b/unit-picker/src/test/kotlin/cc/unitmesh/pick/builder/comment/KotlinCommentBuilderTest.kt @@ -35,7 +35,7 @@ class Group(val name: String) { @Test fun `should extract KDoc comments when valid code provided`() { // When - val result = KotlinCommentBuilder.extractKdocComments(kotlinCode) + val result = KotlinCommentBuilder().extractKdocComments(kotlinCode) // Then result.size shouldBe 3 @@ -79,5 +79,12 @@ class Group(val name: String) { * @constructor Creates an empty group. */ """.trimIndent() + + result[1].unique().output shouldBe """ + /** + * Adds a [member] to this group. + * @return the new size of the group. + */ + """.trimIndent() } }