From 1db7ca2fc8d91984ea378f74373cd03d13b84d04 Mon Sep 17 00:00:00 2001
From: Brian Finney
Date: Tue, 13 Feb 2024 09:36:47 -0800
Subject: [PATCH] feat: add ruby support (#24)

---
Notes (free-form area after the "---" separator; not part of the commit
message and skipped when the patch is applied):
  * Adds Language.RUBY, adds ".rb" to the extension list in repo.py, maps
    ".rb" to Language.RUBY in get_programming_language(), and maps
    Language.RUBY to text_splitter.Language.RUBY in get_langchain_language().
  * The new TreesitterRuby class passes "method", "identifier" and "comment"
    to Treesitter.__init__ (presumably the method-declaration, method-name
    and doc-comment node types; confirm against the base class) and
    registers itself with TreesitterRegistry at import time.
  * _query_all_methods() collects the run of "comment" siblings directly
    preceding a method as its doc comment, joined with newlines. It does
    not descend into a matched method node, so definitions nested inside a
    method body are not reported separately.
  * NOTE(review): only nodes whose type equals "method" are matched; confirm
    whether Ruby singleton methods ("def self.foo") need to be collected too.

 codeqai/constants.py                |  1 +
 codeqai/repo.py                     |  1 +
 codeqai/treesitter/__init__.py      |  1 +
 codeqai/treesitter/treesitter_rb.py | 38 +++++++++++++++++++++++++++++
 codeqai/utils.py                    |  3 +++
 5 files changed, 44 insertions(+)
 create mode 100644 codeqai/treesitter/treesitter_rb.py

diff --git a/codeqai/constants.py b/codeqai/constants.py
index 4ca591a..9ab7e23 100644
--- a/codeqai/constants.py
+++ b/codeqai/constants.py
@@ -16,6 +16,7 @@ class Language(Enum):
     SCALA = "scala"
     LUA = "lua"
     HASKELL = "haskell"
+    RUBY = "ruby"
     UNKNOWN = "unknown"
 
 
diff --git a/codeqai/repo.py b/codeqai/repo.py
index 7d8c77b..b0693a3 100644
--- a/codeqai/repo.py
+++ b/codeqai/repo.py
@@ -114,6 +114,7 @@ def get_commit_hash(file_path):
     ".rst",
     ".md",
     ".hs",
+    ".rb",
 ]
 BLACKLIST_FILES = [
     "package-lock.json",
diff --git a/codeqai/treesitter/__init__.py b/codeqai/treesitter/__init__.py
index 8090da4..059212a 100644
--- a/codeqai/treesitter/__init__.py
+++ b/codeqai/treesitter/__init__.py
@@ -8,5 +8,6 @@
 from codeqai.treesitter.treesitter_js import TreesitterJavascript
 from codeqai.treesitter.treesitter_kt import TreesitterKotlin
 from codeqai.treesitter.treesitter_py import TreesitterPython
+from codeqai.treesitter.treesitter_rb import TreesitterRuby
 from codeqai.treesitter.treesitter_rs import TreesitterRust
 from codeqai.treesitter.treesitter_ts import TreesitterTypescript
diff --git a/codeqai/treesitter/treesitter_rb.py b/codeqai/treesitter/treesitter_rb.py
new file mode 100644
index 0000000..7ba25e7
--- /dev/null
+++ b/codeqai/treesitter/treesitter_rb.py
@@ -0,0 +1,38 @@
+import tree_sitter
+
+from codeqai.constants import Language
+from codeqai.treesitter.treesitter import Treesitter, TreesitterMethodNode
+from codeqai.treesitter.treesitter_registry import TreesitterRegistry
+
+
+class TreesitterRuby(Treesitter):
+    def __init__(self):
+        super().__init__(
+            Language.RUBY, "method", "identifier", "comment"
+        )
+
+    def parse(self, file_bytes: bytes) -> list[TreesitterMethodNode]:
+        return super().parse(file_bytes)
+
+    def _query_all_methods(
+        self,
+        node: tree_sitter.Node,
+    ):
+        methods = []
+        if node.type == self.method_declaration_identifier:
+            doc_comment = []
+            doc_comment_node = node
+            while (
+                doc_comment_node.prev_named_sibling
+                and doc_comment_node.prev_named_sibling.type == self.doc_comment_identifier
+            ):
+                doc_comment_node = doc_comment_node.prev_named_sibling
+                doc_comment.insert(0, doc_comment_node.text.decode())
+            methods.append({"method": node, "doc_comment": "\n".join(doc_comment)})
+        else:
+            for child in node.children:
+                methods.extend(self._query_all_methods(child))
+        return methods
+
+# Register the TreesitterRuby class in the registry
+TreesitterRegistry.register_treesitter(Language.RUBY, TreesitterRuby)
diff --git a/codeqai/utils.py b/codeqai/utils.py
index 7ab0243..aa1e253 100644
--- a/codeqai/utils.py
+++ b/codeqai/utils.py
@@ -33,6 +33,7 @@ def get_programming_language(file_extension: str) -> Language:
         ".c": Language.C,
         ".cs": Language.C_SHARP,
         ".hs": Language.HASKELL,
+        ".rb": Language.RUBY,
     }
     return language_mapping.get(file_extension, Language.UNKNOWN)
 
@@ -72,6 +73,8 @@ def get_langchain_language(language: Language):
         return text_splitter.Language.CSHARP
     elif language == Language.HASKELL:
         return text_splitter.Language.HASKELL
+    elif language == Language.RUBY:
+        return text_splitter.Language.RUBY
     else:
         return None
 