Skip to content

Commit

Permalink
feat: add ruby support (#24)
Browse files Browse the repository at this point in the history
  • Loading branch information
yenif authored Feb 13, 2024
1 parent ce0de81 commit 1db7ca2
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 0 deletions.
1 change: 1 addition & 0 deletions codeqai/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class Language(Enum):
SCALA = "scala"
LUA = "lua"
HASKELL = "haskell"
RUBY = "ruby"
UNKNOWN = "unknown"


Expand Down
1 change: 1 addition & 0 deletions codeqai/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ def get_commit_hash(file_path):
".rst",
".md",
".hs",
".rb",
]
BLACKLIST_FILES = [
"package-lock.json",
Expand Down
1 change: 1 addition & 0 deletions codeqai/treesitter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@
from codeqai.treesitter.treesitter_js import TreesitterJavascript
from codeqai.treesitter.treesitter_kt import TreesitterKotlin
from codeqai.treesitter.treesitter_py import TreesitterPython
from codeqai.treesitter.treesitter_rb import TreesitterRuby
from codeqai.treesitter.treesitter_rs import TreesitterRust
from codeqai.treesitter.treesitter_ts import TreesitterTypescript
38 changes: 38 additions & 0 deletions codeqai/treesitter/treesitter_rb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import tree_sitter

from codeqai.constants import Language
from codeqai.treesitter.treesitter import Treesitter, TreesitterMethodNode
from codeqai.treesitter.treesitter_registry import TreesitterRegistry


class TreesitterRuby(Treesitter):
    """Tree-sitter method extractor configured for ``Language.RUBY``."""

    def __init__(self):
        # Node-type names handed to the base class: method declaration,
        # method name, and doc comment (argument roles presumed from the
        # attributes read in _query_all_methods -- confirm against Treesitter).
        super().__init__(Language.RUBY, "method", "identifier", "comment")

    def parse(self, file_bytes: bytes) -> list[TreesitterMethodNode]:
        """Delegate parsing of ``file_bytes`` to the base-class ``parse``."""
        return super().parse(file_bytes)

    def _query_all_methods(
        self,
        node: tree_sitter.Node,
    ):
        """Collect method nodes under ``node`` together with their comments.

        Returns a list of dicts with the keys ``"method"`` (the matching
        node) and ``"doc_comment"`` (the text of the comment nodes that
        directly precede it, joined with newlines in source order).
        A matching node is not searched for further nested matches.
        """
        if node.type != self.method_declaration_identifier:
            # Not a method declaration: gather results from every child.
            found = []
            for child in node.children:
                found.extend(self._query_all_methods(child))
            return found

        # Walk backwards over the contiguous run of comment siblings that
        # sits immediately before the method declaration.
        comment_lines = []
        sibling = node.prev_named_sibling
        while sibling and sibling.type == self.doc_comment_identifier:
            comment_lines.append(sibling.text.decode())
            sibling = sibling.prev_named_sibling
        # Comments were collected nearest-first; restore source order.
        comment_lines.reverse()

        return [{"method": node, "doc_comment": "\n".join(comment_lines)}]

# Module-level side effect: when this module is imported, TreesitterRuby is
# passed to TreesitterRegistry.register_treesitter under the Language.RUBY key
# (presumably so the registry can look the parser up by language -- confirm
# against TreesitterRegistry).
TreesitterRegistry.register_treesitter(Language.RUBY, TreesitterRuby)
3 changes: 3 additions & 0 deletions codeqai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def get_programming_language(file_extension: str) -> Language:
".c": Language.C,
".cs": Language.C_SHARP,
".hs": Language.HASKELL,
".rb": Language.RUBY,
}
return language_mapping.get(file_extension, Language.UNKNOWN)

Expand Down Expand Up @@ -72,6 +73,8 @@ def get_langchain_language(language: Language):
return text_splitter.Language.CSHARP
elif language == Language.HASKELL:
return text_splitter.Language.HASKELL
elif language == Language.RUBY:
return text_splitter.Language.RUBY
else:
return None

Expand Down

0 comments on commit 1db7ca2

Please sign in to comment.