Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add tree-sitter tag parser for #4 #62

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: pkgstats
Title: Metrics of R Packages
Version: 0.1.6.017
Version: 0.1.6.021
Authors@R:
person("Mark", "Padgham", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-2172-5265"))
Expand All @@ -25,6 +25,8 @@ Imports:
methods,
readr,
sys,
treesitter,
treesitter.r,
withr
Suggests:
callr,
Expand Down
38 changes: 31 additions & 7 deletions R/external-calls.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#' Map calls from within each function to external packages
#'
#' @param path Path to package being analysed
Expand All @@ -8,19 +7,37 @@
#' @noRd
external_call_network <- function (tags_r, path, pkg_name) {

calls <- extract_call_content (tags_r)
tags_are_treesitter <- "grammar_type" %in% names (tags_r)

if (tags_are_treesitter) {
calls <- data.frame (
tags_line = tags_r$start,
call = tags_r$node_text,
tag = tags_r$grammar_type,
file = paste0 ("R/", basename (tags_r$file))
)
tags_r$kind <- "function"
index <- seq_len (nrow (tags_r))
} else { # ctags
calls <- extract_call_content (tags_r)
index <- calls$tags_line

Check warning on line 23 in R/external-calls.R

View check run for this annotation

Codecov / codecov/patch

R/external-calls.R#L22-L23

Added lines #L22 - L23 were not covered by tests
}

if (length (calls) == 0L || nrow (calls) == 0L) {
return (NULL)
}

calls$kind <- tags_r$kind [calls$tags_line]
calls$start <- tags_r$start [calls$tags_line]
calls$end <- tags_r$end [calls$tags_line]
calls$kind <- tags_r$kind [index]
calls$start <- tags_r$start [index]
calls$end <- tags_r$end [index]

calls$package <- NA_character_
pkg_fns <- unique (tags_r$tag [tags_r$kind == "function"])
pkg_fns <- pkg_fns [which (!grepl ("^anonFunc", pkg_fns))]
if (tags_are_treesitter) {
pkg_fns <- unique (tags_r$fn_name)
} else {
pkg_fns <- unique (tags_r$tag [tags_r$kind == "function"])
pkg_fns <- pkg_fns [which (!grepl ("^anonFunc", pkg_fns))]

Check warning on line 39 in R/external-calls.R

View check run for this annotation

Codecov / codecov/patch

R/external-calls.R#L38-L39

Added lines #L38 - L39 were not covered by tests
}
calls$package [which (calls$call %in% pkg_fns)] <- pkg_name

calls <- add_base_recommended_pkgs (calls)
Expand All @@ -39,6 +56,13 @@

calls <- calls [which (!is.na (calls$package)), ]

# Some of these from tree-sitter flag sub-setting operations as calls;
# these are removed here:
index <- grep ("\\\"|\\(|\\)|\\]|\\]", calls$call)
if (length (index) > 0L) {
calls <- calls [-index, ]
}

rownames (calls) <- NULL

return (calls)
Expand Down
91 changes: 91 additions & 0 deletions R/tag-data-tree-sitter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#' Get tags using tree-sitter
#'
#' Currently extracts tags from the "R" directory only, and only for R-language files.
#'
#' @inheritParams tags_data
#' @noRd
get_treesitter_tags <- function (path) {

language <- treesitter.r::language ()
parser <- treesitter::parser (language)

tree_sitter_calls (parser, path)
}

tree_sitter_calls <- function (parser, path, d = "R") {
flist <- fs::dir_ls (fs::path (path, d), pattern = "\\.R$")
fn_calls <- lapply (flist, function (f) {
parse_list <- control_parse (f)
fn_calls <- lapply (parse_list, function (p) {
txt <- paste0 (as.character (p), collapse = "\n")
tree <- treesitter::parser_parse (parser, txt)
walk_one_tree (tree)
})
res <- do.call (rbind, fn_calls)
cbind (file = rep (f, nrow (res)), res)
})
fn_calls <- do.call (rbind, fn_calls)
rownames (fn_calls) <- NULL
return (fn_calls)
}

walk_one_tree <- function (tree) {

it <- treesitter::tree_walk (tree)

get_row_start_end <- function (it) {
point_start <- treesitter::node_start_point (it$node ())
point_end <- treesitter::node_end_point (it$node ())
c (treesitter::point_row (point_start), treesitter::point_row (point_end))
}

reached_foot <- FALSE
first_identifier <- TRUE
get_next_open <- FALSE

grammar_types <- node_text <- next_open <- fn_name <- character (0L)
row_start <- row_end <- integer (0L)

while (!reached_foot) {
field_name <- NA_to_char (it$field_name ())
grammar_type <- NA_to_char (treesitter::node_grammar_type (it$node ()))
if (field_name == "function" && !grammar_type %in% c ("call", "extract_operator")) {
grammar_types <- c (grammar_types, grammar_type)
node_text <- c (node_text, treesitter::node_text (it$node ()))
row <- get_row_start_end (it)
row_start <- c (row_start, row [1L])
row_end <- c (row_end, row [1L])
get_next_open <- TRUE
} else if (grammar_type == "identifier" && first_identifier) {
fn_name <- treesitter::node_text (it$node ())
first_identifier <- FALSE
} else if (get_next_open && field_name == "open") {
next_open <- c (next_open, grammar_type)
get_next_open <- FALSE
}

if (it$goto_first_child ()) next
if (it$goto_next_sibling ()) next

retracing <- TRUE
while (retracing) {
if (!it$goto_parent ()) {
retracing <- FALSE
reached_foot <- TRUE
}
if (it$goto_next_sibling ()) {
retracing <- FALSE
}
}
}

# This line ensures fn_name is also length 0 when no data are parsed:
fn_name <- rep (fn_name, length (grammar_types))
data.frame (
fn_name = fn_name,
grammar_type = grammar_types,
node_text = node_text,
start = row_start,
end = row_end
) [which (next_open != "["), ]
}
4 changes: 2 additions & 2 deletions R/tag-data.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#' use ctags and gtags to parse call data
#'
#' @param path Path to local repository
Expand Down Expand Up @@ -61,7 +60,8 @@ tags_data <- function (path, has_tabs = NULL, pkg_name = NULL) {

tags_r <- withr::with_dir (path, get_ctags ("R", has_tabs))

external_calls <- external_call_network (tags_r, path, pkg_name)
tags_treesitter <- get_treesitter_tags (path)
external_calls <- external_call_network (tags_treesitter, path, pkg_name)

tags_src <- withr::with_dir (path, get_ctags ("src", has_tabs))
tags_inst <- withr::with_dir (path, get_ctags ("inst", has_tabs))
Expand Down
1 change: 1 addition & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
NA_to_char <- function (i) ifelse (is.na (i), "", i)

get_Rd_metadata <- utils::getFromNamespace (".Rd_get_metadata", "tools") # nolint

Expand Down
2 changes: 1 addition & 1 deletion codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"codeRepository": "https://github.com/ropensci-review-tools/pkgstats",
"issueTracker": "https://github.com/ropensci-review-tools/pkgstats/issues",
"license": "https://spdx.org/licenses/GPL-3.0",
"version": "0.1.6.017",
"version": "0.1.6.021",
"programmingLanguage": {
"@type": "ComputerLanguage",
"name": "R",
Expand Down