Refactor merge_text_spaces

Genentech · Sep 28, 2023 · 63430b9 · 63430b9
1 parent ffc5389
commit 63430b9
Show file tree

Hide file tree

Showing 6 changed files with 77 additions and 92 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -7,13 +7,13 @@ rd2markdown 0.0.7
 * `rd2markdown.section` now returns output as a block to make sure it is 
   wrapped with new lines and renders sections properly.
 
-* add `merge_text_whitespaces` functions that merges standalone TEXT 
+* add `merge_text_spaces` functions that merges standalone TEXT 
   spaces (`" "`) into surrounding TEXT tags. Excessive spaces are not rendered
   by markdown and therefore can be appended to other meaningful tags. (To later
   get reduced by the "clean_text_whitespace")
 
-* Add `levels` parameter to most of the tags that could be nestes in subesction
-  to make sure proper ammount of `#` is appended.
+* Add `levels` parameter to most of the tags that could be nested in subsection
+  to make sure proper amount of `#` is appended.
 
 rd2markdown 0.0.6
 -----------------

diff --git a/R/map_rd2markdown.R b/R/map_rd2markdown.R
@@ -12,8 +12,8 @@
 map_rd2markdown <- function(frags, ..., collapse = NULL) {
   out <- lapply(frags, rd2markdown, ...)
   out <- Filter(Negate(is.null), out)
-  out <- clean_text_newlines(out)
   out <- clean_text_whitespace(out)
+  out <- clean_text_newlines(out)
   if (!is.null(collapse)) out <- paste0(out, collapse = collapse)
   out
 }
@@ -77,7 +77,7 @@ clean_text_newlines <- function(x) {
 #' @keywords internal
 #'
 clean_text_whitespace <- function(x) {
-  x <- merge_text_whitespaces(x)
+  x <- merge_text_spaces(x)
   n <- length(x)
   if (n < 1) return(x)
 
@@ -114,72 +114,65 @@ clean_text_whitespace <- function(x) {
 #' @examples
 #' \dontrun{
 #' x <- list(" a ", "`b`", " ", "  ", " c ", block(), "\n", "e   \n", "f")
-#' merge_text_whitespaces(x)
-#' # list(" a ", "`b`", "    c ", block(), "\n", "e   \n", "f")
+#' merge_text_spaces(x)
+#' # list(" a ", "`b`   ", " c ", block(), "\n", "e   \n", "f")
 #'
 #' x <- list(block(), " a ", "`b`", " c ", " ", block(), "   ",  " d ", "e ", "f", block())
-#' merge_text_whitespaces(x)
+#' merge_text_spaces(x)
 #' # list(block(), "a ", "`b`", " c  ", block(), "    d ", "e ", "f", block())
 #' }
 #'
 #' @keywords internal
-merge_text_whitespaces <- function(x) {
-  ws <- vapply(x, function(y) grepl("^ *$", y), FUN.VALUE = logical(1))
-  ib <- vapply(x, function(y) is_block(y),  FUN.VALUE = logical(1))
-  ws <- ws & !ib
+merge_text_spaces <- function(x) {
+  if (length(x) == 0) return(x)
+  # assumes length x > 0
 
-  seqs <- rle(ws)
-  values <- seqs$values
-  lengths <- seqs$lengths
+  # Split to sublists on blocks
+  blocks <- vlapply(x, is_block)
+  sub_x <- cumsum(blocks)
+  if (sub_x[1] == 1) {
+    # If block was a first element, force it to the separate group
+    sub_x[1] <- 0
+  }
 
-  merged <- list()
-  i <- 1
-  while (i <= length(values)) {
-
-    # Current value was already included
-    if (lengths[i] == 0) {
-      i <- i + 1
-      next()
-    }
-    # Get index corresponding to end of current's value subsequence
-    ws_inds <- sum(lengths[seq(i)])
-    # Get subvector of the same value of ws
-    x_sub <- x[seq(ws_inds - lengths[i] + 1, ws_inds)]
-
-    if (!values[i]) {
-      merged <- append(merged, x_sub)
+  unlist(unname(lapply(split(x, sub_x), function(y) {
+    # Block in y can only ever be in the first position
+    # Let's ignore it for now
+    n_y <- length(y)
+    y_wo_blocks <- if (is_block(y[[1]])) {
+      prefix <- y[[1]]
+      y[-1]
     } else {
-      merged_ws <- paste0(x_sub, collapse = "")
-      if (isFALSE(values[i + 1]) && !is_block(x[[ws_inds + 1]])) {
-        merged_ws <- paste0(merged_ws, x[[ws_inds + 1]], collapse = "")
-        # We already include one of the next FALSE, therefore we reduce
-        # corresponding lengths value by 1 and increment current one
-        lengths[i + 1] <- lengths[i + 1] - 1
-        lengths[i] <- lengths[i] + 1
-        merged <- append(merged, list(merged_ws))
-        # If next element is a block, we will try to append to the previous.
-      } else if (isFALSE(values[i - 1])) {
-        # Previous value was FALSE, therefore we can use last value in
-        # merged to append to unless it is block.
-        if (!is_block(merged[[length(merged)]])) {
-          merged_ws <- paste0(merged[[length(merged)]], merged_ws, collapse = "")
-          merged[[length(merged)]] <- merged_ws
-        } else {
-        # Previous value was a block. Merge standalone space TEXT
-          merged_ws <- paste0(x_sub, collapse = "")
-          merged <- append(merged, list(merged_ws))
-        }
-      } else {
-        # Entire x consists of ws only
-        merged_ws <- paste0(x_sub, collapse = "")
-        merged <- append(merged, merged_ws)
-      }
+      prefix <- NULL
+      y
     }
 
-    i <- i + 1
-  }
-
-  merged
+    spaces <- grepl("^ *$", y_wo_blocks, perl = TRUE)
+    is_trailing <- cumsum(!spaces) > 1  # after at least 1 non-space character
+    groups <- cumsum(!spaces & is_trailing)
+    # collapse non-block groups, maintain first-in-group's attributes
+    y_collapsed <- unname(lapply(split(y_wo_blocks, groups), function(group) {
+      atts <- attributes(group[[1]])
+      result <- paste(group, collapse = "")
+      attributes(result) <- atts
+      result
+    }))
+
+    if (is.null(prefix)) y_collapsed else append(list(prefix), y_collapsed)
+  })), recursive = FALSE)
+}
+
+collapse_spaces <- function(x) {
+  spaces <- grepl("^ *$", x)
+  is_trailing <- cumsum(!spaces) > 1  # after at least 1 non-space character
+  groups <- cumsum(!spaces & is_trailing)
+  # collapse non-block groups, maintain first-in-group's attributes
+  unname(lapply(split(x, groups), function(group) {
+    atts <- attributes(group[[1]])
+    result <- paste(group, collapse = "")
+    attributes(result) <- atts
+    result
+  }))
 }
 
 is_consecutive <- function(x) {

diff --git a/R/rd2markdown.R b/R/rd2markdown.R
@@ -155,14 +155,11 @@ rd2markdown.title <- function(x, fragments = c(), ...) {
 
 #' @param title optional section title
 #' @param level optional level parameter. 2L by default
-#' @param child_level optional parameter that will be set as level to
-#' "child" tags parsed using `map_rd2markdown`. It allows proper rendering
-#' of sections and subsections.
 #'
 #' @exportS3Method
 #' @rdname rd2markdown
-rd2markdown.description <- function(x, fragments = c(), ..., title = NULL, level = 2L, child_level = level) {
-  out <- map_rd2markdown(x, ..., collapse = "", level = child_level)
+rd2markdown.description <- function(x, fragments = c(), ..., title = NULL, level = 2L) {
+  out <- map_rd2markdown(x, ..., collapse = "", level = level + 1)
   out <- gsub("\n{1,}$", "", out)
   out <- gsub("\n*\\\\lifecycle\\{(.*)\\}\n*", "\n\nLifecycle: *\\1*\n\n", out)
   with_md_title(out, title, level, ...)
@@ -211,7 +208,7 @@ rd2markdown.section <- function(x, fragments = c(), ..., level = 2L) {
   # We need to make sure that sections are separated with new lines signs.
   # As markdown ignroes extra new line signs when rendering docuemnts. We are
   # safe to do it greedily
-  block(rd2markdown.description(x[[2]], fragments = fragments, ..., title = title, level = level, child_level = level + 1))
+  block(rd2markdown.description(x[[2]], fragments = fragments, ..., title = title, level = level))
 }
 
 #' @exportS3Method

diff --git a/man/merge_text_whitespaces.Rd → man/merge_text_spaces.Rd b/man/merge_text_whitespaces.Rd → man/merge_text_spaces.Rd
diff --git a/man/rd2markdown.Rd b/man/rd2markdown.Rd
diff --git a/tests/testthat/test-map_rd2markdown.R b/tests/testthat/test-map_rd2markdown.R
@@ -6,29 +6,35 @@ test_that("A list of fragments can be mapped over to individually map to markdow
   expect_silent(md <- map_rd2markdown(rd, strwrap = 30L))
 })
 
-test_that("merge_text_whitespaces works as expected", {
+test_that("merge_text_spaces works as expected", {
   x <- list("    ", " ", "   ")
   expect_equal(
-    merge_text_whitespaces(x),
+    merge_text_spaces(x),
     list("        ")
   )
 
   x <- list(" a ", "`b`", "  ", "  ", " c ", " ", " ")
   expect_equal(
-    merge_text_whitespaces(x),
-    list(" a ", "`b`", "     c   ")
+    merge_text_spaces(x),
+    list(" a ", "`b`    ", " c   ")
   )
 
   x <- list(" a ", "`b`", " ", "  ", "    c ", block(), "\n", "e   \n", "f")
   expect_equal(
-    merge_text_whitespaces(x),
-    list(" a ", "`b`", "       c ", block(), "\n", "e   \n", "f")
+    merge_text_spaces(x),
+    list(" a ", "`b`   ", "    c ", block(), "\n", "e   \n", "f")
   )
 
   x <- list(" a ", "`b`", " ", "  ", " c ", block(), " ", "   ", block())
   expect_equal(
-    merge_text_whitespaces(x),
-    list(" a ", "`b`", "    c ", block(), "    ", block())
+    merge_text_spaces(x),
+    list(" a ", "`b`   ", " c ", block(), "    ", block())
+  )
+
+  x <- list(block(), " ", "      ", block())
+  expect_equal(
+    merge_text_spaces(x),
+    list(block(), "       ", block())
   )
 })