From 63430b9cc61af6baba86537677d5ea262ac62e25 Mon Sep 17 00:00:00 2001
From: maksymis <32574056+maksymiuks@users.noreply.github.com>
Date: Thu, 28 Sep 2023 15:56:01 +0200
Subject: [PATCH] Refactor merge_text_spaces

---
 NEWS.md                                       |   6 +-
 R/map_rd2markdown.R                           | 107 ++++++++----------
 R/rd2markdown.R                               |   9 +-
 ...xt_whitespaces.Rd => merge_text_spaces.Rd} |  12 +-
 man/rd2markdown.Rd                            |  13 +--
 tests/testthat/test-map_rd2markdown.R         |  22 ++--
 6 files changed, 77 insertions(+), 92 deletions(-)
 rename man/{merge_text_whitespaces.Rd => merge_text_spaces.Rd} (77%)

diff --git a/NEWS.md b/NEWS.md
index 8d8b1dc..a12970a 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,13 +7,13 @@ rd2markdown 0.0.7
 * `rd2markdown.section` now returns output as a block to make sure it is 
   wrapped with new lines and renders sections properly.
   
-* add `merge_text_whitespaces` functions that merges standalone TEXT 
+* add `merge_text_spaces` functions that merges standalone TEXT 
   spaces (`" "`) into surrounding TEXT tags. Excessive spaces are not rendered
   by markdown and therefore can be appended to other meaningful tags. (To later
   get reduced by the "clean_text_whitespace")
   
-* Add `levels` parameter to most of the tags that could be nestes in subesction
-  to make sure proper ammount of `#` is appended.
+* Add `levels` parameter to most of the tags that could be nested in subsection
+  to make sure proper amount of `#` is appended.
 
 rd2markdown 0.0.6
 -----------------
diff --git a/R/map_rd2markdown.R b/R/map_rd2markdown.R
index fc1f7c6..f43070b 100644
--- a/R/map_rd2markdown.R
+++ b/R/map_rd2markdown.R
@@ -12,8 +12,8 @@
 map_rd2markdown <- function(frags, ..., collapse = NULL) {
   out <- lapply(frags, rd2markdown, ...)
   out <- Filter(Negate(is.null), out)
-  out <- clean_text_newlines(out)
   out <- clean_text_whitespace(out)
+  out <- clean_text_newlines(out)
   if (!is.null(collapse)) out <- paste0(out, collapse = collapse)
   out
 }
@@ -77,7 +77,7 @@ clean_text_newlines <- function(x) {
 #' @keywords internal
 #'
 clean_text_whitespace <- function(x) {
-  x <- merge_text_whitespaces(x)
+  x <- merge_text_spaces(x)
   n <- length(x)
   if (n < 1) return(x)
 
@@ -114,72 +114,65 @@ clean_text_whitespace <- function(x) {
 #' @examples
 #' \dontrun{
 #' x <- list(" a ", "`b`", " ", "  ", " c ", block(), "\n", "e   \n", "f")
-#' merge_text_whitespaces(x)
-#' # list(" a ", "`b`", "    c ", block(), "\n", "e   \n", "f")
+#' merge_text_spaces(x)
+#' # list(" a ", "`b`   ", " c ", block(), "\n", "e   \n", "f")
 #'
 #' x <- list(block(), " a ", "`b`", " c ", " ", block(), "   ",  " d ", "e ", "f", block())
-#' merge_text_whitespaces(x)
+#' merge_text_spaces(x)
 #' # list(block(), "a ", "`b`", " c  ", block(), "    d ", "e ", "f", block())
 #' }
 #'
 #' @keywords internal
-merge_text_whitespaces <- function(x) {
-  ws <- vapply(x, function(y) grepl("^ *$", y), FUN.VALUE = logical(1))
-  ib <- vapply(x, function(y) is_block(y),  FUN.VALUE = logical(1))
-  ws <- ws & !ib
+merge_text_spaces <- function(x) {
+  if (length(x) == 0) return(x)
+  # assumes length x > 0
   
-  seqs <- rle(ws)
-  values <- seqs$values
-  lengths <- seqs$lengths
+  # Split to sublists on blocks
+  blocks <- vlapply(x, is_block)
+  sub_x <- cumsum(blocks)
+  if (sub_x[1] == 1) {
+    # If block was a first element, force it to the separate group
+    sub_x[1] <- 0
+  }
   
-  merged <- list()
-  i <- 1
-  while (i <= length(values)) {
-    
-    # Current value was already included
-    if (lengths[i] == 0) {
-      i <- i + 1
-      next()
-    }
-    # Get index corresponding to end of current's value subsequence
-    ws_inds <- sum(lengths[seq(i)])
-    # Get subvector of the same value of ws
-    x_sub <- x[seq(ws_inds - lengths[i] + 1, ws_inds)]
-    
-    if (!values[i]) {
-      merged <- append(merged, x_sub)
+  unlist(unname(lapply(split(x, sub_x), function(y) {
+    # Block in y can only ever be in the first position
+    # Let's ignore it for now
+    n_y <- length(y)
+    y_wo_blocks <- if (is_block(y[[1]])) {
+      prefix <- y[[1]]
+      y[-1]
     } else {
-      merged_ws <- paste0(x_sub, collapse = "")
-      if (isFALSE(values[i + 1]) && !is_block(x[[ws_inds + 1]])) {
-        merged_ws <- paste0(merged_ws, x[[ws_inds + 1]], collapse = "")
-        # We already include one of the next FALSE, therefore we reduce
-        # corresponding lengths value by 1 and increment current one
-        lengths[i + 1] <- lengths[i + 1] - 1
-        lengths[i] <- lengths[i] + 1
-        merged <- append(merged, list(merged_ws))
-        # If next element is a block, we will try to append to the previous.
-      } else if (isFALSE(values[i - 1])) {
-        # Previous value was FALSE, therefore we can use last value in
-        # merged to append to unless it is block.
-        if (!is_block(merged[[length(merged)]])) {
-          merged_ws <- paste0(merged[[length(merged)]], merged_ws, collapse = "")
-          merged[[length(merged)]] <- merged_ws
-        } else {
-        # Previous value was a block. Merge standalone space TEXT
-          merged_ws <- paste0(x_sub, collapse = "")
-          merged <- append(merged, list(merged_ws))
-        }
-      } else {
-        # Entire x consists of ws only
-        merged_ws <- paste0(x_sub, collapse = "")
-        merged <- append(merged, merged_ws)
-      }
+      prefix <- NULL
+      y
     }
     
-    i <- i + 1
-  }
-  
-  merged
+    spaces <- grepl("^ *$", y_wo_blocks, perl = TRUE)
+    is_trailing <- cumsum(!spaces) > 1  # after at least 1 non-space character
+    groups <- cumsum(!spaces & is_trailing)
+    # collapse non-block groups, maintain first-in-group's attributes
+    y_collapsed <- unname(lapply(split(y_wo_blocks, groups), function(group) {
+      atts <- attributes(group[[1]])
+      result <- paste(group, collapse = "")
+      attributes(result) <- atts
+      result
+    }))
+    
+    if (is.null(prefix)) y_collapsed else append(list(prefix), y_collapsed)
+  })), recursive = FALSE)
+}
+
+collapse_spaces <- function(x) {
+  spaces <- grepl("^ *$", x)
+  is_trailing <- cumsum(!spaces) > 1  # after at least 1 non-space character
+  groups <- cumsum(!spaces & is_trailing)
+  # collapse non-block groups, maintain first-in-group's attributes
+  unname(lapply(split(x, groups), function(group) {
+    atts <- attributes(group[[1]])
+    result <- paste(group, collapse = "")
+    attributes(result) <- atts
+    result
+  }))
 }
 
 is_consecutive <- function(x) {
diff --git a/R/rd2markdown.R b/R/rd2markdown.R
index 4606fd0..c39f9ea 100644
--- a/R/rd2markdown.R
+++ b/R/rd2markdown.R
@@ -155,14 +155,11 @@ rd2markdown.title <- function(x, fragments = c(), ...) {
 
 #' @param title optional section title
 #' @param level optional level parameter. 2L by default
-#' @param child_level optional parameter that will be set as level to
-#' "child" tags parsed using `map_rd2markdown`. It allows proper rendering
-#' of sections and subsections.
 #'
 #' @exportS3Method
 #' @rdname rd2markdown
-rd2markdown.description <- function(x, fragments = c(), ..., title = NULL, level = 2L, child_level = level) {
-  out <- map_rd2markdown(x, ..., collapse = "", level = child_level)
+rd2markdown.description <- function(x, fragments = c(), ..., title = NULL, level = 2L) {
+  out <- map_rd2markdown(x, ..., collapse = "", level = level + 1)
   out <- gsub("\n{1,}$", "", out)
   out <- gsub("\n*\\\\lifecycle\\{(.*)\\}\n*", "\n\nLifecycle: *\\1*\n\n", out)
   with_md_title(out, title, level, ...)
@@ -211,7 +208,7 @@ rd2markdown.section <- function(x, fragments = c(), ..., level = 2L) {
   # We need to make sure that sections are separated with new lines signs.
   # As markdown ignroes extra new line signs when rendering docuemnts. We are
   # safe to do it greedily
-  block(rd2markdown.description(x[[2]], fragments = fragments, ..., title = title, level = level, child_level = level + 1))
+  block(rd2markdown.description(x[[2]], fragments = fragments, ..., title = title, level = level))
 }
 
 #' @exportS3Method
diff --git a/man/merge_text_whitespaces.Rd b/man/merge_text_spaces.Rd
similarity index 77%
rename from man/merge_text_whitespaces.Rd
rename to man/merge_text_spaces.Rd
index 2ef08d2..90f8f8e 100644
--- a/man/merge_text_whitespaces.Rd
+++ b/man/merge_text_spaces.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/map_rd2markdown.R
-\name{merge_text_whitespaces}
-\alias{merge_text_whitespaces}
+\name{merge_text_spaces}
+\alias{merge_text_spaces}
 \title{Merge whitespace between text elements}
 \usage{
-merge_text_whitespaces(x)
+merge_text_spaces(x)
 }
 \arguments{
 \item{x}{\code{list} of Rd tags}
@@ -17,11 +17,11 @@ non-space element if possible.
 \examples{
 \dontrun{
 x <- list(" a ", "`b`", " ", "  ", " c ", block(), "\n", "e   \n", "f")
-merge_text_whitespaces(x)
-# list(" a ", "`b`", "    c ", block(), "\n", "e   \n", "f")
+merge_text_spaces(x)
+# list(" a ", "`b`   ", " c ", block(), "\n", "e   \n", "f")
 
 x <- list(block(), " a ", "`b`", " c ", " ", block(), "   ",  " d ", "e ", "f", block())
-merge_text_whitespaces(x)
+merge_text_spaces(x)
 # list(block(), "a ", "`b`", " c  ", block(), "    d ", "e ", "f", block())
 }
 
diff --git a/man/rd2markdown.Rd b/man/rd2markdown.Rd
index 5940100..8a276a8 100644
--- a/man/rd2markdown.Rd
+++ b/man/rd2markdown.Rd
@@ -86,14 +86,7 @@ rd2markdown(x, fragments = c(), ...)
 
 \method{rd2markdown}{title}(x, fragments = c(), ...)
 
-\method{rd2markdown}{description}(
-  x,
-  fragments = c(),
-  ...,
-  title = NULL,
-  level = 2L,
-  child_level = level
-)
+\method{rd2markdown}{description}(x, fragments = c(), ..., title = NULL, level = 2L)
 
 \method{rd2markdown}{author}(x, fragments = c(), ..., level = 2L)
 
@@ -179,10 +172,6 @@ markdown-formatted help.}
 
 \item{level}{optional level parameter. 2L by default}
 
-\item{child_level}{optional parameter that will be set as level to
-"child" tags parsed using \code{map_rd2markdown}. It allows proper rendering
-of sections and subsections.}
-
 \item{language}{language to use as code fence syntax highlighter}
 
 \item{topic}{usually, a \link{name} or character string specifying the
diff --git a/tests/testthat/test-map_rd2markdown.R b/tests/testthat/test-map_rd2markdown.R
index 1b37430..0ed8948 100644
--- a/tests/testthat/test-map_rd2markdown.R
+++ b/tests/testthat/test-map_rd2markdown.R
@@ -6,29 +6,35 @@ test_that("A list of fragments can be mapped over to individually map to markdow
   expect_silent(md <- map_rd2markdown(rd, strwrap = 30L))
 })
 
-test_that("merge_text_whitespaces works as expected", {
+test_that("merge_text_spaces works as expected", {
   x <- list("    ", " ", "   ")
   expect_equal(
-    merge_text_whitespaces(x),
+    merge_text_spaces(x),
     list("        ")
   )
   
   x <- list(" a ", "`b`", "  ", "  ", " c ", " ", " ")
   expect_equal(
-    merge_text_whitespaces(x),
-    list(" a ", "`b`", "     c   ")
+    merge_text_spaces(x),
+    list(" a ", "`b`    ", " c   ")
   )
   
   x <- list(" a ", "`b`", " ", "  ", "    c ", block(), "\n", "e   \n", "f")
   expect_equal(
-    merge_text_whitespaces(x),
-    list(" a ", "`b`", "       c ", block(), "\n", "e   \n", "f")
+    merge_text_spaces(x),
+    list(" a ", "`b`   ", "    c ", block(), "\n", "e   \n", "f")
   )
   
   x <- list(" a ", "`b`", " ", "  ", " c ", block(), " ", "   ", block())
   expect_equal(
-    merge_text_whitespaces(x),
-    list(" a ", "`b`", "    c ", block(), "    ", block())
+    merge_text_spaces(x),
+    list(" a ", "`b`   ", " c ", block(), "    ", block())
+  )
+
+  x <- list(block(), " ", "      ", block())
+  expect_equal(
+    merge_text_spaces(x),
+    list(block(), "       ", block())
   )
 })