[WIP] More flexible dims (#993)

* [dims] allow more flexible handling of dims * [write] hackish enforce dims solution * [write] A bit less of a hack * cleanup * fix logic * [doc] update man pages * cleanup * Handle both equally "A1;B2" and "A1,B2" * [dims] Fix col/row order * [tests] add enforce test * cleanups * cleanup * [tests] fix dims_to_dataframe() test. It is not pretty and simply returns all cells separated by a comma * another attempt to fix this * [utils] add argument `single` to `rowcol_to_dims()` * [tests] more tests * [tests] fix tests * [doc] update roxygen2 * fix lintr * update NEWS * Make the behavior a bit more consistent
JanMarvin · Apr 21, 2024 · 886a141 · 886a141
1 parent 385d7a3
commit 886a141
Show file tree

Hide file tree

Showing 18 changed files with 483 additions and 74 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -5,6 +5,14 @@
 * Helper to read sensitivity labels from files and apply them to workbooks. 
 [983](https://github.com/JanMarvin/openxlsx2/pull/983)
 
+* It is now possible to pass non consecutive dims like `"A1:B1,C2:D2"` to various style helpers like `wb_add_fill()`. In addition it is now possible to write a data set into a predefined dims region using `enforce = TRUE`. This handles either `","` or `";"` as cell separator. [993](https://github.com/JanMarvin/openxlsx2/pull/993)
+
+```r
+# force a dataset into a specific cell dimension
+wb <- wb_workbook()$add_worksheet()
+wb$add_data(dims = "I2:J2;A1:B2;G5:H6", x = matrix(1:8, 4, 2), enforce = TRUE)
+```
+
 ## Fixes
 
 * Allow writing data frames with zero rows. [987](https://github.com/JanMarvin/openxlsx2/pull/987)

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -28,8 +28,16 @@ copy <- function(x) {
     .Call(`_openxlsx2_copy`, x)
 }
 
-dims_to_df <- function(rows, cols, fill) {
-    .Call(`_openxlsx2_dims_to_df`, rows, cols, fill)
+validate_dims <- function(input) {
+    .Call(`_openxlsx2_validate_dims`, input)
+}
+
+needed_cells <- function(range) {
+    .Call(`_openxlsx2_needed_cells`, range)
+}
+
+dims_to_df <- function(rows, cols, filled, fill) {
+    .Call(`_openxlsx2_dims_to_df`, rows, cols, filled, fill)
 }
 
 long_to_wide <- function(z, tt, zz) {
@@ -40,8 +48,8 @@ is_charnum <- function(x) {
     .Call(`_openxlsx2_is_charnum`, x)
 }
 
-wide_to_long <- function(z, vtyps, zz, ColNames, start_col, start_row, ref, string_nums, na_null, na_missing, na_strings, inline_strings, c_cm) {
-    invisible(.Call(`_openxlsx2_wide_to_long`, z, vtyps, zz, ColNames, start_col, start_row, ref, string_nums, na_null, na_missing, na_strings, inline_strings, c_cm))
+wide_to_long <- function(z, vtyps, zz, ColNames, start_col, start_row, ref, string_nums, na_null, na_missing, na_strings, inline_strings, c_cm, dims) {
+    invisible(.Call(`_openxlsx2_wide_to_long`, z, vtyps, zz, ColNames, start_col, start_row, ref, string_nums, na_null, na_missing, na_strings, inline_strings, c_cm, dims))
 }
 
 #' @param colnames a vector of the names of the data frame

diff --git a/R/class-workbook-wrappers.R b/R/class-workbook-wrappers.R
@@ -115,6 +115,7 @@ wb_save <- function(wb, file = NULL, overwrite = TRUE, path = NULL) {
 #'   looks if `options(openxlsx2.na.strings)` is set. Otherwise [na_strings()]
 #'   uses the special `#N/A` value within the workbook.
 #' @param inline_strings write characters as inline strings
+#' @param enforce enforce that selected dims is filled. For this to work, `dims` must match `x`
 #' @param ... additional arguments
 #' @export
 #' @details Formulae written using [wb_add_formula()] to a Workbook object will
@@ -216,6 +217,7 @@ wb_add_data <- function(
     remove_cell_style = FALSE,
     na.strings        = na_strings(),
     inline_strings    = TRUE,
+    enforce           = FALSE,
     ...
 ) {
   assert_workbook(wb)
@@ -235,6 +237,7 @@ wb_add_data <- function(
     remove_cell_style = remove_cell_style,
     na.strings        = na.strings,
     inline_strings    = inline_strings,
+    enforce           = enforce,
     ...               = ...
   )
 }
@@ -332,7 +335,7 @@ wb_add_data_table <- function(
     remove_cell_style = remove_cell_style,
     na.strings        = na.strings,
     inline_strings    = inline_strings,
-    total_row        = total_row,
+    total_row         = total_row,
     ...               = ...
   )
 }
@@ -568,6 +571,7 @@ wb_add_slicer <- function(
 #'   Add this, if you see "@" inserted into your formulas.
 #' @param apply_cell_style Should we write cell styles to the workbook?
 #' @param remove_cell_style Should we keep the cell style?
+#' @param enforce enforce dims
 #' @param ... additional arguments
 #' @return The workbook, invisibly.
 #' @family workbook wrappers
@@ -599,6 +603,7 @@ wb_add_formula <- function(
     cm                = FALSE,
     apply_cell_style  = TRUE,
     remove_cell_style = FALSE,
+    enforce           = FALSE,
     ...
 ) {
   assert_workbook(wb)
@@ -612,6 +617,7 @@ wb_add_formula <- function(
     cm                = cm,
     apply_cell_style  = apply_cell_style,
     remove_cell_style = remove_cell_style,
+    enforce           = enforce,
     ...               = ...
   )
 }

diff --git a/R/class-workbook.R b/R/class-workbook.R
@@ -1278,6 +1278,7 @@ wbWorkbook <- R6::R6Class(
     #' @param na.strings Value used for replacing `NA` values from `x`. Default
     #'   `na_strings()` uses the special `#N/A` value within the workbook.
     #' @param inline_strings write characters as inline strings
+    #' @param enforce enforce that selected dims is filled. For this to work, `dims` must match `x`
     #' @param return The `wbWorkbook` object
     add_data = function(
         sheet            = current_sheet(),
@@ -1295,6 +1296,7 @@ wbWorkbook <- R6::R6Class(
         remove_cell_style = FALSE,
         na.strings        = na_strings(),
         inline_strings    = TRUE,
+        enforce           = FALSE,
         ...
       ) {
 
@@ -1324,7 +1326,8 @@ wbWorkbook <- R6::R6Class(
         apply_cell_style  = apply_cell_style,
         remove_cell_style = remove_cell_style,
         na.strings        = na.strings,
-        inline_strings    = inline_strings
+        inline_strings    = inline_strings,
+        enforce           = enforce
       )
       invisible(self)
     },
@@ -1885,6 +1888,7 @@ wbWorkbook <- R6::R6Class(
     #' @param cm cm
     #' @param apply_cell_style applyCellStyle
     #' @param remove_cell_style if writing into existing cells, should the cell style be removed?
+    #' @param enforce enforce dims
     #' @return The `wbWorkbook` object
     add_formula = function(
         sheet             = current_sheet(),
@@ -1896,6 +1900,7 @@ wbWorkbook <- R6::R6Class(
         cm                = FALSE,
         apply_cell_style  = TRUE,
         remove_cell_style = FALSE,
+        enforce           = FALSE,
         ...
     ) {
 
@@ -1910,7 +1915,8 @@ wbWorkbook <- R6::R6Class(
         array           = array,
         cm              = cm,
         applyCellStyle  = apply_cell_style,
-        removeCellStyle = remove_cell_style
+        removeCellStyle = remove_cell_style,
+        enforce         = enforce
       )
       invisible(self)
     },

diff --git a/R/utils.R b/R/utils.R
@@ -163,6 +163,7 @@ random_string <- function(n = 1, length = 16, pattern = "[A-Za-z0-9]", keep_seed
 #' @param as_integer If the output should be returned as integer, (defaults to string)
 #' @param row a numeric vector of rows
 #' @param col a numeric or character vector of cols
+#' @param single argument indicating if [rowcol_to_dims()] returns a single cell dimension
 #' @returns
 #'   * A `dims` string for `_to_dim` i.e  "A1:A1"
 #'   * A list of rows and columns for `to_rowcol`
@@ -178,8 +179,10 @@ NULL
 dims_to_rowcol <- function(x, as_integer = FALSE) {
 
   dims <- x
-  if (length(x) == 1 && grepl(";", x))
-    dims <- unlist(strsplit(x, ";"))
+  if (length(x) == 1) {
+    if (grepl(";", x)) dims <- unlist(strsplit(x, ";"))
+    if (grepl(",", x)) dims <- unlist(strsplit(x, ","))
+  }
 
   cols_out <- NULL
   rows_out <- NULL
@@ -224,7 +227,7 @@ dims_to_rowcol <- function(x, as_integer = FALSE) {
 
 #' @rdname dims_helper
 #' @export
-rowcol_to_dims <- function(row, col) {
+rowcol_to_dims <- function(row, col, single = TRUE) {
 
   # no assert for col. will output character anyways
   # assert_class(row, "numeric") - complains if integer
@@ -238,7 +241,11 @@ rowcol_to_dims <- function(row, col) {
   max_row <- max(row)
 
   # we will always return something like "A1:A1", even for single cells
-  stringi::stri_join(min_col, min_row, ":", max_col, max_row)
+  if (single) {
+    return(stringi::stri_join(min_col, min_row, ":", max_col, max_row))
+  } else {
+    return(paste0(vapply(int2col(col_int), FUN = function(x) stringi::stri_join(x, min_row, ":", x, max_row), ""), collapse = ","))
+  }
 
 }
 

diff --git a/R/wb_functions.R b/R/wb_functions.R
@@ -8,18 +8,28 @@
 #' @export
 dims_to_dataframe <- function(dims, fill = FALSE) {
 
-  if (grepl(";", dims)) {
+  has_dim_sep <- FALSE
+  if (any(grepl(";", dims))) {
     dims <- unlist(strsplit(dims, ";"))
+    has_dim_sep <- TRUE
+  }
+  if (any(grepl(",", dims))) {
+    dims <- unlist(strsplit(dims, ","))
+    has_dim_sep <- TRUE
   }
 
   rows_out <- NULL
   cols_out <- NULL
+  filled   <- NULL
   for (dim in dims) {
 
     if (!grepl(":", dim)) {
       dim <- paste0(dim, ":", dim)
     }
 
+    if (length(dims) > 1)
+      filled <- c(filled, needed_cells(dim))
+
     if (identical(dim, "Inf:-Inf")) {
       # This should probably be fixed elsewhere?
       stop("dims are inf:-inf")
@@ -40,46 +50,64 @@ dims_to_dataframe <- function(dims, fill = FALSE) {
     }
   }
 
-  # create data frame from rows/
+  if (has_dim_sep) {
+    cols_out <- int2col(sort(col2int(cols_out)))
+    rows_out <- sort(rows_out)
+  }
+
   dims_to_df(
-    rows = rows_out,
-    cols = cols_out,
-    fill = fill
+    rows   = rows_out,
+    cols   = cols_out,
+    filled = filled,
+    fill   = fill
   )
 }
 
 #' Create dimensions from dataframe
 #'
 #' Use [wb_dims()]
 #' @param df dataframe with spreadsheet columns and rows
+#' @param dim_break split the dims?
 #' @examples
 #'  df <- dims_to_dataframe("A1:D5;F1:F6;D8", fill = TRUE)
 #'  dataframe_to_dims(df)
 #' @keywords internal
 #' @export
-dataframe_to_dims <- function(df) {
-
-  # get continuous sequences of columns and rows in df
-  v <- as.integer(rownames(df))
-  rows <- split(v, cumsum(diff(c(-Inf, v)) != 1))
-
-  v <- col2int(colnames(df))
-  cols <- split(colnames(df), cumsum(diff(c(-Inf, v)) != 1))
-
-  # combine columns and rows to construct dims
-  out <- NULL
-  for (col in seq_along(cols)) {
-    for (row in seq_along(rows)) {
-      tmp <- paste0(
-        cols[[col]][[1]], rows[[row]][[1]],
-        ":",
-        rev(cols[[col]])[[1]],  rev(rows[[row]])[[1]]
-      )
-      out <- c(out, tmp)
-    }
-  }
+dataframe_to_dims <- function(df, dim_break = FALSE) {
+
+  if (dim_break) {
+
+    dims <- dims_to_dataframe(dataframe_to_dims(df, dim_break = FALSE), fill = TRUE)
+
+    mm <- as.matrix(df)
+    mm[mm != "" | is.na(mm)] <- 1
+    mm[mm == ""] <- 0
+
+    matrix <- matrix(as.numeric(mm), nrow(mm), ncol(mm))
+    dimnames(matrix) <- list(rownames(mm), colnames(mm))
 
-  paste0(out, collapse = ";")
+    # remove columns and rows not in df
+    dims <- dims[, colnames(dims) %in% colnames(matrix)]
+    dims <- dims[rownames(dims) %in% rownames(matrix), ]
+
+    out <- dims[matrix == 1]
+
+    return(paste0(out, collapse = ","))
+
+  } else {
+
+    rows <- as.integer(rownames(df))
+    cols <- colnames(df)
+
+    tmp <- paste0(
+      cols[[1]][[1]], rows[[1]][[1]],
+      ":",
+      rev(cols)[[1]][[1]], rev(rows)[[1]][[1]]
+    )
+
+    return(tmp)
+
+  }
 }
 
 #' function to estimate the column type.