Merge branch 'master' into cran

jonclayden · Dec 4, 2023 · 676a68a
2 parents 7ba9e21 + f24b696
commit 676a68a
Show file tree

Hide file tree

Showing 7 changed files with 45 additions and 8 deletions.
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -21,7 +21,7 @@ jobs:
     # Steps represent a sequence of tasks that will be executed as part of the job
     steps:
     # Check out the repository under $GITHUB_WORKSPACE
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     # Install and set up R
     - uses: r-lib/actions/setup-r@v2
@@ -48,7 +48,7 @@ jobs:
     # Upload the check directory as an artefact on failure
     - name: Upload check results
       if: failure()
-      uses: actions/upload-artifact@v1.0.0
+      uses: actions/upload-artifact@v3
       with:
         name: ${{ matrix.os }}-results
         path: Rcheck

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: ore
-Version: 1.7.3.1
-Date: 2023-01-16
+Version: 1.7.4.1
+Date: 2023-12-04
 Title: An R Interface to the Onigmo Regular Expression Library
 Author: Jon Clayden, based on Onigmo by K. Kosako and K. Takata
 Maintainer: Jon Clayden <code@clayden.org>

diff --git a/NEWS b/NEWS
@@ -2,6 +2,15 @@ Significant changes to the "ore" package are laid out below for each release.
 
 ===============================================================================
 
+VERSION 1.7.4
+
+- Named groups would not be propagated to match matrices unless the regex was
+  pre-compiled using `ore()`. This has been corrected.
+- A compiler warning about a `printf`-type format specification has been
+  resolved.
+
+===============================================================================
+
 VERSION 1.7.3.1
 
 - A potential mismatch between the C compiler configured for R and the one used

diff --git a/inst/tinytest/test-10-match.R b/inst/tinytest/test-10-match.R
@@ -58,3 +58,9 @@ expect_equal(results[,1,2], c("h","s",NA,"e"))
 expect_equal(unlist(matches(results)), c("Th","is","is","te","st"))
 expect_equal(groups(results)[[1]], matrix(c("T","h","i","s"),ncol=2,byrow=TRUE))
 expect_stdout(print(results), "5 matches in 4 strings")
+
+# Check named groups
+regexString <- "(?<numbers>\\d+)"
+regex <- ore(regexString)
+expect_equal(dimnames(groups(ore_search(regex, "1.7"))), list(NULL,"numbers"))
+expect_equal(dimnames(groups(ore_search(regexString, "1.7"))), list(NULL,"numbers"))
diff --git a/src/compile.c b/src/compile.c
@@ -150,7 +150,8 @@ static char * ore_build_pattern (SEXP pattern_)
     return pattern;
 }
 
-static Rboolean ore_group_name_vector (SEXP vec, regex_t *regex)
+// Insert group names into an R character vector of appropriate size
+Rboolean ore_group_name_vector (SEXP vec, regex_t *regex)
 {
     const int n_groups = onig_number_of_captures(regex);
 

diff --git a/src/compile.h b/src/compile.h
@@ -10,6 +10,8 @@ regex_t * ore_retrieve (SEXP regex_, encoding_t *encoding);
 
 void ore_free (regex_t *regex, SEXP source);
 
+Rboolean ore_group_name_vector (SEXP vec, regex_t *regex);
+
 SEXP ore_build (SEXP pattern_, SEXP options_, SEXP encoding_name_, SEXP syntax_name_);
 
 #endif
diff --git a/src/match.c b/src/match.c
@@ -252,7 +252,6 @@ void ore_char_matrix (SEXP mat, const char **data, const int n_regions, const in
 SEXP ore_search_all (SEXP regex_, SEXP text_, SEXP all_, SEXP start_, SEXP simplify_, SEXP incremental_)
 {
     // Convert R objects to C types
-    SEXP group_names = getAttrib(regex_, install("groupNames"));
     const Rboolean all = asLogical(all_) == TRUE;
     const Rboolean simplify = asLogical(simplify_) == TRUE;
     const Rboolean incremental = (asLogical(incremental_) == TRUE) && !all;
@@ -271,6 +270,26 @@ SEXP ore_search_all (SEXP regex_, SEXP text_, SEXP all_, SEXP start_, SEXP simpl
     text_t *text = ore_text(text_);
     regex_t *regex = ore_retrieve(regex_, text->encoding);
 
+    SEXP group_names = R_NilValue;
+    Rboolean group_names_protected = FALSE;
+    if (inherits(regex_, "ore"))
+        group_names = getAttrib(regex_, install("groupNames"));
+    else
+    {
+        const int n_groups = onig_number_of_captures(regex);
+        if (n_groups > 0)
+        {
+            PROTECT(group_names = NEW_CHARACTER(n_groups));
+            if (ore_group_name_vector(group_names, regex))
+                group_names_protected = TRUE;
+            else
+            {
+                UNPROTECT(1);
+                group_names = R_NilValue;
+            }
+        }
+    }
+
     // Obtain the length of the start vector (which will be recycled if necessary)
     const int start_len = length(start_);
 
@@ -299,7 +318,7 @@ SEXP ore_search_all (SEXP regex_, SEXP text_, SEXP all_, SEXP start_, SEXP simpl
         }
         else if (!ore_consistent_encodings(text_element->encoding->onig_enc, regex->enc))
         {
-            warning("Encoding of text element %d does not match the regex", i+1);
+            warning("Encoding of text element %lu does not match the regex", (unsigned long) i+1);
             SET_ELEMENT(results, i, R_NilValue);
             continue;
         }
@@ -440,7 +459,7 @@ SEXP ore_search_all (SEXP regex_, SEXP text_, SEXP all_, SEXP start_, SEXP simpl
     ore_free(regex, regex_);
     ore_text_done(text);
 
-    UNPROTECT(using_file ? 1 : 2);
+    UNPROTECT(2 + group_names_protected - using_file);
 
     // Return just the first (and only) element of the full list, if requested
     if (simplify && text->length == 1)