Skip to content

Commit

Permalink
Merge branch 'master' into cran
Browse files Browse the repository at this point in the history
  • Loading branch information
jonclayden committed Dec 4, 2023
2 parents 7ba9e21 + f24b696 commit 676a68a
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 8 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
# Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Check out the repository under $GITHUB_WORKSPACE
- uses: actions/checkout@v2
- uses: actions/checkout@v3

# Install and set up R
- uses: r-lib/actions/setup-r@v2
Expand All @@ -48,7 +48,7 @@ jobs:
# Upload the check directory as an artefact on failure
- name: Upload check results
if: failure()
uses: actions/upload-artifact@v1.0.0
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.os }}-results
path: Rcheck
Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: ore
Version: 1.7.3.1
Date: 2023-01-16
Version: 1.7.4.1
Date: 2023-12-04
Title: An R Interface to the Onigmo Regular Expression Library
Author: Jon Clayden, based on Onigmo by K. Kosako and K. Takata
Maintainer: Jon Clayden <code@clayden.org>
Expand Down
9 changes: 9 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@ Significant changes to the "ore" package are laid out below for each release.

===============================================================================

VERSION 1.7.4

- Named groups would not be propagated to match matrices unless the regex was
pre-compiled using `ore()`. This has been corrected.
- A compiler warning about a `printf`-type format specification has been
resolved.

===============================================================================

VERSION 1.7.3.1

- A potential mismatch between the C compiler configured for R and the one used
Expand Down
6 changes: 6 additions & 0 deletions inst/tinytest/test-10-match.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,9 @@ expect_equal(results[,1,2], c("h","s",NA,"e"))
expect_equal(unlist(matches(results)), c("Th","is","is","te","st"))
expect_equal(groups(results)[[1]], matrix(c("T","h","i","s"),ncol=2,byrow=TRUE))
expect_stdout(print(results), "5 matches in 4 strings")

# Check named groups: the name of the capture group ("numbers") should be
# carried through to the second element of the dimnames of the groups matrix,
# with the first element left as NULL
regexString <- "(?<numbers>\\d+)"
# Compile the pattern up front so that both calling conventions can be checked
regex <- ore(regexString)
# Case 1: search with the precompiled regex object
expect_equal(dimnames(groups(ore_search(regex, "1.7"))), list(NULL,"numbers"))
# Case 2: search with the plain pattern string, which has not been passed
# through ore() by the caller; the expected group names are the same
expect_equal(dimnames(groups(ore_search(regexString, "1.7"))), list(NULL,"numbers"))
3 changes: 2 additions & 1 deletion src/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,8 @@ static char * ore_build_pattern (SEXP pattern_)
return pattern;
}

static Rboolean ore_group_name_vector (SEXP vec, regex_t *regex)
// Insert group names into an R character vector of appropriate size
Rboolean ore_group_name_vector (SEXP vec, regex_t *regex)
{
const int n_groups = onig_number_of_captures(regex);

Expand Down
2 changes: 2 additions & 0 deletions src/compile.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ regex_t * ore_retrieve (SEXP regex_, encoding_t *encoding);

void ore_free (regex_t *regex, SEXP source);

// Insert group names into an R character vector of appropriate size.
// NOTE(review): the meaning of the Rboolean result is not visible from this
// header; presumably it indicates whether any group names were found, since
// the caller in match.c discards the vector when it is false -- confirm
// against the definition in compile.c
Rboolean ore_group_name_vector (SEXP vec, regex_t *regex);

SEXP ore_build (SEXP pattern_, SEXP options_, SEXP encoding_name_, SEXP syntax_name_);

#endif
25 changes: 22 additions & 3 deletions src/match.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,6 @@ void ore_char_matrix (SEXP mat, const char **data, const int n_regions, const in
SEXP ore_search_all (SEXP regex_, SEXP text_, SEXP all_, SEXP start_, SEXP simplify_, SEXP incremental_)
{
// Convert R objects to C types
SEXP group_names = getAttrib(regex_, install("groupNames"));
const Rboolean all = asLogical(all_) == TRUE;
const Rboolean simplify = asLogical(simplify_) == TRUE;
const Rboolean incremental = (asLogical(incremental_) == TRUE) && !all;
Expand All @@ -271,6 +270,26 @@ SEXP ore_search_all (SEXP regex_, SEXP text_, SEXP all_, SEXP start_, SEXP simpl
text_t *text = ore_text(text_);
regex_t *regex = ore_retrieve(regex_, text->encoding);

SEXP group_names = R_NilValue;
Rboolean group_names_protected = FALSE;
if (inherits(regex_, "ore"))
group_names = getAttrib(regex_, install("groupNames"));
else
{
const int n_groups = onig_number_of_captures(regex);
if (n_groups > 0)
{
PROTECT(group_names = NEW_CHARACTER(n_groups));
if (ore_group_name_vector(group_names, regex))
group_names_protected = TRUE;
else
{
UNPROTECT(1);
group_names = R_NilValue;
}
}
}

// Obtain the length of the start vector (which will be recycled if necessary)
const int start_len = length(start_);

Expand Down Expand Up @@ -299,7 +318,7 @@ SEXP ore_search_all (SEXP regex_, SEXP text_, SEXP all_, SEXP start_, SEXP simpl
}
else if (!ore_consistent_encodings(text_element->encoding->onig_enc, regex->enc))
{
warning("Encoding of text element %d does not match the regex", i+1);
warning("Encoding of text element %lu does not match the regex", (unsigned long) i+1);
SET_ELEMENT(results, i, R_NilValue);
continue;
}
Expand Down Expand Up @@ -440,7 +459,7 @@ SEXP ore_search_all (SEXP regex_, SEXP text_, SEXP all_, SEXP start_, SEXP simpl
ore_free(regex, regex_);
ore_text_done(text);

UNPROTECT(using_file ? 1 : 2);
UNPROTECT(2 + group_names_protected - using_file);

// Return just the first (and only) element of the full list, if requested
if (simplify && text->length == 1)
Expand Down

0 comments on commit 676a68a

Please sign in to comment.