From 832762a40431b7e3ffa6bd359302a528572c1b53 Mon Sep 17 00:00:00 2001 From: sonohgong <99127857+sonohgong@users.noreply.github.com> Date: Thu, 24 Feb 2022 17:46:55 +0100 Subject: [PATCH] limit amount of matches during replacement Skip matches beyond the original range when replacing a buffer with captures. --- crates/matcher/src/lib.rs | 28 ++++++++++++++++++++++++++++ crates/printer/src/util.rs | 18 ++++-------------- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/crates/matcher/src/lib.rs b/crates/matcher/src/lib.rs index 5b43b0d85..d80680100 100644 --- a/crates/matcher/src/lib.rs +++ b/crates/matcher/src/lib.rs @@ -41,6 +41,7 @@ implementations. use std::fmt; use std::io; use std::ops; +use std::ops::Range; use std::u64; use crate::interpolate::interpolate; @@ -942,6 +943,33 @@ pub trait Matcher { Ok(()) } + /// Same as replace_with_captures_at, but limits the replacements to the + /// given range. Matches beyond the end of the range are skipped. + fn replace_with_captures_in_range( + &self, + haystack: &[u8], + range: &Range, + caps: &mut Self::Captures, + dst: &mut Vec, + mut append: F, + ) -> Result<(), Self::Error> + where + F: FnMut(&Self::Captures, &mut Vec) -> bool, + { + let mut last_match = range.start; + self.captures_iter_at(haystack, range.start, caps, |caps| { + let m = caps.get(0).unwrap(); + if m.start >= range.end { + return false; + } + dst.extend(&haystack[last_match..m.start]); + last_match = m.end; + append(caps, dst) + })?; + dst.extend(&haystack[last_match..range.end]); + Ok(()) + } + /// Returns true if and only if the matcher matches the given haystack. /// /// By default, this method is implemented by calling `shortest_match`. diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs index 6875ee8b6..76eeb28ac 100644 --- a/crates/printer/src/util.rs +++ b/crates/printer/src/util.rs @@ -1,4 +1,5 @@ use std::borrow::Cow; +use std::cmp::max; use std::fmt; use std::io; use std::path::Path; @@ -63,13 +64,9 @@ impl Replacer { // See the giant comment in 'find_iter_at_in_context' below for why we // do this dance. let is_multi_line = searcher.multi_line_with_matcher(&matcher); - let mut extension_len = 0; if is_multi_line { if subject[range.end..].len() >= MAX_LOOK_AHEAD { - extension_len = MAX_LOOK_AHEAD; subject = &subject[..range.end + MAX_LOOK_AHEAD]; - } else { - extension_len = subject.len() - range.end; } } else { // When searching a single line, we should remove the line @@ -87,9 +84,9 @@ impl Replacer { matches.clear(); matcher - .replace_with_captures_at( + .replace_with_captures_in_range( subject, - range.start, + &range, caps, dst, |caps, dst| { @@ -107,14 +104,7 @@ impl Replacer { ) .map_err(io::Error::error_message)?; - if is_multi_line { - // Remove the subject buffer beyond the range.end. - // NOTE: could this be a bug with the current replace functionality? - // As an example, running `rg -U '\.\nA' --replace 'BB' -C 2` in this repo produces - // spurious extra lines of output in the replacement, as the extra bytes where - // not removed. This seems to fix that, but it's not pretty and might be wrong. - dst.truncate(dst.len() - extension_len); - } else { + if !is_multi_line { // Restore the line terminator. dst.extend(searcher.line_terminator().as_bytes()); }