From a7005f3bd7009fcda927fa50dedc90af50a237c8 Mon Sep 17 00:00:00 2001
From: sonohgong <99127857+sonohgong@users.noreply.github.com>
Date: Tue, 22 Feb 2022 20:07:07 +0100
Subject: [PATCH 1/2] attempt to hack together a diff output

---
 crates/core/app.rs         |  44 ++++
 crates/core/args.rs        |  22 +-
 crates/core/search.rs      |  23 +-
 crates/printer/src/diff.rs | 438 +++++++++++++++++++++++++++++++++++++
 crates/printer/src/lib.rs  |   2 +
 crates/printer/src/util.rs |  16 ++
 6 files changed, 541 insertions(+), 4 deletions(-)
 create mode 100644 crates/printer/src/diff.rs

diff --git a/crates/core/app.rs b/crates/core/app.rs
index 215a2b74f..aef8b4962 100644
--- a/crates/core/app.rs
+++ b/crates/core/app.rs
@@ -566,6 +566,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
     flag_crlf(&mut args);
     flag_debug(&mut args);
     flag_dfa_size_limit(&mut args);
+    flag_diff(&mut args);
     flag_encoding(&mut args);
     flag_engine(&mut args);
     flag_field_context_separator(&mut args);
@@ -1170,6 +1171,49 @@ The argument accepts the same size suffixes as allowed in with the
     args.push(arg);
 }
 
+fn flag_diff(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Output search & replace results in unidiff format.";
+    const LONG: &str = long!(
+        "\
+Enable printing search & replace results in unified diff (unidiff) format.
+There doesn't seem to be a particular standard for this format other than:
+https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html
+This implementation is based on the output of the diff utility when run on
+multiple files, i.e. with an extra line of information between each file
+about what command produced the diff.
+
+When this flag is provided together with the --replace flag, ripgrep will
+emit unidiff format representing the matches as removed lines in a file
+and the replacements as added lines in the same file.
+
+The output can be viewed with a variety of diff pagers, or applied to files
+with e.g. `git apply -p0 --unidiff-zero` or `patch -u -p0`.
+
+The diff output currently does not support additional context,
+so the --context flag has no effect.
+
+The unidiff format is only supported for showing search results. It cannot
+be used with other flags that emit other types of output, such as --files,
+--files-with-matches, --files-without-match, --count or --count-matches.
+ripgrep will report an error if any of the aforementioned flags are used in
+concert with --diff.
+
+Other flags that control aspects of the standard output such as
+--only-matching, --heading, --max-columns, etc., have no effect
+when --diff is set.
+
+"
+    );
+    let arg = RGArg::switch("diff").help(SHORT).long_help(LONG).conflicts(&[
+        "count",
+        "count-matches",
+        "files",
+        "files-with-matches",
+        "files-without-match",
+    ]);
+    args.push(arg);
+}
+
 fn flag_encoding(args: &mut Vec<RGArg>) {
     const SHORT: &str = "Specify the text encoding of files to search.";
     const LONG: &str = long!(
diff --git a/crates/core/args.rs b/crates/core/args.rs
index 6623cbd54..19c9a7c7b 100644
--- a/crates/core/args.rs
+++ b/crates/core/args.rs
@@ -17,8 +17,8 @@ use grep::pcre2::{
     RegexMatcherBuilder as PCRE2RegexMatcherBuilder,
 };
 use grep::printer::{
-    default_color_specs, ColorSpecs, JSONBuilder, Standard, StandardBuilder,
-    Stats, Summary, SummaryBuilder, SummaryKind, JSON,
+    default_color_specs, ColorSpecs, Diff, DiffBuilder, JSONBuilder, Standard,
+    StandardBuilder, Stats, Summary, SummaryBuilder, SummaryKind, JSON,
 };
 use grep::regex::{
     RegexMatcher as RustRegexMatcher,
@@ -209,6 +209,9 @@ impl Args {
             OutputKind::JSON => {
                 self.matches().printer_json(wtr).map(Printer::JSON)
             }
+            OutputKind::Diff => {
+                self.matches().printer_diff(wtr).map(Printer::Diff)
+            }
         }
     }
 }
@@ -367,6 +370,8 @@ enum OutputKind {
     Summary,
     /// Emit match information in the JSON Lines format.
     JSON,
+    /// Emit search & replace information in unified diff format.
+    Diff,
 }
 
 /// The sort criteria, if present.
@@ -751,6 +756,14 @@ impl ArgMatches {
         Ok(builder.build(wtr))
     }
 
+    /// Build a Diff printer for `wtr`; fails unless `--replace` is given.
+    fn printer_diff<W: io::Write>(&self, wtr: W) -> Result<Diff<W>> {
+        let rep = self.replacement().ok_or("--diff requires --replace")?;
+        let mut builder = DiffBuilder::new();
+        builder.replacement(rep);
+        Ok(builder.build(wtr))
+    }
+
     /// Build a Standard printer that writes results to the given writer.
     ///
     /// The given paths are used to configure aspects of the printer.
@@ -1156,6 +1169,9 @@ impl ArgMatches {
         if self.output_kind() == OutputKind::JSON {
             return true;
         }
+        if self.output_kind() == OutputKind::Diff {
+            return true;
+        }
 
         // A few things can imply counting line numbers. In particular, we
         // generally want to show line numbers by default when printing to a
@@ -1263,6 +1279,8 @@ impl ArgMatches {
             return OutputKind::Summary;
         } else if self.is_present("json") {
             return OutputKind::JSON;
+        } else if self.is_present("diff") {
+            return OutputKind::Diff;
         }
 
         let (count, count_matches) = self.counts();
diff --git a/crates/core/search.rs b/crates/core/search.rs
index 6dc51cfe9..aa00d2406 100644
--- a/crates/core/search.rs
+++ b/crates/core/search.rs
@@ -8,7 +8,7 @@ use grep::cli;
 use grep::matcher::Matcher;
 #[cfg(feature = "pcre2")]
 use grep::pcre2::RegexMatcher as PCRE2RegexMatcher;
-use grep::printer::{Standard, Stats, Summary, JSON};
+use grep::printer::{Diff, Standard, Stats, Summary, JSON};
 use grep::regex::RegexMatcher as RustRegexMatcher;
 use grep::searcher::{BinaryDetection, Searcher};
 use ignore::overrides::Override;
@@ -227,6 +227,8 @@ pub enum Printer<W> {
     Summary(Summary<W>),
     /// A JSON printer, which emits results in the JSON Lines format.
     JSON(JSON<W>),
+    /// A Diff printer, which represents search & replace in the unified diff format.
+ Diff(Diff), } impl Printer { @@ -237,7 +239,7 @@ impl Printer { ) -> io::Result<()> { match *self { Printer::JSON(_) => self.print_stats_json(total_duration, stats), - Printer::Standard(_) | Printer::Summary(_) => { + Printer::Standard(_) | Printer::Summary(_) | Printer::Diff(_) => { self.print_stats_human(total_duration, stats) } } @@ -303,6 +305,7 @@ impl Printer { Printer::Standard(ref mut p) => p.get_mut(), Printer::Summary(ref mut p) => p.get_mut(), Printer::JSON(ref mut p) => p.get_mut(), + Printer::Diff(ref mut p) => p.get_mut(), } } } @@ -502,6 +505,14 @@ fn search_path( stats: Some(sink.stats().clone()), }) } + Printer::Diff(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: Some(sink.stats().clone()), + }) + } } } @@ -539,6 +550,14 @@ fn search_reader( stats: Some(sink.stats().clone()), }) } + Printer::Diff(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, &mut rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: Some(sink.stats().clone()), + }) + } } } diff --git a/crates/printer/src/diff.rs b/crates/printer/src/diff.rs new file mode 100644 index 000000000..78959fa3f --- /dev/null +++ b/crates/printer/src/diff.rs @@ -0,0 +1,438 @@ +use std::io::{self, Write}; +use std::path::Path; +use std::sync::Arc; +use std::time::Instant; + +use grep_matcher::{Match, Matcher}; +use grep_searcher::{ + LineIter, LineStep, Searcher, Sink, SinkFinish, SinkMatch, +}; + +use crate::counter::CounterWriter; +use crate::stats::Stats; +use crate::util::{find_iter_at_in_context, Replacer}; +use crate::PrinterPath; + +/// The configuration for the Diff printer. +/// +/// This is manipulated by the DiffBuilder and then referenced by the actual +/// implementation. Once a printer is built, the configuration is frozen and +/// cannot changed. 
+#[derive(Debug, Clone)] +struct Config { + replacement: Arc>, +} + +impl Default for Config { + fn default() -> Config { + Config { replacement: Arc::new(vec![]) } + } +} + +/// A builder for a Diff lines printer. +/// +/// The builder permits configuring how the printer behaves. The Diff printer +/// requires a replacement to be meaningful, and the output is pretty much +/// non-configurable. +/// +/// Line numbers need to be present, but context lines are not dealt with at +/// the moment, as they require some kind of logic to buffer the output until +/// the header is known (since the amount of context lines affect its contents +/// and needs to be printed before the context lines). +/// +/// Once a `Diff` printer is built, its configuration cannot be changed. +#[derive(Clone, Debug)] +pub struct DiffBuilder { + config: Config, +} + +impl DiffBuilder { + /// Return a new builder for configuring the Diff printer. + pub fn new() -> DiffBuilder { + DiffBuilder { config: Config::default() } + } + + /// Create a Diff printer that writes results to the given writer. + pub fn build(&self, wtr: W) -> Diff { + Diff { + config: self.config.clone(), + wtr: CounterWriter::new(wtr), + matches: vec![], + } + } + + /// Set the bytes that will be used to replace each occurrence of a match + /// found. + /// + /// The replacement bytes given may include references to capturing groups, + /// which may either be in index form (e.g., `$2`) or can reference named + /// capturing groups if present in the original pattern (e.g., `$foo`). + /// + /// For documentation on the full format, please see the `Capture` trait's + /// `interpolate` method in the + /// [grep-printer](https://docs.rs/grep-printer) crate. + pub fn replacement(&mut self, replacement: Vec) -> &mut DiffBuilder { + self.config.replacement = Arc::new(replacement); + self + } +} + +/// The Diff printer, which emits search & replace info in unified diff format. 
+#[derive(Debug)]
+pub struct Diff<W> {
+    config: Config,
+    wtr: CounterWriter<W>,
+    matches: Vec<Match>,
+}
+
+impl<W: io::Write> Diff<W> {
+    /// Return a Diff lines printer with a default configuration that writes
+    /// matches to the given writer.
+    pub fn new(wtr: W) -> Diff<W> {
+        DiffBuilder::new().build(wtr)
+    }
+
+    /// Return an implementation of `Sink` associated with a file path.
+    ///
+    /// The path is written verbatim into the `---`/`+++` header lines that
+    /// precede the hunks emitted for that file.
+    pub fn sink_with_path<'p, 's, M, P>(
+        &'s mut self,
+        matcher: M,
+        path: &'p P,
+    ) -> DiffSink<'p, 's, M, W>
+    where
+        M: Matcher,
+        P: ?Sized + AsRef<Path>,
+    {
+        DiffSink {
+            matcher,
+            replacer: Replacer::new(),
+            diff: self,
+            path: path.as_ref(),
+            start_time: Instant::now(),
+            match_count: 0,
+            b_line_offset: 0,
+            after_context_remaining: 0,
+            binary_byte_offset: None,
+            begin_printed: false,
+            stats: Stats::new(),
+        }
+    }
+
+    /// Write the given line in the diff output as a removed (`-`) line.
+    /// The line needs to include the (original) line terminator.
+    fn write_unidiff_removed(&mut self, line: &[u8]) -> io::Result<()> {
+        self.wtr.write_all(&[b'-'])?;
+        self.wtr.write_all(line)?;
+        Ok(())
+    }
+
+    /// Write the given line in the diff output as an added (`+`) line.
+    /// The line needs to include the (original) line terminator.
+    fn write_unidiff_added(&mut self, line: &[u8]) -> io::Result<()> {
+        self.wtr.write_all(&[b'+'])?;
+        self.wtr.write_all(line)?;
+        Ok(())
+    }
+
+    /// Write a hunk header: `@@ -a_ln,a_count +b_ln,b_count @@`.
+    fn write_unidiff_hunk_header(
+        &mut self,
+        a_ln: u64,
+        a_count: u64,
+        b_ln: u64,
+        b_count: u64,
+    ) -> io::Result<()> {
+        self.wtr.write_all(
+            format!("@@ -{},{} +{},{} @@\n", a_ln, a_count, b_ln, b_count)
+                .as_bytes(),
+        )?;
+        Ok(())
+    }
+
+    /// Write the per-file header that precedes the hunks of one file:
+    /// ripgrep
+    /// --- path/to/a
+    /// +++ path/to/b
+    fn write_unidiff_header(&mut self, path: &[u8]) -> io::Result<()> {
+        self.wtr.write_all(b"ripgrep\n")?;
+        self.wtr.write_all(b"--- ")?;
+        self.wtr.write_all(path)?;
+        self.wtr.write_all(&[b'\n'])?;
+        self.wtr.write_all(b"+++ ")?;
+        self.wtr.write_all(path)?;
+        self.wtr.write_all(&[b'\n'])?;
+        Ok(())
+    }
+}
+
+impl<W> Diff<W> {
+    /// Returns true if and only if this printer has written at least one byte
+    /// to the underlying writer during any of the previous searches.
+    pub fn has_written(&self) -> bool {
+        self.wtr.total_count() > 0
+    }
+
+    /// Return a mutable reference to the underlying writer.
+    pub fn get_mut(&mut self) -> &mut W {
+        self.wtr.get_mut()
+    }
+
+    /// Consume this printer and return back ownership of the underlying
+    /// writer.
+    pub fn into_inner(self) -> W {
+        self.wtr.into_inner()
+    }
+}
+
+/// An implementation of `Sink` associated with a matcher and a file path for
+/// the Diff printer.
+///
+/// This type is generic over a few type parameters:
+///
+/// * `'p` refers to the lifetime of the file path that was given to
+///   `Diff::sink_with_path`.
+/// * `'s` refers to the lifetime of the
+///   [`Diff`](struct.Diff.html)
+///   printer that this type borrows.
+/// * `M` refers to the type of matcher used by
+///   `grep_searcher::Searcher` that is reporting results to this sink.
+/// * `W` refers to the underlying writer that this printer is writing its
+///   output to.
+#[derive(Debug)] +pub struct DiffSink<'p, 's, M: Matcher, W> { + matcher: M, + replacer: Replacer, + diff: &'s mut Diff, + path: &'p Path, + start_time: Instant, + match_count: u64, + b_line_offset: i64, + after_context_remaining: u64, + binary_byte_offset: Option, + begin_printed: bool, + stats: Stats, +} + +impl<'p, 's, M: Matcher, W: io::Write> DiffSink<'p, 's, M, W> { + /// Returns true if and only if this printer received a match in the + /// previous search. + /// + /// This is unaffected by the result of searches before the previous + /// search. + pub fn has_match(&self) -> bool { + self.match_count > 0 + } + + /// Return the total number of matches reported to this sink. + /// + /// This corresponds to the number of times `Sink::matched` is called. + pub fn match_count(&self) -> u64 { + self.match_count + } + + /// If binary data was found in the previous search, this returns the + /// offset at which the binary data was first detected. + /// + /// The offset returned is an absolute offset relative to the entire + /// set of bytes searched. + /// + /// This is unaffected by the result of searches before the previous + /// search. e.g., If the search prior to the previous search found binary + /// data but the previous search found no binary data, then this will + /// return `None`. + pub fn binary_byte_offset(&self) -> Option { + self.binary_byte_offset + } + + /// Return a reference to the stats produced by the printer for all + /// searches executed on this sink. + pub fn stats(&self) -> &Stats { + &self.stats + } + + /// Execute the matcher over the given bytes and record the match + /// locations if the current configuration demands match granularity. + fn record_matches( + &mut self, + searcher: &Searcher, + bytes: &[u8], + range: std::ops::Range, + ) -> io::Result<()> { + self.diff.matches.clear(); + // If printing requires knowing the location of each individual match, + // then compute and stored those right now for use later. 
While this + // adds an extra copy for storing the matches, we do amortize the + // allocation for it and this greatly simplifies the printing logic to + // the extent that it's easy to ensure that we never do more than + // one search to find the matches. + let matches = &mut self.diff.matches; + find_iter_at_in_context( + searcher, + &self.matcher, + bytes, + range.clone(), + |m| { + let (s, e) = (m.start() - range.start, m.end() - range.start); + matches.push(Match::new(s, e)); + true + }, + )?; + // Don't report empty matches appearing at the end of the bytes. + if !matches.is_empty() + && matches.last().unwrap().is_empty() + && matches.last().unwrap().start() >= bytes.len() + { + matches.pop().unwrap(); + } + Ok(()) + } + + /// If the configuration specifies a replacement, then this executes the + /// replacement, lazily allocating memory if necessary. + /// + /// To access the result of a replacement, use `replacer.replacement()`. + fn replace( + &mut self, + searcher: &Searcher, + bytes: &[u8], + range: std::ops::Range, + ) -> io::Result<()> { + self.replacer.clear(); + let replacement = (*self.diff.config.replacement).as_ref(); + self.replacer.replace_all( + searcher, + &self.matcher, + bytes, + range, + replacement, + )?; + Ok(()) + } + + /// Write the header information which contains the path of the + /// source and destination file of the diff. 
+ fn write_header(&mut self) -> io::Result<()> { + if self.begin_printed { + return Ok(()); + } + let ppath = PrinterPath::with_separator(self.path, None); + self.diff.write_unidiff_header(&ppath.as_bytes())?; + self.begin_printed = true; + Ok(()) + } +} + +impl<'p, 's, M: Matcher, W: io::Write> Sink for DiffSink<'p, 's, M, W> { + type Error = io::Error; + + fn matched( + &mut self, + searcher: &Searcher, + mat: &SinkMatch<'_>, + ) -> Result { + self.write_header()?; + + self.match_count += 1; + // When we've exceeded our match count, then the remaining context + // lines should not be reset, but instead, decremented. This avoids a + // bug where we display more matches than a configured limit. The main + // idea here is that 'matched' might be called again while printing + // an after-context line. In that case, we should treat this as a + // contextual line rather than a matching line for the purposes of + // termination. + self.after_context_remaining = searcher.after_context() as u64; + + self.record_matches( + searcher, + mat.buffer(), + mat.bytes_range_in_buffer(), + )?; + self.replace(searcher, mat.buffer(), mat.bytes_range_in_buffer())?; + self.stats.add_matches(self.diff.matches.len() as u64); + self.stats.add_matched_lines(mat.lines().count() as u64); + + // Entire search (a) and replacement (b) contents. + let a_bytes = mat.bytes(); + let (b_bytes, _) = self.replacer.replacement().unwrap(); + + // To get the correct number of lines removed added without any + // assumptions about single or multi line search/replace, just + // loop over lines here and count them. 
+ let a_line_number = mat.line_number().unwrap(); + let b_line_number = + (self.b_line_offset + (a_line_number as i64)) as u64; + let line_term = searcher.line_terminator().as_byte(); + let mut a_stepper = LineStep::new(line_term, 0, a_bytes.len()); + let mut b_stepper = LineStep::new(line_term, 0, b_bytes.len()); + let mut a_count: u64 = 0; + let mut b_count: u64 = 0; + while let Some(_) = a_stepper.next(a_bytes) { + a_count += 1; + } + while let Some(_) = b_stepper.next(b_bytes) { + b_count += 1; + } + + // When a replacement has different line count, the offset for later + // replacements is affected as the destination line count is relative + // to the already inserted new lines. + self.b_line_offset += (b_count as i64) - (a_count as i64); + + // header of a replacement contains the line number offset in + // the source (a) and destination (b) files, as well as the + // number of lines removed (a_count) / added (b_count). + self.diff.write_unidiff_hunk_header( + a_line_number, + a_count, + b_line_number, + b_count, + )?; + + // When printing the actual lines, a -/+ sign is prefixed for + // each line, so we need to output our match/replace chunks line + // by line and insert the proper prefix. 
+ let a_lines = LineIter::new(line_term, a_bytes); + for line in a_lines { + self.diff.write_unidiff_removed(line)?; + } + let b_lines = LineIter::new(line_term, b_bytes); + for line in b_lines { + self.diff.write_unidiff_added(line)?; + } + + Ok(true) + } + + fn begin(&mut self, _searcher: &Searcher) -> Result { + self.diff.wtr.reset_count(); + self.start_time = Instant::now(); + self.match_count = 0; + self.b_line_offset = 0; + self.after_context_remaining = 0; + self.binary_byte_offset = None; + Ok(true) + } + + fn finish( + &mut self, + _searcher: &Searcher, + finish: &SinkFinish, + ) -> Result<(), io::Error> { + if !self.begin_printed { + return Ok(()); + } + + self.binary_byte_offset = finish.binary_byte_offset(); + self.stats.add_elapsed(self.start_time.elapsed()); + self.stats.add_searches(1); + if self.match_count > 0 { + self.stats.add_searches_with_match(1); + } + self.stats.add_bytes_searched(finish.byte_count()); + self.stats.add_bytes_printed(self.diff.wtr.count()); + + Ok(()) + } +} diff --git a/crates/printer/src/lib.rs b/crates/printer/src/lib.rs index 29e0a45b0..f65993c1d 100644 --- a/crates/printer/src/lib.rs +++ b/crates/printer/src/lib.rs @@ -67,6 +67,7 @@ fn example() -> Result<(), Box> { pub use crate::color::{ default_color_specs, ColorError, ColorSpecs, UserColorSpec, }; +pub use crate::diff::{Diff, DiffBuilder, DiffSink}; #[cfg(feature = "serde1")] pub use crate::json::{JSONBuilder, JSONSink, JSON}; pub use crate::standard::{Standard, StandardBuilder, StandardSink}; @@ -90,6 +91,7 @@ mod macros; mod color; mod counter; +mod diff; #[cfg(feature = "serde1")] mod json; #[cfg(feature = "serde1")] diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs index 434deec7c..6875ee8b6 100644 --- a/crates/printer/src/util.rs +++ b/crates/printer/src/util.rs @@ -63,9 +63,13 @@ impl Replacer { // See the giant comment in 'find_iter_at_in_context' below for why we // do this dance. 
let is_multi_line = searcher.multi_line_with_matcher(&matcher); + let mut extension_len = 0; if is_multi_line { if subject[range.end..].len() >= MAX_LOOK_AHEAD { + extension_len = MAX_LOOK_AHEAD; subject = &subject[..range.end + MAX_LOOK_AHEAD]; + } else { + extension_len = subject.len() - range.end; } } else { // When searching a single line, we should remove the line @@ -102,6 +106,18 @@ impl Replacer { }, ) .map_err(io::Error::error_message)?; + + if is_multi_line { + // Remove the subject buffer beyond the range.end. + // NOTE: could this be a bug with the current replace functionality? + // As an example, running `rg -U '\.\nA' --replace 'BB' -C 2` in this repo produces + // spurious extra lines of output in the replacement, as the extra bytes where + // not removed. This seems to fix that, but it's not pretty and might be wrong. + dst.truncate(dst.len() - extension_len); + } else { + // Restore the line terminator. + dst.extend(searcher.line_terminator().as_bytes()); + } } Ok(()) } From 787df77a8772f69a672a17118c1fa8f889cf097a Mon Sep 17 00:00:00 2001 From: sonohgong <99127857+sonohgong@users.noreply.github.com> Date: Thu, 24 Feb 2022 17:46:55 +0100 Subject: [PATCH 2/2] limit amount of matches during replacement Skip matches beyond the original range when replacing a buffer with captures. --- crates/matcher/src/lib.rs | 30 ++++++++++++++++++++++++++++++ crates/printer/src/util.rs | 18 ++++-------------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/crates/matcher/src/lib.rs b/crates/matcher/src/lib.rs index 5b43b0d85..76fda4948 100644 --- a/crates/matcher/src/lib.rs +++ b/crates/matcher/src/lib.rs @@ -38,9 +38,11 @@ implementations. #![deny(missing_docs)] +use std::cmp::min; use std::fmt; use std::io; use std::ops; +use std::ops::Range; use std::u64; use crate::interpolate::interpolate; @@ -942,6 +944,34 @@ pub trait Matcher { Ok(()) } + /// Same as replace_with_captures_at, but limits the replacements to the + /// given range. 
Matches beyond the end of the range are skipped.
+    fn replace_with_captures_in_range<F>(
+        &self,
+        haystack: &[u8],
+        range: &Range<usize>,
+        caps: &mut Self::Captures,
+        dst: &mut Vec<u8>,
+        mut append: F,
+    ) -> Result<(), Self::Error>
+    where
+        F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
+    {
+        let mut last_match = range.start;
+        self.captures_iter_at(haystack, range.start, caps, |caps| {
+            let m = caps.get(0).unwrap();
+            if m.start >= range.end {
+                return false;
+            }
+            dst.extend(&haystack[last_match..m.start]);
+            last_match = m.end;
+            append(caps, dst)
+        })?;
+        let end = min(haystack.len(), range.end);
+        dst.extend(&haystack[min(last_match, end)..end]);
+        Ok(())
+    }
+
     /// Returns true if and only if the matcher matches the given haystack.
     ///
     /// By default, this method is implemented by calling `shortest_match`.
diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs
index 6875ee8b6..76eeb28ac 100644
--- a/crates/printer/src/util.rs
+++ b/crates/printer/src/util.rs
@@ -1,4 +1,5 @@
 use std::borrow::Cow;
+use std::cmp::max;
 use std::fmt;
 use std::io;
 use std::path::Path;
@@ -63,13 +64,9 @@ impl Replacer {
         // See the giant comment in 'find_iter_at_in_context' below for why we
         // do this dance.
         let is_multi_line = searcher.multi_line_with_matcher(&matcher);
-        let mut extension_len = 0;
         if is_multi_line {
             if subject[range.end..].len() >= MAX_LOOK_AHEAD {
-                extension_len = MAX_LOOK_AHEAD;
                 subject = &subject[..range.end + MAX_LOOK_AHEAD];
-            } else {
-                extension_len = subject.len() - range.end;
             }
         } else {
             // When searching a single line, we should remove the line
@@ -87,9 +84,9 @@ impl Replacer {
             matches.clear();
 
             matcher
-                .replace_with_captures_at(
+                .replace_with_captures_in_range(
                     subject,
-                    range.start,
+                    &range,
                     caps,
                     dst,
                     |caps, dst| {
@@ -107,14 +104,7 @@ impl Replacer {
                 )
                 .map_err(io::Error::error_message)?;
 
-            if is_multi_line {
-                // Remove the subject buffer beyond the range.end.
-                // NOTE: could this be a bug with the current replace functionality?
- // As an example, running `rg -U '\.\nA' --replace 'BB' -C 2` in this repo produces - // spurious extra lines of output in the replacement, as the extra bytes where - // not removed. This seems to fix that, but it's not pretty and might be wrong. - dst.truncate(dst.len() - extension_len); - } else { + if !is_multi_line { // Restore the line terminator. dst.extend(searcher.line_terminator().as_bytes()); }