From a7005f3bd7009fcda927fa50dedc90af50a237c8 Mon Sep 17 00:00:00 2001
From: sonohgong <99127857+sonohgong@users.noreply.github.com>
Date: Tue, 22 Feb 2022 20:07:07 +0100
Subject: [PATCH 1/2] attempt to hack together a diff output

---
 crates/core/app.rs         |  44 ++++
 crates/core/args.rs        |  22 +-
 crates/core/search.rs      |  23 +-
 crates/printer/src/diff.rs | 438 +++++++++++++++++++++++++++++++++++++
 crates/printer/src/lib.rs  |   2 +
 crates/printer/src/util.rs |  16 ++
 6 files changed, 541 insertions(+), 4 deletions(-)
 create mode 100644 crates/printer/src/diff.rs
diff --git a/crates/core/app.rs b/crates/core/app.rs
index 215a2b74f..aef8b4962 100644
--- a/crates/core/app.rs
+++ b/crates/core/app.rs
@@ -566,6 +566,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
     flag_crlf(&mut args);
     flag_debug(&mut args);
     flag_dfa_size_limit(&mut args);
+    flag_diff(&mut args);
     flag_encoding(&mut args);
     flag_engine(&mut args);
     flag_field_context_separator(&mut args);
@@ -1170,6 +1171,49 @@ The argument accepts the same size suffixes as allowed in with the
     args.push(arg);
 }
 
+fn flag_diff(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Output search & replace results in unidiff format.";
+    const LONG: &str = long!(
+        "\
+Enable printing search & replace results in unified diff (unidiff) format.
+There doesn't seem to be a particular standard for this format other than:
+https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html
+This implementation is based on the output of the diff utility when run on
+multiple files, i.e. with an extra line of information between each file
+about what command produced the diff.
+
+When this flag is provided together with the --replace flag, ripgrep will
+emit unidiff format representing the matches as removed lines in a file
+and the replacements as added lines in the same file.
+
+The output can be viewed with a variety of diff pagers, or applied to files
+with e.g. `git apply -p0 --unidiff-zero` or `patch -u -p0`.
+
+The diff output currently does not support additional context,
+so the --context flag will have no effect either.
+
+The unidiff format is only supported for showing search results. It cannot
+be used with other flags that emit other types of output, such as --files,
+--files-with-matches, --files-without-match, --count or --count-matches.
+ripgrep will report an error if any of the aforementioned flags are used in
+concert with --diff.
+
+Other flags that control aspects of the standard output such as
+--only-matching, --heading, --max-columns, etc., have no effect
+when --diff is set.
+
+"
+    );
+    let arg = RGArg::switch("diff").help(SHORT).long_help(LONG).conflicts(&[
+        "count",
+        "count-matches",
+        "files",
+        "files-with-matches",
+        "files-without-match",
+    ]);
+    args.push(arg);
+}
+
 fn flag_encoding(args: &mut Vec<RGArg>) {
     const SHORT: &str = "Specify the text encoding of files to search.";
     const LONG: &str = long!(
diff --git a/crates/core/args.rs b/crates/core/args.rs
index 6623cbd54..19c9a7c7b 100644
--- a/crates/core/args.rs
+++ b/crates/core/args.rs
@@ -17,8 +17,8 @@ use grep::pcre2::{
     RegexMatcherBuilder as PCRE2RegexMatcherBuilder,
 };
 use grep::printer::{
-    default_color_specs, ColorSpecs, JSONBuilder, Standard, StandardBuilder,
-    Stats, Summary, SummaryBuilder, SummaryKind, JSON,
+    default_color_specs, ColorSpecs, Diff, DiffBuilder, JSONBuilder, Standard,
+    StandardBuilder, Stats, Summary, SummaryBuilder, SummaryKind, JSON,
 };
 use grep::regex::{
     RegexMatcher as RustRegexMatcher,
@@ -209,6 +209,9 @@ impl Args {
             OutputKind::JSON => {
                 self.matches().printer_json(wtr).map(Printer::JSON)
             }
+            OutputKind::Diff => {
+                self.matches().printer_diff(wtr).map(Printer::Diff)
+            }
         }
     }
 }
@@ -367,6 +370,8 @@ enum OutputKind {
     Summary,
     /// Emit match information in the JSON Lines format.
     JSON,
+    /// Emit search & replace information in unified diff format.
+    Diff,
 }
 
 /// The sort criteria, if present.
@@ -751,6 +756,14 @@ impl ArgMatches {
         Ok(builder.build(wtr))
     }
 
+    /// Build a Diff printer for the given writer; fails without --replace.
+    fn printer_diff<W: io::Write>(&self, wtr: W) -> Result<Diff<W>> {
+        let rep = self.replacement().ok_or("--diff requires --replace")?;
+        let mut builder = DiffBuilder::new();
+        builder.replacement(rep);
+        Ok(builder.build(wtr))
+    }
+
     /// Build a Standard printer that writes results to the given writer.
     ///
     /// The given paths are used to configure aspects of the printer.
@@ -1156,6 +1169,9 @@ impl ArgMatches {
         if self.output_kind() == OutputKind::JSON {
             return true;
         }
+        if self.output_kind() == OutputKind::Diff {
+            return true;
+        }
 
         // A few things can imply counting line numbers. In particular, we
         // generally want to show line numbers by default when printing to a
@@ -1263,6 +1279,8 @@ impl ArgMatches {
             return OutputKind::Summary;
         } else if self.is_present("json") {
             return OutputKind::JSON;
+        } else if self.is_present("diff") {
+            return OutputKind::Diff;
         }
 
         let (count, count_matches) = self.counts();
diff --git a/crates/core/search.rs b/crates/core/search.rs
index 6dc51cfe9..aa00d2406 100644
--- a/crates/core/search.rs
+++ b/crates/core/search.rs
@@ -8,7 +8,7 @@ use grep::cli;
 use grep::matcher::Matcher;
 #[cfg(feature = "pcre2")]
 use grep::pcre2::RegexMatcher as PCRE2RegexMatcher;
-use grep::printer::{Standard, Stats, Summary, JSON};
+use grep::printer::{Diff, Standard, Stats, Summary, JSON};
 use grep::regex::RegexMatcher as RustRegexMatcher;
 use grep::searcher::{BinaryDetection, Searcher};
 use ignore::overrides::Override;
@@ -227,6 +227,8 @@ pub enum Printer<W> {
     Summary(Summary<W>),
     /// A JSON printer, which emits results in the JSON Lines format.
     JSON(JSON<W>),
+    /// A Diff printer, which represents search & replace in the unified diff format.
+    Diff(Diff<W>),
 }
 
 impl<W: WriteColor> Printer<W> {
@@ -237,7 +239,7 @@ impl<W: WriteColor> Printer<W> {
     ) -> io::Result<()> {
         match *self {
             Printer::JSON(_) => self.print_stats_json(total_duration, stats),
-            Printer::Standard(_) | Printer::Summary(_) => {
+            Printer::Standard(_) | Printer::Summary(_) | Printer::Diff(_) => {
                 self.print_stats_human(total_duration, stats)
             }
         }
@@ -303,6 +305,7 @@ impl<W: WriteColor> Printer<W> {
             Printer::Standard(ref mut p) => p.get_mut(),
             Printer::Summary(ref mut p) => p.get_mut(),
             Printer::JSON(ref mut p) => p.get_mut(),
+            Printer::Diff(ref mut p) => p.get_mut(),
         }
     }
 }
@@ -502,6 +505,14 @@ fn search_path<M: Matcher, W: WriteColor>(
                 stats: Some(sink.stats().clone()),
             })
         }
+        Printer::Diff(ref mut p) => {
+            let mut sink = p.sink_with_path(&matcher, path);
+            searcher.search_path(&matcher, path, &mut sink)?;
+            Ok(SearchResult {
+                has_match: sink.has_match(),
+                stats: Some(sink.stats().clone()),
+            })
+        }
     }
 }
 
@@ -539,6 +550,14 @@ fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
                 stats: Some(sink.stats().clone()),
             })
         }
+        Printer::Diff(ref mut p) => {
+            let mut sink = p.sink_with_path(&matcher, path);
+            searcher.search_reader(&matcher, &mut rdr, &mut sink)?;
+            Ok(SearchResult {
+                has_match: sink.has_match(),
+                stats: Some(sink.stats().clone()),
+            })
+        }
     }
 }
 
diff --git a/crates/printer/src/diff.rs b/crates/printer/src/diff.rs
new file mode 100644
index 000000000..78959fa3f
--- /dev/null
+++ b/crates/printer/src/diff.rs
@@ -0,0 +1,438 @@
+use std::io::{self, Write};
+use std::path::Path;
+use std::sync::Arc;
+use std::time::Instant;
+
+use grep_matcher::{Match, Matcher};
+use grep_searcher::{
+    LineIter, LineStep, Searcher, Sink, SinkFinish, SinkMatch,
+};
+
+use crate::counter::CounterWriter;
+use crate::stats::Stats;
+use crate::util::{find_iter_at_in_context, Replacer};
+use crate::PrinterPath;
+
+/// The configuration for the Diff printer.
+///
+/// This is manipulated by the DiffBuilder and then referenced by the actual
+/// implementation. Once a printer is built, the configuration is frozen and
+/// cannot be changed.
+#[derive(Debug, Clone)]
+struct Config {
+    replacement: Arc<Vec<u8>>,
+}
+
+impl Default for Config {
+    fn default() -> Config {
+        Config { replacement: Arc::new(vec![]) }
+    }
+}
+
+/// A builder for a Diff lines printer.
+///
+/// The builder permits configuring how the printer behaves. The Diff printer
+/// requires a replacement to be meaningful, and the output is pretty much
+/// non-configurable.
+///
+/// Line numbers need to be present, but context lines are not dealt with at
+/// the moment, as they require some kind of logic to buffer the output until
+/// the header is known (since the amount of context lines affect its contents
+/// and needs to be printed before the context lines).
+///
+/// Once a `Diff` printer is built, its configuration cannot be changed.
+#[derive(Clone, Debug)]
+pub struct DiffBuilder {
+    config: Config,
+}
+
+impl DiffBuilder {
+    /// Return a new builder for configuring the Diff printer.
+    pub fn new() -> DiffBuilder {
+        DiffBuilder { config: Config::default() }
+    }
+
+    /// Create a Diff printer that writes results to the given writer.
+    pub fn build<W: io::Write>(&self, wtr: W) -> Diff<W> {
+        Diff {
+            config: self.config.clone(),
+            wtr: CounterWriter::new(wtr),
+            matches: vec![],
+        }
+    }
+
+    /// Set the bytes that will be used to replace each occurrence of a match
+    /// found.
+    ///
+    /// The replacement bytes given may include references to capturing groups,
+    /// which may either be in index form (e.g., `$2`) or can reference named
+    /// capturing groups if present in the original pattern (e.g., `$foo`).
+    ///
+    /// For documentation on the full format, please see the `Capture` trait's
+    /// `interpolate` method in the
+    /// [grep-printer](https://docs.rs/grep-printer) crate.
+    pub fn replacement(&mut self, replacement: Vec<u8>) -> &mut DiffBuilder {
+        self.config.replacement = Arc::new(replacement);
+        self
+    }
+}
+
+/// The Diff printer, which emits search & replace info in unified diff format.
+#[derive(Debug)]
+pub struct Diff<W> {
+    config: Config,
+    wtr: CounterWriter<W>,
+    matches: Vec<Match>,
+}
+
+impl<W: io::Write> Diff<W> {
+    /// Return a Diff lines printer with a default configuration that writes
+    /// matches to the given writer.
+    pub fn new(wtr: W) -> Diff<W> {
+        DiffBuilder::new().build(wtr)
+    }
+
+    /// Return an implementation of `Sink` associated with a file path.
+    ///
+    /// When the printer is associated with a path, then it may, depending on
+    /// its configuration, print the path along with the matches found.
+    pub fn sink_with_path<'p, 's, M, P>(
+        &'s mut self,
+        matcher: M,
+        path: &'p P,
+    ) -> DiffSink<'p, 's, M, W>
+    where
+        M: Matcher,
+        P: ?Sized + AsRef<Path>,
+    {
+        DiffSink {
+            matcher,
+            replacer: Replacer::new(),
+            diff: self,
+            path: path.as_ref(),
+            start_time: Instant::now(),
+            match_count: 0,
+            b_line_offset: 0,
+            after_context_remaining: 0,
+            binary_byte_offset: None,
+            begin_printed: false,
+            stats: Stats::new(),
+        }
+    }
+
+    /// Write the given line in the diff output as a removed line.
+    /// The line needs to include the (original) line terminator.
+    fn write_unidiff_removed(&mut self, line: &[u8]) -> io::Result<()> {
+        self.wtr.write_all(&[b'-'])?;
+        self.wtr.write_all(line)?;
+        Ok(())
+    }
+
+    /// Write the given line in the diff output as an added line.
+    /// The line needs to include the (original) terminator.
+    fn write_unidiff_added(&mut self, line: &[u8]) -> io::Result<()> {
+        self.wtr.write_all(&[b'+'])?;
+        self.wtr.write_all(line)?;
+        Ok(())
+    }
+
+    /// Write a hunk header with the start line and line count of both sides.
+    fn write_unidiff_hunk_header(
+        &mut self,
+        a_ln: u64,
+        a_count: u64,
+        b_ln: u64,
+        b_count: u64,
+    ) -> io::Result<()> {
+        self.wtr.write_all(
+            format!("@@ -{},{} +{},{} @@\n", a_ln, a_count, b_ln, b_count)
+                .as_bytes(),
+        )?;
+        Ok(())
+    }
+
+    /// Write the header that precedes all hunks of a single file:
+    ///   ripgrep
+    ///   --- path/to/a
+    ///   +++ path/to/b
+    fn write_unidiff_header(&mut self, path: &[u8]) -> io::Result<()> {
+        self.wtr.write_all(b"ripgrep\n")?;
+        self.wtr.write_all(b"--- ")?;
+        self.wtr.write_all(path)?;
+        self.wtr.write_all(&[b'\n'])?;
+        self.wtr.write_all(b"+++ ")?;
+        self.wtr.write_all(path)?;
+        self.wtr.write_all(&[b'\n'])?;
+        Ok(())
+    }
+}
+
+impl<W> Diff<W> {
+    /// Returns true if and only if this printer has written at least one byte
+    /// to the underlying writer during any of the previous searches.
+    pub fn has_written(&self) -> bool {
+        self.wtr.total_count() > 0
+    }
+
+    /// Return a mutable reference to the underlying writer.
+    pub fn get_mut(&mut self) -> &mut W {
+        self.wtr.get_mut()
+    }
+
+    /// Consume this printer and return back ownership of the underlying
+    /// writer.
+    pub fn into_inner(self) -> W {
+        self.wtr.into_inner()
+    }
+}
+
+/// An implementation of `Sink` associated with a matcher and a file path for
+/// the Diff printer.
+///
+/// This type is generic over a few type parameters:
+///
+/// * `'p` refers to the lifetime of the file path that is written in the
+///   `---`/`+++` header of the diff.
+/// * `'s` refers to the lifetime of the
+///   [`Diff`](struct.Diff.html)
+///   printer that this type borrows.
+/// * `M` refers to the type of matcher used by
+///   `grep_searcher::Searcher` that is reporting results to this sink.
+/// * `W` refers to the underlying writer that this printer is writing its
+///   output to.
+#[derive(Debug)]
+pub struct DiffSink<'p, 's, M: Matcher, W> {
+    matcher: M,
+    replacer: Replacer<M>,
+    diff: &'s mut Diff<W>,
+    path: &'p Path,
+    start_time: Instant,
+    match_count: u64,
+    b_line_offset: i64,
+    after_context_remaining: u64,
+    binary_byte_offset: Option<u64>,
+    begin_printed: bool,
+    stats: Stats,
+}
+
+impl<'p, 's, M: Matcher, W: io::Write> DiffSink<'p, 's, M, W> {
+    /// Returns true if and only if this printer received a match in the
+    /// previous search.
+    ///
+    /// This is unaffected by the result of searches before the previous
+    /// search.
+    pub fn has_match(&self) -> bool {
+        self.match_count > 0
+    }
+
+    /// Return the total number of matches reported to this sink.
+    ///
+    /// This corresponds to the number of times `Sink::matched` is called.
+    pub fn match_count(&self) -> u64 {
+        self.match_count
+    }
+
+    /// If binary data was found in the previous search, this returns the
+    /// offset at which the binary data was first detected.
+    ///
+    /// The offset returned is an absolute offset relative to the entire
+    /// set of bytes searched.
+    ///
+    /// This is unaffected by the result of searches before the previous
+    /// search. e.g., If the search prior to the previous search found binary
+    /// data but the previous search found no binary data, then this will
+    /// return `None`.
+    pub fn binary_byte_offset(&self) -> Option<u64> {
+        self.binary_byte_offset
+    }
+
+    /// Return a reference to the stats produced by the printer for all
+    /// searches executed on this sink.
+    pub fn stats(&self) -> &Stats {
+        &self.stats
+    }
+
+    /// Execute the matcher over the given bytes and record the location of
+    /// each individual match, relative to the start of `range`.
+    fn record_matches(
+        &mut self,
+        searcher: &Searcher,
+        bytes: &[u8],
+        range: std::ops::Range<usize>,
+    ) -> io::Result<()> {
+        self.diff.matches.clear();
+        // Compute and store the location of each individual match right now
+        // for use later. While this adds an extra copy for storing the
+        // matches, the allocation is amortized because the buffer lives on
+        // the printer and is only cleared, never dropped, between calls.
+        // Offsets are rebased so that they are relative to the start of
+        // `range` rather than to the start of `bytes`.
+        let matches = &mut self.diff.matches;
+        find_iter_at_in_context(
+            searcher,
+            &self.matcher,
+            bytes,
+            range.clone(),
+            |m| {
+                let (s, e) = (m.start() - range.start, m.end() - range.start);
+                matches.push(Match::new(s, e));
+                true
+            },
+        )?;
+        // Don't report empty matches appearing at the end of the bytes.
+        if !matches.is_empty()
+            && matches.last().unwrap().is_empty()
+            && matches.last().unwrap().start() >= bytes.len()
+        {
+            matches.pop().unwrap();
+        }
+        Ok(())
+    }
+
+    /// If the configuration specifies a replacement, then this executes the
+    /// replacement, lazily allocating memory if necessary.
+    ///
+    /// To access the result of a replacement, use `replacer.replacement()`.
+    fn replace(
+        &mut self,
+        searcher: &Searcher,
+        bytes: &[u8],
+        range: std::ops::Range<usize>,
+    ) -> io::Result<()> {
+        self.replacer.clear();
+        let replacement = (*self.diff.config.replacement).as_ref();
+        self.replacer.replace_all(
+            searcher,
+            &self.matcher,
+            bytes,
+            range,
+            replacement,
+        )?;
+        Ok(())
+    }
+
+    /// Write the header information which contains the path of the
+    /// source and destination file of the diff.
+    fn write_header(&mut self) -> io::Result<()> {
+        if self.begin_printed {
+            return Ok(());
+        }
+        let ppath = PrinterPath::with_separator(self.path, None);
+        self.diff.write_unidiff_header(&ppath.as_bytes())?;
+        self.begin_printed = true;
+        Ok(())
+    }
+}
+
+impl<'p, 's, M: Matcher, W: io::Write> Sink for DiffSink<'p, 's, M, W> {
+    type Error = io::Error;
+
+    fn matched(
+        &mut self,
+        searcher: &Searcher,
+        mat: &SinkMatch<'_>,
+    ) -> Result<bool, io::Error> {
+        self.write_header()?;
+
+        self.match_count += 1;
+        // Record the number of after-context lines configured on the
+        // searcher. Note that nothing in this file reads the value back:
+        // this sink does not override `Sink::context`, so context lines
+        // are never written to the diff, no match limit is enforced and
+        // the counter is never decremented.
+        // NOTE(review): the field looks like a leftover from the other
+        // printers and can presumably be removed -- confirm.
+        self.after_context_remaining = searcher.after_context() as u64;
+
+        self.record_matches(
+            searcher,
+            mat.buffer(),
+            mat.bytes_range_in_buffer(),
+        )?;
+        self.replace(searcher, mat.buffer(), mat.bytes_range_in_buffer())?;
+        self.stats.add_matches(self.diff.matches.len() as u64);
+        self.stats.add_matched_lines(mat.lines().count() as u64);
+
+        // Entire search (a) and replacement (b) contents.
+        let a_bytes = mat.bytes();
+        let (b_bytes, _) = self.replacer.replacement().unwrap();
+
+        // To get the correct number of lines removed and added without any
+        // assumptions about single or multi line search/replace, just
+        // loop over lines here and count them.
+        let a_line_number = mat.line_number().unwrap();
+        let b_line_number =
+            (self.b_line_offset + (a_line_number as i64)) as u64;
+        let line_term = searcher.line_terminator().as_byte();
+        let mut a_stepper = LineStep::new(line_term, 0, a_bytes.len());
+        let mut b_stepper = LineStep::new(line_term, 0, b_bytes.len());
+        let mut a_count: u64 = 0;
+        let mut b_count: u64 = 0;
+        while let Some(_) = a_stepper.next(a_bytes) {
+            a_count += 1;
+        }
+        while let Some(_) = b_stepper.next(b_bytes) {
+            b_count += 1;
+        }
+
+        // When a replacement has different line count, the offset for later
+        // replacements is affected as the destination line count is relative
+        // to the already inserted new lines.
+        self.b_line_offset += (b_count as i64) - (a_count as i64);
+
+        // header of a replacement contains the line number offset in
+        // the source (a) and destination (b) files, as well as the
+        // number of lines removed (a_count) / added (b_count).
+        self.diff.write_unidiff_hunk_header(
+            a_line_number,
+            a_count,
+            b_line_number,
+            b_count,
+        )?;
+
+        // When printing the actual lines, a -/+ sign is prefixed for
+        // each line, so we need to output our match/replace chunks line
+        // by line and insert the proper prefix.
+        let a_lines = LineIter::new(line_term, a_bytes);
+        for line in a_lines {
+            self.diff.write_unidiff_removed(line)?;
+        }
+        let b_lines = LineIter::new(line_term, b_bytes);
+        for line in b_lines {
+            self.diff.write_unidiff_added(line)?;
+        }
+
+        Ok(true)
+    }
+
+    fn begin(&mut self, _searcher: &Searcher) -> Result<bool, io::Error> {
+        self.diff.wtr.reset_count();
+        self.start_time = Instant::now();
+        self.match_count = 0;
+        self.b_line_offset = 0;
+        self.after_context_remaining = 0;
+        self.binary_byte_offset = None;
+        Ok(true)
+    }
+
+    fn finish(
+        &mut self,
+        _searcher: &Searcher,
+        finish: &SinkFinish,
+    ) -> Result<(), io::Error> {
+        if !self.begin_printed {
+            return Ok(());
+        }
+
+        self.binary_byte_offset = finish.binary_byte_offset();
+        self.stats.add_elapsed(self.start_time.elapsed());
+        self.stats.add_searches(1);
+        if self.match_count > 0 {
+            self.stats.add_searches_with_match(1);
+        }
+        self.stats.add_bytes_searched(finish.byte_count());
+        self.stats.add_bytes_printed(self.diff.wtr.count());
+
+        Ok(())
+    }
+}
diff --git a/crates/printer/src/lib.rs b/crates/printer/src/lib.rs
index 29e0a45b0..f65993c1d 100644
--- a/crates/printer/src/lib.rs
+++ b/crates/printer/src/lib.rs
@@ -67,6 +67,7 @@ fn example() -> Result<(), Box<Error>> {
 pub use crate::color::{
     default_color_specs, ColorError, ColorSpecs, UserColorSpec,
 };
+pub use crate::diff::{Diff, DiffBuilder, DiffSink};
 #[cfg(feature = "serde1")]
 pub use crate::json::{JSONBuilder, JSONSink, JSON};
 pub use crate::standard::{Standard, StandardBuilder, StandardSink};
@@ -90,6 +91,7 @@ mod macros;
 
 mod color;
 mod counter;
+mod diff;
 #[cfg(feature = "serde1")]
 mod json;
 #[cfg(feature = "serde1")]
diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs
index 434deec7c..6875ee8b6 100644
--- a/crates/printer/src/util.rs
+++ b/crates/printer/src/util.rs
@@ -63,9 +63,13 @@ impl<M: Matcher> Replacer<M> {
         // See the giant comment in 'find_iter_at_in_context' below for why we
         // do this dance.
         let is_multi_line = searcher.multi_line_with_matcher(&matcher);
+        let mut extension_len = 0;
         if is_multi_line {
             if subject[range.end..].len() >= MAX_LOOK_AHEAD {
+                extension_len = MAX_LOOK_AHEAD;
                 subject = &subject[..range.end + MAX_LOOK_AHEAD];
+            } else {
+                extension_len = subject.len() - range.end;
             }
         } else {
             // When searching a single line, we should remove the line
@@ -102,6 +106,18 @@ impl<M: Matcher> Replacer<M> {
                     },
                 )
                 .map_err(io::Error::error_message)?;
+
+            if is_multi_line {
+                // Remove the subject buffer beyond the range.end.
+                // NOTE: could this be a bug with the current replace functionality?
+                // As an example, running `rg -U '\.\nA' --replace 'BB' -C 2` in this repo produces
+                // spurious extra lines of output in the replacement, as the extra bytes where
+                // not removed. This seems to fix that, but it's not pretty and might be wrong.
+                dst.truncate(dst.len() - extension_len);
+            } else {
+                // Restore the line terminator.
+                dst.extend(searcher.line_terminator().as_bytes());
+            }
         }
         Ok(())
     }

From 787df77a8772f69a672a17118c1fa8f889cf097a Mon Sep 17 00:00:00 2001
From: sonohgong <99127857+sonohgong@users.noreply.github.com>
Date: Thu, 24 Feb 2022 17:46:55 +0100
Subject: [PATCH 2/2] limit amount of matches during replacement

Skip matches beyond the original range when
replacing a buffer with captures.
---
 crates/matcher/src/lib.rs  | 30 ++++++++++++++++++++++++++++++
 crates/printer/src/util.rs | 18 ++++--------------
 2 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/crates/matcher/src/lib.rs b/crates/matcher/src/lib.rs
index 5b43b0d85..76fda4948 100644
--- a/crates/matcher/src/lib.rs
+++ b/crates/matcher/src/lib.rs
@@ -38,9 +38,11 @@ implementations.
 
 #![deny(missing_docs)]
 
+use std::cmp::min;
 use std::fmt;
 use std::io;
 use std::ops;
+use std::ops::Range;
 use std::u64;
 
 use crate::interpolate::interpolate;
@@ -942,6 +944,34 @@ pub trait Matcher {
         Ok(())
     }
 
+    /// Like replace_with_captures_at, but stops at the first match starting
+    /// at or after `range.end`; a match straddling the end is replaced whole.
+    fn replace_with_captures_in_range<F>(
+        &self,
+        haystack: &[u8],
+        range: &Range<usize>,
+        caps: &mut Self::Captures,
+        dst: &mut Vec<u8>,
+        mut append: F,
+    ) -> Result<(), Self::Error>
+    where
+        F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
+    {
+        let mut last_match = range.start;
+        self.captures_iter_at(haystack, range.start, caps, |caps| {
+            let m = caps.get(0).unwrap();
+            if m.start >= range.end {
+                return false;
+            }
+            dst.extend(&haystack[last_match..m.start]);
+            last_match = m.end;
+            append(caps, dst)
+        })?;
+        let end = min(haystack.len(), range.end).max(last_match);
+        dst.extend(&haystack[last_match..end]);
+        Ok(())
+    }
+
     /// Returns true if and only if the matcher matches the given haystack.
     ///
     /// By default, this method is implemented by calling `shortest_match`.
diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs
index 6875ee8b6..76eeb28ac 100644
--- a/crates/printer/src/util.rs
+++ b/crates/printer/src/util.rs
@@ -1,4 +1,5 @@
 use std::borrow::Cow;
+use std::cmp::max;
 use std::fmt;
 use std::io;
 use std::path::Path;
@@ -63,13 +64,9 @@ impl<M: Matcher> Replacer<M> {
         // See the giant comment in 'find_iter_at_in_context' below for why we
         // do this dance.
         let is_multi_line = searcher.multi_line_with_matcher(&matcher);
-        let mut extension_len = 0;
         if is_multi_line {
             if subject[range.end..].len() >= MAX_LOOK_AHEAD {
-                extension_len = MAX_LOOK_AHEAD;
                 subject = &subject[..range.end + MAX_LOOK_AHEAD];
-            } else {
-                extension_len = subject.len() - range.end;
             }
         } else {
             // When searching a single line, we should remove the line
@@ -87,9 +84,9 @@ impl<M: Matcher> Replacer<M> {
             matches.clear();
 
             matcher
-                .replace_with_captures_at(
+                .replace_with_captures_in_range(
                     subject,
-                    range.start,
+                    &range,
                     caps,
                     dst,
                     |caps, dst| {
@@ -107,14 +104,7 @@ impl<M: Matcher> Replacer<M> {
                 )
                 .map_err(io::Error::error_message)?;
 
-            if is_multi_line {
-                // Remove the subject buffer beyond the range.end.
-                // NOTE: could this be a bug with the current replace functionality?
-                // As an example, running `rg -U '\.\nA' --replace 'BB' -C 2` in this repo produces
-                // spurious extra lines of output in the replacement, as the extra bytes where
-                // not removed. This seems to fix that, but it's not pretty and might be wrong.
-                dst.truncate(dst.len() - extension_len);
-            } else {
+            if !is_multi_line {
                 // Restore the line terminator.
                 dst.extend(searcher.line_terminator().as_bytes());
             }