Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Attempt at a unified diff output format for use with --replace #2149

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
44 changes: 44 additions & 0 deletions crates/core/app.rs
Expand Up @@ -566,6 +566,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_crlf(&mut args);
flag_debug(&mut args);
flag_dfa_size_limit(&mut args);
flag_diff(&mut args);
flag_encoding(&mut args);
flag_engine(&mut args);
flag_field_context_separator(&mut args);
Expand Down Expand Up @@ -1170,6 +1171,49 @@ The argument accepts the same size suffixes as allowed in with the
args.push(arg);
}

/// Registers the `--diff` switch.
///
/// The switch selects unified diff output for search & replace results. It
/// is declared as conflicting with the flags that select other kinds of
/// output (`--count`, `--count-matches`, `--files`, `--files-with-matches`
/// and `--files-without-match`).
fn flag_diff(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Output search & replace results in unidiff format.";
    const LONG: &str = long!(
        "\
Enable printing search & replace results in unified diff (unidiff) format.
There doesn't seem to be a particular standard for this format other than:
https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html
This implementation is based on the output of the diff utility when run on
multiple files, i.e. with an extra line of information between each file
about what command produced the diff.

When this flag is provided together with the --replace flag, ripgrep will
emit unidiff format representing the matches as removed lines in a file
and the replacements as added lines in the same file.

The output can be viewed with a variety of diff pagers, or applied to files
with e.g. `git apply -p0 --unidiff-zero` or `patch -u -p0`.

The diff output currently does not support additional context,
so the --context flag will have no effect either.

The unidiff format is only supported for showing search results. It cannot
be used with other flags that emit other types of output, such as --files,
--files-with-matches, --files-without-match, --count or --count-matches.
ripgrep will report an error if any of the aforementioned flags are used in
concert with --diff.

Other flags that control aspects of the standard output such as
--only-matching, --heading, --max-columns, etc., have no effect
when --diff is set.

"
    );
    // Keep this list in sync with the flags named in the long help above.
    let arg = RGArg::switch("diff").help(SHORT).long_help(LONG).conflicts(&[
        "count",
        "count-matches",
        "files",
        "files-with-matches",
        "files-without-match",
    ]);
    args.push(arg);
}

fn flag_encoding(args: &mut Vec<RGArg>) {
const SHORT: &str = "Specify the text encoding of files to search.";
const LONG: &str = long!(
Expand Down
22 changes: 20 additions & 2 deletions crates/core/args.rs
Expand Up @@ -17,8 +17,8 @@ use grep::pcre2::{
RegexMatcherBuilder as PCRE2RegexMatcherBuilder,
};
use grep::printer::{
default_color_specs, ColorSpecs, JSONBuilder, Standard, StandardBuilder,
Stats, Summary, SummaryBuilder, SummaryKind, JSON,
default_color_specs, ColorSpecs, Diff, DiffBuilder, JSONBuilder, Standard,
StandardBuilder, Stats, Summary, SummaryBuilder, SummaryKind, JSON,
};
use grep::regex::{
RegexMatcher as RustRegexMatcher,
Expand Down Expand Up @@ -209,6 +209,9 @@ impl Args {
OutputKind::JSON => {
self.matches().printer_json(wtr).map(Printer::JSON)
}
OutputKind::Diff => {
self.matches().printer_diff(wtr).map(Printer::Diff)
}
}
}
}
Expand Down Expand Up @@ -367,6 +370,8 @@ enum OutputKind {
Summary,
/// Emit match information in the JSON Lines format.
JSON,
/// Emit search & replace information in unified diff format.
Diff,
}

/// The sort criteria, if present.
Expand Down Expand Up @@ -751,6 +756,14 @@ impl ArgMatches {
Ok(builder.build(wtr))
}

/// Build a Diff printer that writes results to the given writer.
///
/// The diff printer needs a replacement to be configured, since it is the
/// replacement that is handed to the underlying `DiffBuilder`.
///
/// # Errors
///
/// Returns an error if no replacement was provided, i.e. `--diff` was used
/// without `--replace`.
fn printer_diff<W: io::Write>(&self, wtr: W) -> Result<Diff<W>> {
    // Report a usable error instead of panicking when the replacement is
    // missing; `--diff` is meaningless without `--replace`.
    let replacement = match self.replacement() {
        Some(replacement) => replacement,
        None => {
            return Err(From::from(
                "the --diff flag requires the --replace flag to be set",
            ));
        }
    };
    let mut builder = DiffBuilder::new();
    builder.replacement(replacement);
    Ok(builder.build(wtr))
}

/// Build a Standard printer that writes results to the given writer.
///
/// The given paths are used to configure aspects of the printer.
Expand Down Expand Up @@ -1156,6 +1169,9 @@ impl ArgMatches {
if self.output_kind() == OutputKind::JSON {
return true;
}
if self.output_kind() == OutputKind::Diff {
return true;
}

// A few things can imply counting line numbers. In particular, we
// generally want to show line numbers by default when printing to a
Expand Down Expand Up @@ -1263,6 +1279,8 @@ impl ArgMatches {
return OutputKind::Summary;
} else if self.is_present("json") {
return OutputKind::JSON;
} else if self.is_present("diff") {
return OutputKind::Diff;
}

let (count, count_matches) = self.counts();
Expand Down
23 changes: 21 additions & 2 deletions crates/core/search.rs
Expand Up @@ -8,7 +8,7 @@ use grep::cli;
use grep::matcher::Matcher;
#[cfg(feature = "pcre2")]
use grep::pcre2::RegexMatcher as PCRE2RegexMatcher;
use grep::printer::{Standard, Stats, Summary, JSON};
use grep::printer::{Diff, Standard, Stats, Summary, JSON};
use grep::regex::RegexMatcher as RustRegexMatcher;
use grep::searcher::{BinaryDetection, Searcher};
use ignore::overrides::Override;
Expand Down Expand Up @@ -227,6 +227,8 @@ pub enum Printer<W> {
Summary(Summary<W>),
/// A JSON printer, which emits results in the JSON Lines format.
JSON(JSON<W>),
/// A Diff printer, which represents search & replace in the unified diff format.
Diff(Diff<W>),
}

impl<W: WriteColor> Printer<W> {
Expand All @@ -237,7 +239,7 @@ impl<W: WriteColor> Printer<W> {
) -> io::Result<()> {
match *self {
Printer::JSON(_) => self.print_stats_json(total_duration, stats),
Printer::Standard(_) | Printer::Summary(_) => {
Printer::Standard(_) | Printer::Summary(_) | Printer::Diff(_) => {
self.print_stats_human(total_duration, stats)
}
}
Expand Down Expand Up @@ -303,6 +305,7 @@ impl<W: WriteColor> Printer<W> {
Printer::Standard(ref mut p) => p.get_mut(),
Printer::Summary(ref mut p) => p.get_mut(),
Printer::JSON(ref mut p) => p.get_mut(),
Printer::Diff(ref mut p) => p.get_mut(),
}
}
}
Expand Down Expand Up @@ -502,6 +505,14 @@ fn search_path<M: Matcher, W: WriteColor>(
stats: Some(sink.stats().clone()),
})
}
Printer::Diff(ref mut p) => {
let mut sink = p.sink_with_path(&matcher, path);
searcher.search_path(&matcher, path, &mut sink)?;
Ok(SearchResult {
has_match: sink.has_match(),
stats: Some(sink.stats().clone()),
})
}
}
}

Expand Down Expand Up @@ -539,6 +550,14 @@ fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
stats: Some(sink.stats().clone()),
})
}
Printer::Diff(ref mut p) => {
let mut sink = p.sink_with_path(&matcher, path);
searcher.search_reader(&matcher, &mut rdr, &mut sink)?;
Ok(SearchResult {
has_match: sink.has_match(),
stats: Some(sink.stats().clone()),
})
}
}
}

Expand Down
30 changes: 30 additions & 0 deletions crates/matcher/src/lib.rs
Expand Up @@ -38,9 +38,11 @@ implementations.

#![deny(missing_docs)]

use std::cmp::min;
use std::fmt;
use std::io;
use std::ops;
use std::ops::Range;
use std::u64;

use crate::interpolate::interpolate;
Expand Down Expand Up @@ -942,6 +944,34 @@ pub trait Matcher {
Ok(())
}

/// Same as replace_with_captures_at, but limits the replacements to the
/// given range.
///
/// Searching starts at `range.start`. For every match that starts before
/// `range.end`, the text between the previous match (or `range.start`) and
/// the match is copied to `dst`, and then `append` is called to write the
/// replacement. The first match that starts at or beyond `range.end` ends
/// the iteration and is not replaced.
///
/// After iteration, any remaining text up to `range.end` (clamped to the
/// length of `haystack`) is copied to `dst`. If the last replaced match
/// extends past that point, nothing further is copied.
///
/// The value returned by `append` is forwarded as the result of the
/// callback given to `captures_iter_at`.
fn replace_with_captures_in_range<F>(
    &self,
    haystack: &[u8],
    range: &Range<usize>,
    caps: &mut Self::Captures,
    dst: &mut Vec<u8>,
    mut append: F,
) -> Result<(), Self::Error>
where
    F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
{
    // Offset just past the most recently replaced match; everything before
    // it has already been written to `dst`.
    let mut last_match = range.start;
    self.captures_iter_at(haystack, range.start, caps, |caps| {
        // NOTE(review): group 0 is assumed to always be present for a
        // reported match — confirm against the Captures contract.
        let m = caps.get(0).unwrap();
        if m.start >= range.end {
            return false;
        }
        dst.extend_from_slice(&haystack[last_match..m.start]);
        last_match = m.end;
        append(caps, dst)
    })?;
    let end = min(haystack.len(), range.end);
    // A match may start inside the range but end beyond it (or the range
    // may start past the end of the haystack). In that case there is no
    // trailing text to copy, and slicing `last_match..end` would panic.
    if last_match < end {
        dst.extend_from_slice(&haystack[last_match..end]);
    }
    Ok(())
}

/// Returns true if and only if the matcher matches the given haystack.
///
/// By default, this method is implemented by calling `shortest_match`.
Expand Down