Skip to content

Commit

Permalink
Move all gitignore matching to separate crate.
Browse files Browse the repository at this point in the history
This PR introduces a new sub-crate, `ignore`, which primarily provides a
fast recursive directory iterator that respects ignore files like
gitignore and other configurable filtering rules based on globs or even
file types.

This results in a substantial source of complexity moved out of ripgrep's
core and into a reusable component that others can now (hopefully)
benefit from.

While much of the ignore code carried over from ripgrep's core, a
substantial portion of it was rewritten with the following goals in
mind:

1. Reuse matchers built from gitignore files across directory iteration.
2. Design the matcher data structure to be amenable to parallelizing
   directory iteration. (Indeed, writing the parallel iterator is the
   next step.)

Fixes #9, #44, #45
  • Loading branch information
BurntSushi committed Oct 30, 2016
1 parent 12b2b1f commit d79add3
Show file tree
Hide file tree
Showing 30 changed files with 3,765 additions and 1,760 deletions.
57 changes: 34 additions & 23 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 2 additions & 4 deletions Cargo.toml
Expand Up @@ -27,19 +27,17 @@ path = "tests/tests.rs"
deque = "0.3"
docopt = "0.6"
env_logger = "0.3"
globset = { version = "0.1.0", path = "globset" }
grep = { version = "0.1.3", path = "grep" }
ignore = { version = "0.1.0", path = "ignore" }
lazy_static = "0.2"
libc = "0.2"
log = "0.3"
memchr = "0.1"
memmap = "0.2"
memmap = "0.5"
num_cpus = "1"
regex = "0.1.77"
rustc-serialize = "0.3"
term = "0.4"
thread_local = "0.2.7"
walkdir = "0.1"

[target.'cfg(windows)'.dependencies]
kernel32-sys = "0.2"
Expand Down
8 changes: 5 additions & 3 deletions appveyor.yml
Expand Up @@ -30,6 +30,7 @@ test_script:
- cargo test --verbose
- cargo test --verbose --manifest-path grep/Cargo.toml
- cargo test --verbose --manifest-path globset/Cargo.toml
- cargo test --verbose --manifest-path ignore/Cargo.toml

before_deploy:
# Generate artifacts for release
Expand Down Expand Up @@ -59,7 +60,8 @@ deploy:

branches:
only:
- appveyor
- /\d+\.\d+\.\d+/
except:
- master
# - appveyor
# - /\d+\.\d+\.\d+/
# except:
# - master
2 changes: 2 additions & 0 deletions ci/script.sh
Expand Up @@ -23,6 +23,8 @@ run_test_suite() {
cargo test --target $TARGET --verbose --manifest-path grep/Cargo.toml
cargo build --target $TARGET --verbose --manifest-path globset/Cargo.toml
cargo test --target $TARGET --verbose --manifest-path globset/Cargo.toml
cargo build --target $TARGET --verbose --manifest-path ignore/Cargo.toml
cargo test --target $TARGET --verbose --manifest-path ignore/Cargo.toml

# sanity check the file type
file target/$TARGET/debug/rg
Expand Down
3 changes: 3 additions & 0 deletions globset/Cargo.toml
Expand Up @@ -28,3 +28,6 @@ regex = "0.1.77"

[dev-dependencies]
glob = "0.2"

[features]
simd-accel = ["regex/simd-accel"]
3 changes: 3 additions & 0 deletions globset/benches/bench.rs
Expand Up @@ -11,6 +11,9 @@ extern crate lazy_static;
extern crate regex;
extern crate test;

use std::ffi::OsStr;
use std::path::Path;

use globset::{Candidate, Glob, GlobMatcher, GlobSet, GlobSetBuilder};

const EXT: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
Expand Down
51 changes: 48 additions & 3 deletions globset/src/lib.rs
Expand Up @@ -226,10 +226,21 @@ type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
/// single pass.
#[derive(Clone, Debug)]
pub struct GlobSet {
/// Number of glob patterns this set was built from; zero means the set is
/// empty and matches nothing.
len: usize,
/// Matching strategies; each one is consulted in turn when a path is
/// tested against the set.
strats: Vec<GlobSetMatchStrategy>,
}

impl GlobSet {
/// Reports whether this set holds no globs at all. Such a set can never
/// match a path.
pub fn is_empty(&self) -> bool {
    self.len() == 0
}

/// Returns the number of globs in this set.
///
/// This is the count of patterns the set was built from, which is tracked
/// separately from the internal list of matching strategies.
pub fn len(&self) -> usize {
self.len
}

/// Returns true if any glob in this set matches the path given.
pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
self.is_match_candidate(&Candidate::new(path.as_ref()))
Expand All @@ -240,6 +251,9 @@ impl GlobSet {
/// This takes a Candidate as input, which can be used to amortize the
/// cost of preparing a path for matching.
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
if self.is_empty() {
return false;
}
for strat in &self.strats {
if strat.is_match(path) {
return true;
Expand All @@ -250,9 +264,6 @@ impl GlobSet {

/// Returns the sequence number of every glob pattern that matches the
/// given path.
///
/// This takes a Candidate as input, which can be used to amortize the
/// cost of preparing a path for matching.
pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
    // Prepare the path for matching once, then defer to the
    // Candidate-based variant.
    let candidate = Candidate::new(path.as_ref());
    self.matches_candidate(&candidate)
}
Expand All @@ -264,6 +275,9 @@ impl GlobSet {
/// cost of preparing a path for matching.
pub fn matches_candidate(&self, path: &Candidate) -> Vec<usize> {
    // An empty set cannot match anything, so only run the matcher when
    // there is at least one glob; otherwise the fresh vec is returned as-is.
    let mut seqs = Vec::new();
    if !self.is_empty() {
        self.matches_candidate_into(path, &mut seqs);
    }
    seqs
}
Expand All @@ -274,12 +288,32 @@ impl GlobSet {
/// `into` is cleared before matching begins, and contains the set of
/// sequence numbers (in ascending order) after matching ends. If no globs
/// were matched, then `into` will be empty.
pub fn matches_into<P: AsRef<Path>>(
    &self,
    path: P,
    into: &mut Vec<usize>,
) {
    // Prepare the path for matching once, then defer to the
    // Candidate-based variant, which fills `into`.
    let candidate = Candidate::new(path.as_ref());
    self.matches_candidate_into(&candidate, into);
}

/// Adds the sequence number of every glob pattern that matches the given
/// path to the vec given.
///
/// `into` is cleared before matching begins, and contains the set of
/// sequence numbers (in ascending order) after matching ends. If no globs
/// were matched, then `into` will be empty.
///
/// This takes a Candidate as input, which can be used to amortize the
/// cost of preparing a path for matching.
pub fn matches_candidate_into(
&self,
path: &Candidate,
into: &mut Vec<usize>,
) {
into.clear();
if self.is_empty() {
return;
}
for strat in &self.strats {
strat.matches_into(path, into);
}
Expand All @@ -288,6 +322,9 @@ impl GlobSet {
}

fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
if pats.is_empty() {
return Ok(GlobSet { len: 0, strats: vec![] });
}
let mut lits = LiteralStrategy::new();
let mut base_lits = BasenameLiteralStrategy::new();
let mut exts = ExtensionStrategy::new();
Expand Down Expand Up @@ -330,6 +367,7 @@ impl GlobSet {
prefixes.literals.len(), suffixes.literals.len(),
required_exts.0.len(), regexes.literals.len());
Ok(GlobSet {
len: pats.len(),
strats: vec![
GlobSetMatchStrategy::Extension(exts),
GlobSetMatchStrategy::BasenameLiteral(base_lits),
Expand Down Expand Up @@ -750,4 +788,11 @@ mod tests {
assert_eq!(0, matches[0]);
assert_eq!(2, matches[1]);
}

#[test]
fn empty_set_works() {
    // A set built from zero globs must reject every path, including the
    // empty path.
    let empty = GlobSetBuilder::new().build().unwrap();
    for &path in ["", "a"].iter() {
        assert!(!empty.is_match(path));
    }
}
}
6 changes: 2 additions & 4 deletions globset/src/pathutil.rs
Expand Up @@ -89,16 +89,14 @@ pub fn path_bytes(path: &Path) -> Cow<[u8]> {
os_str_bytes(path.as_os_str())
}

/// Return the raw bytes of the given OS string, transcoded to UTF-8 if
/// necessary.
/// Borrow the bytes underlying the given OS string.
///
/// On Unix the bytes are exposed directly, so nothing is copied or
/// transcoded here and the result is always `Cow::Borrowed`.
#[cfg(unix)]
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
    use std::os::unix::ffi::OsStrExt;

    let raw: &[u8] = OsStrExt::as_bytes(s);
    Cow::Borrowed(raw)
}

/// Return the raw bytes of the given OS string, transcoded to UTF-8 if
/// necessary.
/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8.
#[cfg(not(unix))]
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
// TODO(burntsushi): On Windows, OS strings are WTF-8, which is a superset
Expand Down
2 changes: 1 addition & 1 deletion grep/Cargo.toml
Expand Up @@ -15,6 +15,6 @@ license = "Unlicense/MIT"
[dependencies]
log = "0.3"
memchr = "0.1"
memmap = "0.2"
memmap = "0.5"
regex = "0.1.77"
regex-syntax = "0.3.5"

0 comments on commit d79add3

Please sign in to comment.