Compare commits

...

179 Commits

Author SHA1 Message Date
9d164f4b2c modified metadata for rgs
Some checks failed
ci / test (beta, ubuntu-latest, beta) (push) Has been cancelled
ci / test (macos, macos-latest, nightly) (push) Has been cancelled
ci / test (nightly, ubuntu-latest, nightly) (push) Has been cancelled
ci / test (pinned, ubuntu-latest, 1.85.0) (push) Has been cancelled
ci / test (stable, ubuntu-latest, stable) (push) Has been cancelled
ci / test (stable-aarch64, ubuntu-latest, stable, aarch64-unknown-linux-gnu) (push) Has been cancelled
ci / test (stable-arm-gnueabihf, ubuntu-latest, stable, armv7-unknown-linux-gnueabihf) (push) Has been cancelled
ci / test (stable-arm-musleabi, ubuntu-latest, stable, armv7-unknown-linux-musleabi) (push) Has been cancelled
ci / test (stable-arm-musleabihf, ubuntu-latest, stable, armv7-unknown-linux-musleabihf) (push) Has been cancelled
ci / test (stable-musl, ubuntu-latest, stable, x86_64-unknown-linux-musl) (push) Has been cancelled
ci / test (stable-powerpc64, ubuntu-latest, stable, powerpc64-unknown-linux-gnu) (push) Has been cancelled
ci / test (stable-riscv64, ubuntu-latest, stable, riscv64gc-unknown-linux-gnu) (push) Has been cancelled
ci / test (stable-s390x, ubuntu-latest, stable, s390x-unknown-linux-gnu) (push) Has been cancelled
ci / test (stable-x86, ubuntu-latest, stable, i686-unknown-linux-gnu) (push) Has been cancelled
ci / test (win-gnu, windows-latest, nightly-x86_64-gnu) (push) Has been cancelled
ci / test (win-msvc, windows-latest, nightly) (push) Has been cancelled
ci / test (winaarch64-msvc, windows-11-arm, nightly) (push) Has been cancelled
ci / wasm (push) Has been cancelled
ci / rustfmt (push) Has been cancelled
ci / docs (push) Has been cancelled
ci / Compile Fuzz Test Targets (push) Has been cancelled
2026-01-13 20:45:15 -05:00
b60b31706a Merge pull request 'rgs-dev done' (#1) from rgs-dev into master
Some checks failed
ci / test (beta, ubuntu-latest, beta) (push) Has been cancelled
ci / test (macos, macos-latest, nightly) (push) Has been cancelled
ci / test (nightly, ubuntu-latest, nightly) (push) Has been cancelled
ci / test (pinned, ubuntu-latest, 1.85.0) (push) Has been cancelled
ci / test (stable, ubuntu-latest, stable) (push) Has been cancelled
ci / test (stable-aarch64, ubuntu-latest, stable, aarch64-unknown-linux-gnu) (push) Has been cancelled
ci / test (stable-arm-gnueabihf, ubuntu-latest, stable, armv7-unknown-linux-gnueabihf) (push) Has been cancelled
ci / test (stable-arm-musleabi, ubuntu-latest, stable, armv7-unknown-linux-musleabi) (push) Has been cancelled
ci / test (stable-arm-musleabihf, ubuntu-latest, stable, armv7-unknown-linux-musleabihf) (push) Has been cancelled
ci / test (stable-musl, ubuntu-latest, stable, x86_64-unknown-linux-musl) (push) Has been cancelled
ci / test (stable-powerpc64, ubuntu-latest, stable, powerpc64-unknown-linux-gnu) (push) Has been cancelled
ci / test (stable-riscv64, ubuntu-latest, stable, riscv64gc-unknown-linux-gnu) (push) Has been cancelled
ci / test (stable-s390x, ubuntu-latest, stable, s390x-unknown-linux-gnu) (push) Has been cancelled
ci / test (stable-x86, ubuntu-latest, stable, i686-unknown-linux-gnu) (push) Has been cancelled
ci / test (win-gnu, windows-latest, nightly-x86_64-gnu) (push) Has been cancelled
ci / test (win-msvc, windows-latest, nightly) (push) Has been cancelled
ci / test (winaarch64-msvc, windows-11-arm, nightly) (push) Has been cancelled
ci / wasm (push) Has been cancelled
ci / rustfmt (push) Has been cancelled
ci / docs (push) Has been cancelled
ci / Compile Fuzz Test Targets (push) Has been cancelled
Reviewed-on: #1
2026-01-14 01:36:52 +00:00
0994661424 added docs and migrated name to rgs, migrated repo, added squash-lines feature
Some checks failed
ci / test (beta, ubuntu-latest, beta) (pull_request) Has been cancelled
ci / test (macos, macos-latest, nightly) (pull_request) Has been cancelled
ci / test (nightly, ubuntu-latest, nightly) (pull_request) Has been cancelled
ci / test (pinned, ubuntu-latest, 1.85.0) (pull_request) Has been cancelled
ci / test (stable, ubuntu-latest, stable) (pull_request) Has been cancelled
ci / test (stable-aarch64, ubuntu-latest, stable, aarch64-unknown-linux-gnu) (pull_request) Has been cancelled
ci / test (stable-arm-gnueabihf, ubuntu-latest, stable, armv7-unknown-linux-gnueabihf) (pull_request) Has been cancelled
ci / test (stable-arm-musleabi, ubuntu-latest, stable, armv7-unknown-linux-musleabi) (pull_request) Has been cancelled
ci / test (stable-arm-musleabihf, ubuntu-latest, stable, armv7-unknown-linux-musleabihf) (pull_request) Has been cancelled
ci / test (stable-musl, ubuntu-latest, stable, x86_64-unknown-linux-musl) (pull_request) Has been cancelled
ci / test (stable-powerpc64, ubuntu-latest, stable, powerpc64-unknown-linux-gnu) (pull_request) Has been cancelled
ci / test (stable-riscv64, ubuntu-latest, stable, riscv64gc-unknown-linux-gnu) (pull_request) Has been cancelled
ci / test (stable-s390x, ubuntu-latest, stable, s390x-unknown-linux-gnu) (pull_request) Has been cancelled
ci / test (stable-x86, ubuntu-latest, stable, i686-unknown-linux-gnu) (pull_request) Has been cancelled
ci / test (win-gnu, windows-latest, nightly-x86_64-gnu) (pull_request) Has been cancelled
ci / test (win-msvc, windows-latest, nightly) (pull_request) Has been cancelled
ci / test (winaarch64-msvc, windows-11-arm, nightly) (pull_request) Has been cancelled
ci / wasm (pull_request) Has been cancelled
ci / rustfmt (pull_request) Has been cancelled
ci / docs (pull_request) Has been cancelled
ci / Compile Fuzz Test Targets (pull_request) Has been cancelled
2026-01-13 20:35:39 -05:00
ad6ec1b4c5 rgs: added multiline window limit and in-file result indexing (work in progress) 2025-12-23 04:01:55 -05:00
xtqqczze
cd1f981bea fix: derive Default when possible
Some checks failed
ci / test (beta, ubuntu-latest, beta) (push) Has been cancelled
ci / test (macos, macos-latest, nightly) (push) Has been cancelled
ci / test (nightly, ubuntu-latest, nightly) (push) Has been cancelled
ci / test (pinned, ubuntu-latest, 1.85.0) (push) Has been cancelled
ci / test (stable, ubuntu-latest, stable) (push) Has been cancelled
ci / test (stable-aarch64, ubuntu-latest, stable, aarch64-unknown-linux-gnu) (push) Has been cancelled
ci / test (stable-arm-gnueabihf, ubuntu-latest, stable, armv7-unknown-linux-gnueabihf) (push) Has been cancelled
ci / test (stable-arm-musleabi, ubuntu-latest, stable, armv7-unknown-linux-musleabi) (push) Has been cancelled
ci / test (stable-arm-musleabihf, ubuntu-latest, stable, armv7-unknown-linux-musleabihf) (push) Has been cancelled
ci / test (stable-musl, ubuntu-latest, stable, x86_64-unknown-linux-musl) (push) Has been cancelled
ci / test (stable-powerpc64, ubuntu-latest, stable, powerpc64-unknown-linux-gnu) (push) Has been cancelled
ci / test (stable-riscv64, ubuntu-latest, stable, riscv64gc-unknown-linux-gnu) (push) Has been cancelled
ci / test (stable-s390x, ubuntu-latest, stable, s390x-unknown-linux-gnu) (push) Has been cancelled
ci / test (stable-x86, ubuntu-latest, stable, i686-unknown-linux-gnu) (push) Has been cancelled
ci / test (win-gnu, windows-latest, nightly-x86_64-gnu) (push) Has been cancelled
ci / test (win-msvc, windows-latest, nightly) (push) Has been cancelled
ci / test (winaarch64-msvc, windows-11-arm, nightly) (push) Has been cancelled
ci / wasm (push) Has been cancelled
ci / rustfmt (push) Has been cancelled
ci / docs (push) Has been cancelled
ci / Compile Fuzz Test Targets (push) Has been cancelled
Ref https://rust-lang.github.io/rust-clippy/master/index.html#/derivable_impls
2025-11-29 14:11:38 -05:00
Andrew Gallant
57c190d56e ignore-0.4.25 2025-10-30 13:30:14 -04:00
Ian McKellar
85edf4c796 ignore: only stat .jj if we actually care
I was comparing the work being done by fd and find and noticed (with
`strace -f -c -S` calls) that fd was doing a ton of failed `statx`
calls. Upon closer inspection it was stating `.jj` even though I
was passing `--no-ignore`. Eventually I turned up this check in
`Ignore::add_child_path` that was doing stat on `.jj` regardless of
whether the options request it.

With this patch it'll only stat `.jj` if that's relevant to the query.

PR #3212
2025-10-30 13:29:58 -04:00
Andrew Gallant
36b7597693 changelog: start next section 2025-10-22 09:02:40 -04:00
Andrew Gallant
a132e56b8c pkg/brew: update tap 2025-10-22 09:01:12 -04:00
Andrew Gallant
af60c2de9d 15.1.0
Some checks failed
release / create-release (push) Has been cancelled
release / build-release (linux, ubuntu-latest, nightly, x86_64-linux-musl-strip, x86_64-unknown-linux-musl) (push) Has been cancelled
release / build-release (macos, macos-latest, nightly, aarch64-apple-darwin) (push) Has been cancelled
release / build-release (macos, macos-latest, nightly, x86_64-apple-darwin) (push) Has been cancelled
release / build-release (stable-aarch64, ubuntu-latest, qemu-aarch64, stable, aarch64-linux-gnu-strip, aarch64-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-arm-gnueabihf, ubuntu-latest, qemu-arm, stable, arm-linux-gnueabihf-strip, armv7-unknown-linux-gnueabihf) (push) Has been cancelled
release / build-release (stable-arm-musleabi, ubuntu-latest, qemu-arm, stable, arm-linux-musleabi-strip, armv7-unknown-linux-musleabi) (push) Has been cancelled
release / build-release (stable-arm-musleabihf, ubuntu-latest, qemu-arm, stable, arm-linux-musleabihf-strip, armv7-unknown-linux-musleabihf) (push) Has been cancelled
release / build-release (stable-s390x, ubuntu-latest, qemu-s390x, stable, s390x-linux-gnu-strip, s390x-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-x86, ubuntu-latest, i386, stable, x86_64-linux-gnu-strip, i686-unknown-linux-gnu) (push) Has been cancelled
release / build-release (win-gnu, windows-latest, nightly-x86_64-gnu, x86_64-pc-windows-gnu) (push) Has been cancelled
release / build-release (win-msvc, windows-latest, nightly, x86_64-pc-windows-msvc) (push) Has been cancelled
release / build-release (win32-msvc, windows-latest, nightly, i686-pc-windows-msvc) (push) Has been cancelled
release / build-release (winaarch64-msvc, windows-11-arm, nightly, aarch64-pc-windows-msvc) (push) Has been cancelled
release / build-release-deb (push) Has been cancelled
2025-10-22 08:30:04 -04:00
Andrew Gallant
a63671efb0 deps: bump to grep 0.4.1 2025-10-22 08:29:19 -04:00
Andrew Gallant
2ea06d69aa grep-0.4.1 2025-10-22 08:28:53 -04:00
Andrew Gallant
85006b08d6 deps: bump to grep-printer 0.3.1 2025-10-22 08:28:32 -04:00
Andrew Gallant
423afb8513 grep-printer-0.3.1 2025-10-22 08:28:06 -04:00
Andrew Gallant
4694800be5 deps: bump to grep-searcher 0.1.16 2025-10-22 08:26:22 -04:00
Andrew Gallant
86e0ab12ef grep-searcher-0.1.16 2025-10-22 08:25:01 -04:00
Andrew Gallant
7189950799 deps: bump to globset 0.4.18 2025-10-22 08:24:51 -04:00
Andrew Gallant
0b0e013f5a globset-0.4.18 2025-10-22 08:23:57 -04:00
Andrew Gallant
cac9870a02 doc: update date in man page template 2025-10-22 08:23:05 -04:00
Andrew Gallant
bee13375ed deps: update everything 2025-10-22 08:21:56 -04:00
Andrew Gallant
f5be160839 changelog: 15.1.0 2025-10-22 08:21:34 -04:00
Jorge Gomez
24e88dc15b ignore/types: add ssa type
This PR adds support for [.ssa](https://en.wikipedia.org/wiki/Static_single-assignment_form) files as read by [qbe](https://c9x.me/compile/):

See: https://c9x.me/compile/doc/il.html#Input-Files
2025-10-22 08:18:30 -04:00
Andrew Gallant
5748f81bb1 printer: use doc_cfg instead of doc_auto_cfg
Fixes #3202
2025-10-22 07:47:07 -04:00
Andrew Gallant
d47663b1b4 searcher: fix regression with --line-buffered flag
In my fix for #3184, I actually had two fixes. One was a tweak to how we
read data and the other was a tweak to how we determined how much of the
buffer we needed to keep around. It turns out that fixing #3184 only
required the latter fix, found in commit
d4b77a8d89. The former fix also helped the
specific case of #3184, but it ended up regressing `--line-buffered`.

Specifically, previous to 8c6595c215 (the
first fix), we would do one `read` syscall. This call might not fill our
caller provided buffer. And in particular, `stdin` seemed to fill fewer
bytes than reading from a file. So the "fix" was to put `read` in a loop
and keep calling it until the caller provided buffer was full or until
the stream was exhausted. This helped alleviate #3184 by amortizing
`read` syscalls better.

But of course, in retrospect, this change is clearly contrary to how
`--line-buffered` works. We specifically do _not_ want to wait around
until the buffer is full. We want to read what we can, search it and
move on.

So this reverts the first fix but leaves the second, which still
keeps #3184 fixed and also fixes #3194 (the regression).

This reverts commit 8c6595c215.

Fixes #3194
2025-10-19 11:06:39 -04:00
Enoch
38d630261a printer: add Cursor hyperlink alias
This is similar to the other aliases used by
VS Code forks.

PR #3192
2025-10-17 14:59:17 -04:00
Andrew Gallant
b3dc4b0998 globset: improve debug log
This shows the regex that the glob was compiled to.
2025-10-17 10:27:19 -04:00
Andrew Gallant
f09b55b8e7 changelog: start next section 2025-10-15 23:32:00 -04:00
Andrew Gallant
0551c6b931 pkg/brew: update tap 2025-10-15 23:31:35 -04:00
Andrew Gallant
3a612f88b8 15.0.0
Some checks failed
release / create-release (push) Has been cancelled
release / build-release (linux, ubuntu-latest, nightly, x86_64-linux-musl-strip, x86_64-unknown-linux-musl) (push) Has been cancelled
release / build-release (macos, macos-latest, nightly, aarch64-apple-darwin) (push) Has been cancelled
release / build-release (macos, macos-latest, nightly, x86_64-apple-darwin) (push) Has been cancelled
release / build-release (stable-aarch64, ubuntu-latest, qemu-aarch64, stable, aarch64-linux-gnu-strip, aarch64-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-arm-gnueabihf, ubuntu-latest, qemu-arm, stable, arm-linux-gnueabihf-strip, armv7-unknown-linux-gnueabihf) (push) Has been cancelled
release / build-release (stable-arm-musleabi, ubuntu-latest, qemu-arm, stable, arm-linux-musleabi-strip, armv7-unknown-linux-musleabi) (push) Has been cancelled
release / build-release (stable-arm-musleabihf, ubuntu-latest, qemu-arm, stable, arm-linux-musleabihf-strip, armv7-unknown-linux-musleabihf) (push) Has been cancelled
release / build-release (stable-s390x, ubuntu-latest, qemu-s390x, stable, s390x-linux-gnu-strip, s390x-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-x86, ubuntu-latest, i386, stable, x86_64-linux-gnu-strip, i686-unknown-linux-gnu) (push) Has been cancelled
release / build-release (win-gnu, windows-latest, nightly-x86_64-gnu, x86_64-pc-windows-gnu) (push) Has been cancelled
release / build-release (win-msvc, windows-latest, nightly, x86_64-pc-windows-msvc) (push) Has been cancelled
release / build-release (win32-msvc, windows-latest, nightly, i686-pc-windows-msvc) (push) Has been cancelled
release / build-release (winaarch64-msvc, windows-11-arm, nightly, aarch64-pc-windows-msvc) (push) Has been cancelled
release / build-release-deb (push) Has been cancelled
2025-10-15 23:07:50 -04:00
Andrew Gallant
ca2e34f37c grep-0.4.0 2025-10-15 23:06:34 -04:00
Andrew Gallant
a6092beee4 deps: bump to grep-printer 0.3.0 2025-10-15 23:05:10 -04:00
Andrew Gallant
a0d61a063f grep-printer-0.3.0 2025-10-15 23:04:24 -04:00
Andrew Gallant
c22fc0f13c deps: bump to grep-searcher 0.1.15 2025-10-15 23:02:59 -04:00
Andrew Gallant
087f82273d grep-searcher-0.1.15 2025-10-15 23:02:33 -04:00
Andrew Gallant
a3a30896be deps: bump to grep-pcre2 0.1.9 2025-10-15 23:01:31 -04:00
Andrew Gallant
7397ab7d97 grep-pcre2-0.1.9 2025-10-15 23:01:07 -04:00
Andrew Gallant
cf1dab0d5a deps: bump to grep-regex 0.1.14 2025-10-15 23:00:58 -04:00
Andrew Gallant
e523c6bf32 grep-regex-0.1.14 2025-10-15 23:00:22 -04:00
Andrew Gallant
720376ead6 deps: bump to grep-matcher 0.1.8 2025-10-15 23:00:12 -04:00
Andrew Gallant
a5ba50ceaf grep-matcher-0.1.8 2025-10-15 22:59:35 -04:00
Andrew Gallant
a766f79710 deps: bump to grep-cli 0.1.12 2025-10-15 22:59:17 -04:00
Andrew Gallant
4aafe45760 grep-cli-0.1.12 2025-10-15 22:58:42 -04:00
Andrew Gallant
c03e49b8c5 deps: bump to ignore 0.4.24 2025-10-15 22:58:35 -04:00
Andrew Gallant
70ae7354e1 ignore-0.4.24 2025-10-15 22:57:50 -04:00
Andrew Gallant
19c2a6e0d9 deps: bump to globset 0.4.17 2025-10-15 22:57:28 -04:00
Andrew Gallant
064b36b115 globset-0.4.17 2025-10-15 22:55:55 -04:00
Andrew Gallant
365384a5c1 doc: move CHANGELOG update before dependency updates
It seems better to write this first. Especially so it gets included into
crate publishes.
2025-10-15 22:54:51 -04:00
Andrew Gallant
72a5291b4e doc: update date in man page template 2025-10-15 22:54:11 -04:00
Andrew Gallant
62e676843a deps: update everything 2025-10-15 22:53:30 -04:00
Andrew Gallant
3780168c13 changelog: 15.0.0 2025-10-15 22:53:30 -04:00
Andrew Gallant
4c953731c4 release: finally switch to LTO for release binaries
There seems to be a modest improvement on some workloads:

```
$ time rg -co '\w+' sixteenth.txt
158520346

real    8.457
user    8.426
sys     0.020
maxmem  779 MB
faults  0

$ time rg-lto -co '\w+' sixteenth.txt
158520346

real    8.200
user    8.178
sys     0.012
maxmem  778 MB
faults  0
```

I've somewhat reversed course on my previous thoughts here. The
improvement isn't much, but the hit to compile times in CI isn't
terrible. Mostly I'm doing this out of "good sense," and I think it's
generally unlikely to make it more difficult for me to diagnose
performance problems. (Since I still use the default `release` profile
locally, since it's about an order of magnitude quicker to compile.)

Ref #325, Ref #413, Ref #1187, Ref #1255
2025-10-15 22:51:41 -04:00
Andrew Gallant
79d393a302 release: remove riscv64 and powerpc64 artifacts
Their CI workflows broke for different reasons.

I perceive these as niche platforms that aren't worth blocking
a release on. And not worth my time investigating CI problems.
2025-10-15 22:42:51 -04:00
Andrew Gallant
85eaf95833 ci: testing release 2025-10-15 22:41:46 -04:00
Andrew Gallant
63209ae0b9 printer: fix --stats for --json
Somehow, the JSON printer seems to have never emitted correct summary
statistics. And I believe #3178 is the first time anyone has ever
reported it. I believe this bug has persisted for years. That's
surprising.

Anyway, the problem here was that we were bailing out of `finish()` on
the sink if we weren't supposed to print anything. But we bailed out
before we tallied our summary statistics. Obviously we shouldn't do
that.

Fixes #3178
2025-10-15 21:21:20 -04:00
Andrew Gallant
b610d1cb15 ignore: fix global gitignore bug that arises with absolute paths
The `ignore` crate currently handles two different kinds of "global"
gitignore files: gitignores from `~/.gitconfig`'s `core.excludesFile`
and gitignores passed in via `WalkBuilder::add_ignore` (corresponding to
ripgrep's `--ignore-file` flag).

In contrast to any other kind of gitignore file, these gitignore files
should have their patterns interpreted relative to the current working
directory. (Arguably there are other choices we could make here, e.g.,
based on the paths given. But the `ignore` infrastructure can't handle
that, and it's not clearly correct to me.) Normally, a gitignore file
has its patterns interpreted relative to where the gitignore file is.
This relative interpretation matters for patterns like `/foo`, which are
anchored to _some_ directory.

Previously, we would generally get the global gitignores correct because
it's most common to use ripgrep without providing a path. Thus, it
searches the current working directory. In this case, no stripping of
the paths is needed in order for the gitignore patterns to be applied
directly.

But if one provides an absolute path (or something else) to ripgrep to
search, the paths aren't stripped correctly. Indeed, in the core, I had
just given up and not provided a "root" path to these global gitignores.
So it had no hope of getting this correct.

We fix this by assigning the CWD to the `Gitignore` values created from
global gitignore files. This was a painful thing to do because we'd
ideally:

1. Call `std::env::current_dir()` at most once for each traversal.
2. Provide a way to avoid the library calling `std::env::current_dir()`
   at all. (Since this is global process state and folks might want to
   set it to different values for $reasons.)

The `ignore` crate's internals are a total mess. But I think I've
addressed the above 2 points in a semver compatible manner.

Fixes #3179
2025-10-15 19:44:23 -04:00
Luke Hannan
9ec08522be ignore/types: add lowercase R extensions
PR #3186
2025-10-14 15:15:07 -04:00
Andrew Gallant
d4b77a8d89 searcher: fix a performance bug with -A/--after-context
Previously (with the previous commit):

```
$ cat bigger.txt | (time rg ZQZQZQZQZQ -A999) | wc -l

real    2.321
user    0.674
sys     0.735
maxmem  30 MB
faults  0
1000

$ cat bigger.txt | (time rg ZQZQZQZQZQ -A9999) | wc -l

real    2.513
user    0.823
sys     0.686
maxmem  30 MB
faults  0
10000

$ cat bigger.txt | (time rg ZQZQZQZQZQ -A99999) | wc -l

real    5.067
user    3.254
sys     0.676
maxmem  30 MB
faults  0
100000

$ cat bigger.txt | (time rg ZQZQZQZQZQ -A999999) | wc -l

real    6.658
user    4.841
sys     0.778
maxmem  51 MB
faults  0
1000000
```

Now with this commit:

```
$ cat bigger.txt | (time rg ZQZQZQZQZQ -A999) | wc -l

real    1.845
user    0.328
sys     0.757
maxmem  30 MB
faults  0
1000

$ cat bigger.txt | (time rg ZQZQZQZQZQ -A9999) | wc -l

real    1.917
user    0.334
sys     0.771
maxmem  30 MB
faults  0
10000

$ cat bigger.txt | (time rg ZQZQZQZQZQ -A99999) | wc -l

real    1.972
user    0.319
sys     0.812
maxmem  30 MB
faults  0
100000

$ cat bigger.txt | (time rg ZQZQZQZQZQ -A999999) | wc -l

real    2.005
user    0.333
sys     0.855
maxmem  30 MB
faults  0
1000000
```

And compare to GNU grep:

```
$ cat bigger.txt | (time grep ZQZQZQZQZQ -A999) | wc -l

real    1.488
user    0.143
sys     0.866
maxmem  30 MB
faults  0
1000

$ cat bigger.txt | (time grep ZQZQZQZQZQ -A9999) | wc -l

real    1.697
user    0.170
sys     0.986
maxmem  30 MB
faults  1
10000

$ cat bigger.txt | (time grep ZQZQZQZQZQ -A99999) | wc -l

real    1.515
user    0.166
sys     0.856
maxmem  29 MB
faults  0
100000

$ cat bigger.txt | (time grep ZQZQZQZQZQ -A999999) | wc -l

real    1.490
user    0.174
sys     0.851
maxmem  30 MB
faults  0
1000000
```

Interestingly, GNU grep is still a bit faster. But both commands remain
roughly invariant in search time as `-A` is increased.

There is definitely something "odd" about searching `stdin`, where it
seems substantially slower. We can also observe with GNU grep:

```
$ (time grep ZQZQZQZQZQ -A999999 bigger.txt) | wc -l

real    0.692
user    0.184
sys     0.506
maxmem  30 MB
faults  0
1000000

$ cat bigger.txt | (time grep ZQZQZQZQZQ -A999999) | wc -l

real    1.700
user    0.201
sys     0.954
maxmem  30 MB
faults  0
1000000

$ (time rg ZQZQZQZQZQ -A999999 bigger.txt) | wc -l

real    0.640
user    0.428
sys     0.209
maxmem  7734 MB
faults  0
1000000

$ (time rg ZQZQZQZQZQ --no-mmap -A999999 bigger.txt) | wc -l

real    0.866
user    0.282
sys     0.581
maxmem  30 MB
faults  0
1000000

$ cat bigger.txt | (time rg ZQZQZQZQZQ -A999999) | wc -l

real    1.991
user    0.338
sys     0.819
maxmem  30 MB
faults  0
1000000
```

I wonder if this is related to my discovery in the previous commit where
`read` calls on `stdin` seem to never return anything more than ~64K. Oh
well, I'm satisfied at this point, especially given that GNU grep seems
to do a lot worse than ripgrep with bigger values of
`-B/--before-context`:

```
$ cat bigger.txt | (time grep ZQZQZQZQZQ -B9) | wc -l

real    1.568
user    0.170
sys     0.885
maxmem  30 MB
faults  0
1

$ cat bigger.txt | (time grep ZQZQZQZQZQ -B99) | wc -l

real    1.734
user    0.338
sys     0.879
maxmem  30 MB
faults  0
1

$ cat bigger.txt | (time grep ZQZQZQZQZQ -B999) | wc -l

real    2.349
user    1.723
sys     0.620
maxmem  30 MB
faults  0
1

$ cat bigger.txt | (time grep ZQZQZQZQZQ -B9999) | wc -l

real    16.459
user    15.848
sys     0.586
maxmem  30 MB
faults  0
1

$ time grep ZQZQZQZQZQ -B99999 bigger.txt
ZQZQZQZQZQ

real    1:45.06
user    1:44.12
sys     0.772
maxmem  30 MB
faults  0
```

The above pattern occurs regardless of whether you put `bigger.txt` on
stdin or whether you search it directly.

And now ripgrep:

```
$ cat bigger.txt | (time rg ZQZQZQZQZQ -B9) | wc -l

real    1.965
user    0.326
sys     0.814
maxmem  29 MB
faults  0
1

$ cat bigger.txt | (time rg ZQZQZQZQZQ -B99) | wc -l

real    1.941
user    0.423
sys     0.813
maxmem  29 MB
faults  0
1

$ cat bigger.txt | (time rg ZQZQZQZQZQ -B999) | wc -l

real    2.372
user    0.759
sys     0.703
maxmem  30 MB
faults  0
1

$ cat bigger.txt | (time rg ZQZQZQZQZQ -B9999) | wc -l

real    2.638
user    0.895
sys     0.665
maxmem  29 MB
faults  0
1

$ cat bigger.txt | (time rg ZQZQZQZQZQ -B99999) | wc -l

real    5.172
user    3.282
sys     0.748
maxmem  29 MB
faults  0
1
```

NOTE: To get `bigger.txt`:

```
$ curl -LO 'https://burntsushi.net/stuff/opensubtitles/2018/en/sixteenth.txt.gz'
$ gzip -d sixteenth.txt.gz
$ (echo ZQZQZQZQZQ && for ((i=0;i<10;i++)); do cat sixteenth.txt; done) > bigger.txt
```
2025-10-14 14:27:43 -04:00
Andrew Gallant
8c6595c215 searcher: fix performance bug with -A/--after-context when searching stdin
This was a crazy subtle bug where ripgrep could slow down exponentially
as increasingly larger values of `-A/--after-context` were used. But,
interestingly, this would only occur when searching `stdin` and _not_
when searching the same data as a regular file.

This confounded me because ripgrep, pretty early on, erases the
difference between searching a single file and `stdin`. So it wasn't
like there were different code paths. And I mistakenly assumed that they
would otherwise behave the same as they are just treated as streams.

But... it turns out that running `read` on a `stdin` versus a regular
file seems to behave differently. At least on my Linux system, with
`stdin`, `read` never seems to fill the buffer with more than 64K. But
with a regular file, `read` pretty reliably fills the caller's buffer
with as much space as declared.

Of course, it is expected that `read` doesn't *have* to fill up the
caller's buffer, and ripgrep is generally fine with that. But when
`-A/--after-context` is used with a very large value---big enough that
the default buffer capacity is too small---then more heap memory needs
to be allocated to correctly handle all cases. This can result in
passing buffers bigger than 64K to `read`.

While we *correctly* handle `read` calls that don't fill the buffer,
it turns out that if we don't fill the buffer, then we get into a
pathological case where we aren't processing as many bytes as we could.
That is, because of the `-A/--after-context` causing us to keep a lot of
bytes around while we roll the buffer and because reading from `stdin`
gives us fewer bytes than normal, we weren't amortizing our `read` calls
as well as we should have been. Indeed, our buffer capacity increases
specifically take this amortization into account, but we weren't taking
advantage of it.

We fix this by putting `read` into an inner loop that ensures our
buffer gets filled up. This fixes the performance bug:

```
$ (time rg ZQZQZQZQZQ bigger.txt --no-mmap -A9999) | wc -l

real    1.330
user    0.767
sys     0.559
maxmem  29 MB
faults  0
10000

$ cat bigger.txt | (time rg ZQZQZQZQZQ --no-mmap -A9999) | wc -l

real    2.355
user    0.860
sys     0.613
maxmem  29 MB
faults  0
10000

$ (time rg ZQZQZQZQZQ bigger.txt --no-mmap -A99999) | wc -l

real    3.636
user    3.091
sys     0.537
maxmem  29 MB
faults  0
100000

$ cat bigger.txt | (time rg ZQZQZQZQZQ --no-mmap -A99999) | wc -l

real    4.918
user    3.236
sys     0.710
maxmem  29 MB
faults  0
100000

$ (time rg ZQZQZQZQZQ bigger.txt --no-mmap -A999999) | wc -l

real    5.430
user    4.666
sys     0.750
maxmem  51 MB
faults  0
1000000

$ cat bigger.txt | (time rg ZQZQZQZQZQ --no-mmap -A999999) | wc -l

real    6.894
user    4.907
sys     0.850
maxmem  51 MB
faults  0
1000000
```

For comparison, here is GNU grep:

```
$ cat bigger.txt | (time grep ZQZQZQZQZQ -A9999) | wc -l

real    1.466
user    0.159
sys     0.839
maxmem  29 MB
faults  0
10000

$ cat bigger.txt | (time grep ZQZQZQZQZQ -A99999) | wc -l

real    1.663
user    0.166
sys     0.941
maxmem  29 MB
faults  0
100000

$ cat bigger.txt | (time grep ZQZQZQZQZQ -A999999) | wc -l

real    1.631
user    0.204
sys     0.910
maxmem  29 MB
faults  0
1000000
```

GNU grep is still notably faster. We'll fix that in the next commit.

Fixes #3184
2025-10-14 14:27:43 -04:00
Andrew Gallant
de2567a4c7 printer: fix panic in replacements in look-around corner case
The abstraction boundary fuck up is the gift that keeps on giving. It
turns out that the invariant that the match would never exceed the range
given is not always true. So we kludge around it.

Also, update the CHANGELOG to include the fix for #2111.

Fixes #3180
2025-10-12 17:25:19 -04:00
Andrew Gallant
916415857f core: don't build decompression reader unless we intend to use it
Building it can consume resources. In particular, on Windows, the
various binaries are eagerly resolved.

I think this originally wasn't done. The eager resolution was added
later for security purposes. But the "eager" part isn't actually
necessary.

It would probably be better to change the decompression reader to do
lazy resolution only when the binary is needed. But this will at least
avoid doing anything when the `-z/--search-zip` flag isn't used. But
when it is, ripgrep will still eagerly resolve all possible binaries.

Fixes #2111
2025-10-12 16:31:20 -04:00
Andrew Gallant
5c42c8c48f test: add regression test for fixed bug
It turns out that #2094 was fixed in my `--max-count` refactor a few
commits back. This commit adds a regression test for it.

Closes #2094
2025-10-12 12:45:34 -04:00
Andrew Gallant
f0faa91c68 doc: clarify --ignore-file precedence
Fixes #2777
2025-10-10 22:06:59 -04:00
Andrew Gallant
a5d9e03c68 test: attempt to fix flaky time-reliant test
Fixes #2794
2025-10-10 22:06:59 -04:00
Andrew Gallant
924ba101ee test: fix Command::current_dir API
Every single call site wants to pass a path relative to the directory
the command was created for. So just make it do that automatically,
similar to `Dir::create` and friends.
2025-10-10 22:06:59 -04:00
Andrew Gallant
293ef80eaf test: add another regression test for gitignore matching bug
I believe this was also fixed by #2933.

Closes #2770
2025-10-10 22:06:59 -04:00
Andrew Gallant
fa80aab6b0 test: add regression test for fixed gitignore bug
I believe this was actually fixed by #2933.

Closes #3067
2025-10-10 22:06:59 -04:00
mariano-m13
7c2161d687 release: add binaries for riscv64gc-unknown-linux-gnu target
Note that we skip lz4/brotli/zstd tests on RISC-V.

The CI runs RISC-V tests using cross/QEMU emulation. The decompression
tools (lz4, brotli, zstd) are x86_64 binaries on the host that cannot
execute in the RISC-V QEMU environment.

Skip these three tests at compile-time on RISC-V to avoid test failures.
The -z/--search-zip functionality itself works correctly on real RISC-V
hardware where native decompression tools are available.

PR #3165
2025-10-10 20:50:28 -04:00
Andrew Gallant
096f79ab98 deps: update everything
This includes an update to `regex 1.12.1`, which fixes a couple of
outstanding bugs in ripgrep.

Fixes #2750, Fixes #3135
2025-10-10 20:13:29 -04:00
Andrew Gallant
0407e104f6 ignore: fix problem with searching whitelisted hidden files
... specifically, when the whitelist comes from a _parent_ gitignore
file.

Our handling of parent gitignores is pretty ham-fisted and has been a
source of some unfortunate bugs. The problem is that we need to strip
the parent path from the path we're searching in order to correctly
apply the globs. But getting this stripping correct seems to be a subtle
affair.

Fixes #3173
2025-10-08 21:16:59 -04:00
Andrew Gallant
bb88a1ac45 deps: semver compatible updates to dependencies 2025-10-05 10:52:46 -04:00
Alvaro Parker
2924d0c4c0 ignore: add min_depth option
This mimics the eponymous option in `walkdir`.

Closes #3158, PR #3162
2025-10-05 10:05:26 -04:00
Andrew Gallant
9d8016d10c printer: finish removal of max_matches
This finishes what I started in commit
a6e0be3c90.
Specifically, the `max_matches` configuration has been moved to the
`grep-searcher` crate and *removed* from the `grep-printer` crate. The
commit message has the details for why we're doing this, but the short
story is to fix #3076.

Note that this is a breaking change for `grep-printer`, so this will
require a semver incompatible release.
2025-10-04 09:19:53 -04:00
Andrew Gallant
9802945e63 doc: update the CentOS, RHEL and Rocky Linux installation instructions
I've split the previously singular "CentOS/RHEL/Rocky" section into 3
sections. They each benefit from having their own steps.

I've also copied steps from [EPEL Getting Started] documentation,
including steps that don't seem to be required because it seems to be
best practice (although I do not understand it). Notably, this is not
required for CentOS Stream:

```
dnf config-manager --set-enabled crb
```

And this is not required for Red Hat:

```
subscription-manager repos --enable codeready-builder-for-rhel-10-$(arch)-rpms
```

And neither are available on Rocky Linux 10. Hence, all 3 have slightly
different instructions.

It has been suggested (see [here][suggest1] and [here][suggest2]) that
the installation instructions should just link to the [EPEL Getting
Started] documentation and just contain this step:

```
sudo dnf install ripgrep
```

However, this is not sufficient to actually install ripgrep from a
base installation of these Linux distributions. I tested this via the
`dokken/centos-stream-10:sha-d1e294f`, `rockylinux/rockylinux:10` and
`redhat/ubi10` Docker images on DockerHub.

While this does mean ripgrep's installation instructions can become out
of sync from upstream, this is *always* a risk regardless of platform.
The instructions are provided on a best effort basis and generally
should work on the latest release of said platform. If the instructions
result in unhelpful errors (like `dnf install ripgrep` does if you
don't enable EPEL), then that isn't being maximally helpful to users.
I'd rather attempt to give the entire set of instructions and risk
being out of sync.

Also, since the installation instructions include URLs with version
numbers in them, I made the section names include version numbers as
well.

Note: I found using the `dokken/centos-stream-10:sha-d1e294f` Docker
image to be somewhat odd, as I could not find any official CentOS
Docker images. [This][DockerHub-CentOS] is still the first hit on
Google, but all of its tags have been deleted and the image is
deprecated. I was profoundly confused by this given that the [EPEL
Getting Started] documentation *specifically* cites CentOS 10. In fact,
it is citing CentOS *Stream* 10, which is something wholly distinct
from CentOS. What an absolute **clusterfuck**. If I had just read this
paragraph on Wikipedia from the beginning, I would have saved myself a
lot of confusion:

> In December 2020, Red Hat unilaterally terminated CentOS development in favor
> of CentOS Stream 9, a distribution positioned upstream of RHEL. In March
> 2021, CloudLinux (makers of CloudLinux OS) released a RHEL derivative called
> AlmaLinux. Later in May 2021, one of the CentOS founders (Gregory Kurtzer)
> created the competing Rocky Linux project as a successor to the original
> mission of CentOS.

Ref #2981, Ref #2924

[EPEL Getting Started]: https://docs.fedoraproject.org/en-US/epel/getting-started/
[suggest1]: https://github.com/BurntSushi/ripgrep/pull/2981#issuecomment-3204114293
[suggest2]: https://github.com/BurntSushi/ripgrep/issues/2924#issuecomment-3326357254
[DockerHub-CentOS]: https://hub.docker.com/_/centos
2025-09-24 10:02:46 -04:00
Andrew Gallant
fdea9723ca doc: clarify a case where -m/--max-count is not strictly respected
In #2843, it's requested that these trailing contextual lines should be
displayed as non-matching because they exceed the limit. While
reasonable, I think that:

1. This would be a weird complication to the implementation.
2. This would overall be less intuitive and more complex. Today, there
   is never a case where ripgrep emits a matching line in a way where
   the match isn't highlighted.

Closes #2843
2025-09-22 22:12:15 -04:00
Andrew Gallant
c45ec16360 doc: clarify --multiline --count
Specifically, it is only equivalent to `--count-matches` when the
pattern(s) given can match over multiple lines.

We could have instead made `--multiline --count` always equivalent to
`--multiline --count-matches`, but this seems plausibly less useful.
Indeed, I think it's generally a good thing that users can enable
`-U/--multiline` but still use patterns that only match a single line.
Changing how that behaves would I think be more surprising.

Either way we slice this, it's unfortunately pretty subtle.

Fixes #2852
2025-09-22 22:00:15 -04:00
Andrew Gallant
e42432cc5d ignore: clarify WalkBuilder::filter_entry
Fixes #2913
2025-09-22 21:49:29 -04:00
Andrew Gallant
6e77339f30 cli: tweak docs for resolve_binary
Fixes #2928
2025-09-22 21:38:08 -04:00
Andrew Gallant
1b07c6616a cli: document that -c/--count can be inconsistent with -l/--files-with-matches
This is unfortunate, but is a known bug that I don't think can be fixed
without either making `-l/--files-with-matches` much slower or changing
what "binary filtering" means by default.

In this PR, we document this inconsistency since users may find it quite
surprising. The actual work-around is to disable binary filtering with
the `--binary` flag.

We add a test confirming this behavior.

Closes #3131
2025-09-22 20:24:53 -04:00
Andrew Gallant
c1fc6a5eb8 release: build aarch64 artifacts for macos on GitHub Actions
GitHub now supports this natively, so there's no need for me to do it
any more.

Fixes #3155
2025-09-22 11:56:33 -04:00
Andrew Gallant
8b5d3d1c1e printer: hack in a fix for -l/--files-with-matches when using --pcre2 --multiline with look-around
The underlying issue here is #2528, which was introduced by commit
efd9cfb2fc which fixed another bug.

For the specific case of "did a file match," we can always assume the
match count is at least 1 here. But this doesn't fix the underlying
problem.

Fixes #3139
2025-09-22 09:12:16 -04:00
Andrew Gallant
491bf3f6d5 deps: update everything else 2025-09-21 11:39:04 -04:00
Andrew Gallant
81bed78654 deps: update to PCRE2 10.46
This is for completely static builds of ripgrep.
2025-09-21 11:39:04 -04:00
Andrew Gallant
1b6177bc5c cargo: set MSRV to 1.85
I believe the current stable version of Debian packages 1.85 rustc. So
if the next release of ripgrep uses a higher MSRV, then I think Debian
won't be able to package it.

It also turned out that I wasn't using anything from beyond Rust 1.85
anyway.

It's likely that I could make use of let-chains in various places, but I
don't think it's worth combing through the code to switch to them at
this point.
2025-09-21 09:51:15 -04:00
Lucas Trzesniewski
a7b7d81d66 lint: fix a few Clippy errors
PR #3151
2025-09-21 09:15:48 -04:00
Andrew Gallant
bb8172fe9b style: apply rustfmt
Maybe 2024 changes?

Note that we now set `edition = "2024"` explicitly in `rustfmt.toml`.
Without this, it seems like it's possible in some cases for rustfmt to
run under an older edition's style. Not sure how though.
2025-09-19 21:08:19 -04:00
Isaac
64174b8e68 printer: preserve line terminator when using --crlf and --replace
Ref #3097, Closes #3100
2025-09-19 21:08:19 -04:00
mostafa
f596a5d875 globset: add allow_unclosed_class toggle
When enabled, patterns like `[abc`, `[]`, `[!]` are treated as if the
opening `[` is just a literal. This is in contrast to the default
behavior of returning a parse error, which prioritizes better error
messages.

Fixes #3127, Closes #3145
2025-09-19 21:08:19 -04:00
Thomas ten Cate
556623684e ignore/types: add GDScript files (*.gd) for the Godot Engine
Closes #3142
2025-09-19 21:08:19 -04:00
Pavel Safronov
a6e0be3c90 searcher: move "max matches" from printer to searcher
This is a bit of a brutal change, but I believe is necessary in order to
fix a bug in how we handle the "max matches" limit in multi-line mode
while simultaneously handling context lines correctly.

The main problem here is that "max matches" refers to the shorter of
"one match per line" or "a single match." In typical grep, matches
*can't* span multiple lines, so there's never a difference. But in
multi-line mode, they can. So match counts necessarily must be handled
differently for multi-line mode.

The printer was previously responsible for this. But for $reasons, the
printer is fundamentally not in charge of how matches are found and
reported.

See my comments in #3094 for even more context.

This is a breaking change for `grep-printer`.

Fixes #3076, Closes #3094
2025-09-19 21:08:19 -04:00
Andrew Gallant
a60e62d9ac rust: move to Rust 2024
I'd like to use let chains.

Probably this isn't necessary to do for every crate, but I don't feel
like maintaining a mishmash.
2025-09-19 21:08:19 -04:00
Cristián Maureira-Fredes
3f565b58cc ignore/types: add Qt types for resource files and ui declaration
qrc[1] are the resource files for data related to user interfaces, and
ui[2] is the extension that the Qt Designer generates, for Widget based
projects.

Note that the initial PR used `ui` as a name for `*.ui`, but this seems
overly general. Instead, we use `qui` here instead.

Closes #3141

[1]: https://doc.qt.io/qt-6/resources.html
[2]: https://doc.qt.io/qt-6/uic.html
2025-09-19 21:08:19 -04:00
Andrew Gallant
74959a14cb man: escape all hyphens in flag names
Apparently, if we don't do this, some roff renderers will use a special
Unicode hyphen. That in turn makes searching a man page not work as one
would expect.

Fixes #3140
2025-09-19 21:08:19 -04:00
dana
78383de9b2 complete/zsh: improve --hyperlink-format completion
Also don't re-define helper functions if they exist.

Closes #3102
2025-09-19 21:08:19 -04:00
Ilya Grigoriev
519c1bd5cf complete: improvements for the --hyperlink-format flag
The goal is to make the completion for `rg --hyperlink-format v<TAB>`
work in the fish shell.

These are not exhaustive (the user can also specify custom formats).
This is somewhat unfortunate, but is probably better than not doing
anything at all.

The `grep+` value necessitated a change to a test.

Closes #3096
2025-09-19 21:08:19 -04:00
Lucas Trzesniewski
66aa4a63bb printer: deduplicate hyperlink alias names
This exports a new `HyperlinkAlias` type in the `grep-printer` crate.
This includes a "display priority" with each alias and a function for
getting all supported aliases from the crate.

This should hopefully make it possible for downstream users of this
crate to include a list of supported aliases in the documentation.

Closes #3103
2025-09-19 21:08:19 -04:00
Andrew Gallant
fdfda9ae73 doc: actually fix deb download link
Amazingly, there were about a dozen PRs fixing this same thing, and I
happened to choose the one that didn't actually fix the URL completely.

Apparently some users found this "interesting":
https://github.com/BurntSushi/ripgrep/pull/3065#issuecomment-3204275122
2025-09-19 21:08:19 -04:00
wackget
c037310050 doc: update installation instructions for RHEL/CentOS/Rocky Linux 9
Closes #2924, Closes #2981, Closes #3124
2025-09-19 21:08:19 -04:00
emrebengue
99fe884536 colors: add highlight type support for matching lines
This lets users highlight non-matching text in matching lines.

Closes #3024, Closes #3107
2025-09-19 21:08:19 -04:00
Andrew Gallant
126bbeab8c printer: fix handling of has_match for summary printer
Previously, `Quiet` mode in the summary printer always acted like
"print matching paths," except without the printing. This happened even
if we wanted to "print non-matching paths." Since this only afflicted
quiet mode, this had the effect of flipping the exit status when
`--files-without-match --quiet` was used.

Fixes #3108, Ref #3118
2025-09-19 21:08:19 -04:00
Ben Heidemann
859d54270e globset: make GlobSet::new public
For users of globset who already have a `Vec<Glob>` (or similar),
the current API requires them to iterate over their `Vec<Glob>`,
calling `GlobSetBuilder::add` for each `Glob`, thus constructing a new
`Vec<Glob>` internal to the GlobSetBuilder. This makes the consuming
code unnecessarily verbose. (There is unlikely to be any meaningful
performance impact of this, however, since the cost of allocating a new
`Vec` is likely marginal compared to the cost of glob compilation.)

Instead of taking a `&[Glob]`, we accept an iterator of anything that
can be borrowed as a `&Glob`. This required some light refactoring of
the constructor, but nothing onerous.

Closes #3066
2025-09-19 21:08:19 -04:00
David Tolnay
33b44812c0 globset: make GlobSet::empty const
Closes #3098
2025-09-19 21:08:19 -04:00
Lucas Garron
c007d89145 doc: clarify that .git is covered by --hidden and not --ignore-vcs
Fixes #3121, Closes #3122
2025-09-19 21:08:19 -04:00
Vishva Natarajan
60aa9f1727 tests: increase sleep duration for sort file metadata tests on Windows AArch64
Use `cfg!` to assign a 1000ms delay only on Windows AArch64 targets.

This was done because it has been observed to be necessary on this
platform. The conditional logic is used because 1s is quite long to
wait on every other more sensible platform.

Closes #3071, Closes #3072
2025-09-19 21:08:19 -04:00
Porkepix
56d03a1e2f ignore/types: include missing files for the tf type
Existing matches were too restrictive, so we simplify those to every
type of tfvars file we can encounter.

Closes #3117
2025-09-19 21:08:19 -04:00
Tomek
e166f271df ignore/types: add gleam
[Gleam] is a general-purpose, concurrent, functional high-level
programming language that compiles to Erlang or JavaScript source code.

Closes #3105

[Gleam]: https://gleam.run/
2025-09-19 21:08:19 -04:00
Andrew McNulty
83d94672ae ignore/types: add LLVM to default types
This PR adds llvm to the list of default types, matching files with
extension ll which is used widely for the textual form of LLVM's
Intermediate Representation.

Ref: https://llvm.org/docs/LangRef.html

Closes #3079
2025-09-19 21:08:19 -04:00
James Moberg
6887122e5b ignore/types: add ColdFusion and BoxLang
Closes #3090
2025-09-19 21:08:19 -04:00
Lilian A. Moraru
06210b382a ignore/types: add .env to sh file type
`.env` or "dotenv" is used quite often in cross-compilation/embedded
development environments to load environment variables, define shell
functions or even to execute shell commands. Just like `.zshenv` in
this list, I think `.env` should also be added here.

Closes #3063
2025-09-19 21:08:19 -04:00
kevichi7
00e501b529 build: emit warning if git is missing during build
Closes #3057
2025-09-19 21:08:19 -04:00
Andrew Gallant
2ebd768d40 doc: remove CentOS/RHEL installation instructions
These distros, or their Docker images, appear FUBAR. The UX is so poor
that I cannot verify the correct installation instructions. So I'm
removing them.

Ref https://github.com/BurntSushi/ripgrep/pull/2981#issuecomment-3202063173

Closes #2981, Closes #3124
2025-09-19 21:08:19 -04:00
Andrew Gallant
4df1298127 globset: fix bug where trailing . in file name was incorrectly handled
I'm not sure why I did this, but I think I was trying to imitate the
contract of [`std::path::Path::file_name`]:

> Returns None if the path terminates in `..`.

But the status quo clearly did not implement this. And as a result, if
you have a glob that ends in a `.`, it was instead treated as the empty
string (which only matches the empty string).

We fix this by implementing the semantic from the standard library
correctly.

Fixes #2990

[`std::path::Path::file_name`]: https://doc.rust-lang.org/std/path/struct.Path.html#method.file_name
2025-09-19 21:08:19 -04:00
bbb651
ba23ced817 ignore/types: add scdoc
Ref https://sr.ht/~sircmpwn/scdoc/

Closes #3007
2025-09-19 21:08:19 -04:00
Nadir Ishiguro
28cce895ff doc: fix nixpkgs link
Closes #3006
2025-09-19 21:08:19 -04:00
Andrew Gallant
7339bdf4b5 test: check binary file detection when using memory maps
This resolves a TODO comment I wrote a while back.

Memory maps behave a little differently in terms of detecting binary
data, so the tests have somewhat different results than the tests that
disable memory maps.

Closes #3002
2025-09-19 21:08:19 -04:00
Alexander Weiss
79f5a5a66e globset: add Candidate::from_bytes constructor
This is already technically possible to do on Unix by going through
`OsStr` and `&[u8]` conversions. This just makes it easier to do in all
circumstances and is reasonable to intentionally support.

Closes #2954, Closes #2955
2025-09-19 21:08:19 -04:00
Andrew Gallant
4ab1862dc0 stats: fix case where "bytes searched" could be wrong
Specifically, if the search was instructed to quit early, we might not
have correctly marked the number of bytes consumed.

I don't think this bug occurs when memory maps are used to read the
haystack.

Closes #2944
2025-09-19 21:08:19 -04:00
Thomas Weißschuh
6244e635a1 ignore/types: add Kconfig
Kconfig files are used to represent the configuration database of
Kbuild build system. Kbuild is developed as part of the Linux kernel.
There are numerous other users including OpenWrt and U-Boot.

Ref: https://docs.kernel.org/kbuild/index.html

Closes #2942
2025-09-19 21:08:19 -04:00
ChristopherYoung
5e2d32fe7f printer: slightly simplify code
I'm not sure why it was written with `map` previously. It almost looks
like I was trying to make it deref, but apparently that isn't needed.

Closes #2941
2025-09-19 21:08:19 -04:00
Dmitry Gerasimov
75e17fcabe ignore/types: add *.dtso to devicetree type
`dtso` files became recognized as devicetree a
couple of years ago with the following commit:
363547d219

Closes #2938
2025-09-19 21:08:19 -04:00
Martin Pool
99b7957122 ignore/doc: explain that require_git(false) will ascend above git roots
This should hopefully help avoid confusion about #2812 as encountered
in https://github.com/sourcefrog/cargo-mutants/issues/450.

Closes #2937
2025-09-19 21:08:19 -04:00
Andrew Gallant
ab4665a164 globset: remove __Nonexhaustive work-around
This existed before the `#[non_exhaustive]` attribute was a thing. Since
it was not part of the API of the crate, it is not a semver incompatible
change.
2025-09-19 21:08:19 -04:00
Luke Sandberg
5f5da48307 globset: support nested alternates
For example, `**/{node_modules/**/*/{ts,js},crates/**/*.{rs,toml}`.

I originally didn't add this I think for implementation simplicity, but
it turns out that it really isn't much work to do. There might have also
been some odd behavior in the regex engine for dealing with empty
alternates, but that has all been long fixed.

Closes #3048, Closes #3112
2025-09-19 21:08:19 -04:00
Colin Heffernan
b0c6d4c34a ignore/types: add *.svelte.ts to Svelte file type glob
I was somewhat unsure about adding this, since `.svelte.ts` seems
primarily like a TypeScript file and it could be surprising to show up
in a search for Svelte files. In particular, ripgrep doesn't know how to
only search the Svelte stuff inside of a `.svelte.ts` file, so you could
end up with lots of false positives.

However, I was swayed[1] by the argument that the extension does
actually include `svelte` in it, so maybe this is fine. Please open an
issue if this change ends up being too annoying for most users.

Closes #2874, Closes #2909

[1]: https://github.com/BurntSushi/ripgrep/issues/2874#issuecomment-3126892931
2025-09-19 21:08:19 -04:00
Andrew Gallant
d199058e77 cli: make rg -vf file behave sensibly
Previously, when `file` is empty (literally empty, as in, zero byte),
`rg -f file` and `rg -vf file` would behave identically. This is odd
and also doesn't match how GNU grep behaves. It's also not logically
correct. An empty file means _zero_ patterns which is an empty set. An
empty set matches nothing. Inverting the empty set should result in
matching everything.

This was because of an errant optimization that lets ripgrep quit early
if it can statically detect that no matches are possible.

Moreover, there was *also* a bug in how we constructed the PCRE2 pattern
when there are zero patterns. PCRE2 doesn't have a concept of sets of
patterns (unlike the `regex` crate), so we need to fake it with an empty
character class.

Fixes #1332, Fixes #3001, Closes #3041
2025-09-19 21:08:19 -04:00
Josh Cotton
bb0cbae312 ci: add aarch64 Windows
This also adds a new release artifact for aarch64 Windows.

Closes #2943, Closes #3038
2025-09-19 21:08:19 -04:00
Wilfred Hughes
8fca3cdca6 doc: fix typo in FAQ
Closes #3027
2025-09-19 21:08:19 -04:00
squidfunk
6f39f830cb globset: compact Debug impl for GlobSetBuilder and Glob
Ideally we'd have a compact impl for `GlobSet` too, but that's a lot
more work. In particular, the constituent types don't all store the
original pattern string, so that would need to be added.

Closes #3026
2025-09-19 21:08:19 -04:00
Zach Ahn
e83828fc8c ignore/types: add *.rake extension to list of Ruby file types
This PR adds the .rake extension to the Ruby type. It's a pretty common
file extension in Rails apps—in my experience, the Rakefile is often
pretty empty and only sets some stuff up while most of the code lives
in various .rake files.

See: https://ruby.github.io/rake/doc/rakefile_rdoc.html#label-Multiple+Rake+Files

Closes #2921
2025-09-19 21:08:19 -04:00
f3rn0s
72a1303238 ignore/types: add typst
Closes #2914
2025-09-19 21:08:19 -04:00
Hamir Mahal
861f6d374f style: simplify string formatting
Most of this code was written before this was supported by Rust.

Closes #2912
2025-09-19 21:08:19 -04:00
Thayne McCombs
624bbf7dce globset: add matches_all method
This returns true if all globs in the set match the supplied file.

Fixes #2869, Closes #2900
2025-09-19 21:08:19 -04:00
Aleksey Vasilenko
53279db414 deps: switch to tikv-jemallocator
It is now a recommended crate for jemalloc and it contains an
[important fix for compilation on riscv64gc-unknown-linux-musl][fix],
I bumped into this when I was trying to
[build ripgrep on OpenWrt][openwrt].

Closes #2889

[fix]: https://github.com/tikv/jemallocator/pull/67
[openwrt]: https://github.com/openwrt/packages/pull/24961
2025-09-19 21:08:19 -04:00
Stephan Badragan
292bc54e64 printer: support -r/--replace with --json
This adds a `replacement` field to each submatch object in the JSON
output. In effect, this extends the `-r/--replace` flag so that it works
with `--json`.

This adds a new field instead of replacing the match text (which is how
the standard printer works) for maximum flexibility. This way, consumers
of the JSON output can access the original match text (and always rely
on it corresponding to the original match text) while also getting the
replacement text without needing to do the replacement themselves.

Closes #1872, Closes #2883
2025-09-19 21:08:19 -04:00
Melvin Wang
5be67c1244 ignore/types: include msbuild solution filters
Closes #2871
2025-09-19 21:08:19 -04:00
Lucas Trzesniewski
119407d0a9 printer: use std::path::absolute on Windows
This specifically avoids touching the file system, which can lead to
fairly dramatic speed-ups in large repositories with lots of matches.

Closes #2865
2025-09-19 21:08:19 -04:00
Alex Povel
d869038cf6 ignore: improve multithreading heuristic
This copies the one found in ripgrep.

See also:
71d71d2d98/crates/core/flags/hiargs.rs (L172)

Closes #2854, Closes #2856
2025-09-19 21:08:19 -04:00
Thomas Otto
75970fd16b ignore: don't process command line arguments in reverse order
When searching in parallel with many more arguments than threads, the
first arguments are searched last -- unlike in the -j1 case.

This is unexpected for users who know about the parallel nature of rg
and think they can give the scheduler a hint by positioning larger
input files (L1, L2, ..) before smaller ones (█, ██). Instead, this can
result in sub-optimal thread usage and thus longer runtime (simplified
example with 2 threads):

 T1:  █ ██ █ █ █ █ ██ █ █ █ █ █ ██ ╠═════════════L1════════════╣
 T2:  █ █ ██ █ █ ██ █ █ █ ██ █ █ ╠═════L2════╣

                                       ┏━━━━┳━━━━┳━━━━┳━━━━┓
This is caused by assigning work to    ┃ T1 ┃ T2 ┃ T3 ┃ T4 ┃
 per-thread stacks in a round-robin    ┡━━━━╇━━━━╇━━━━╇━━━━┩
              manner, starting here  → │ L1 │ L2 │ L3 │ L4 │ ↵
                                       ├────├────┼────┼────┤
                                       │ s5 │ s6 │ s7 │ s8 │ ↵
                                       ├────┼────┼────┼────┤
                                       ╷ .. ╷ .. ╷ .. ╷ .. ╷
                                       ├────┼────┼────┼────┤
                                       │ st │ su │ sv │ sw │ ↵
                                       ├────┼────┼────┼────┘
                                       │ sx │ sy │ sz │
                                       └────┴────┴────┘
   and then processing them bottom-up:   ↥    ↥    ↥    ↥

                                       ╷ .. ╷ .. ╷ .. ╷ .. ╷
This patch reverses the input order    ├────┼────┼────┼────┤
so the two reversals cancel each other │ s7 │ s6 │ s5 │ L4 │ ↵
out. Now at least the first N          ├────┼────┼────┼────┘
arguments, N=number-of-threads, are    │ L3 │ L2 │ L1 │
processed before any others (then      └────┴────┴────┘
work-stealing may happen):

 T1:  ╠═════════════L1════════════╣ █ ██ █ █ █ █ █ █ ██
 T2:  ╠═════L2════╣ █ █ ██ █ █ ██ █ █ █ ██ █ █ ██ █ █ █

(With some more shuffling T1 could always be assigned L1 etc., but
that would mostly be for optics).

Closes #2849
2025-09-19 21:08:19 -04:00
Christoph Badura
380809f1e2 ignore/types: add Makefile.*
The *BSD build systems make use of "Makefile.inc" a lot. Make the
"make" type recognize this file by default. And more generally,
`Makefile.*` seems to be a convention, so just generalize it.

Closes #2846
2025-09-19 21:08:19 -04:00
Matt Kulukundis
94ea38da30 ignore: support .jj as well as .git
This makes it so the presence of `.jj` will cause ripgrep to treat it
as a VCS directory, just as if `.git` were present. This is useful for
ripgrep's default behavior when working with jj repositories that don't
have a `.git` but do have `.gitignore`. Namely, ripgrep requires the
presence of a VCS repository in order to respect `.gitignore`.

We don't handle clone-specific exclude rules for jj repositories without
`.git` though. It seems it isn't 100% set yet where we can find
those[1].

Closes #2842

[1]: https://github.com/BurntSushi/ripgrep/pull/2842#discussion_r2020076722
2025-09-19 21:08:19 -04:00
Tor Shepherd
da672f87e8 color: add italic to style attributes
Closes #2841
2025-09-19 21:08:19 -04:00
robert-bryson
edafb612d2 core: add "total" to --stats output
This makes it a little clearer. Apologies to anyone who is regex
matching on this output.

Closes #2797
2025-09-19 21:08:19 -04:00
Stephen Albert-Moore
483628469a ignore/gitignore: skip BOM at start of ignore file
This matches Git's behavior.

Fixes #2177, Closes #2782
2025-09-19 21:08:19 -04:00
Riccardo Attilio Galli
c93fc793a0 searcher: add more tests for replace_bytes
... and add a comment explaining an optimization.

Closes #2729
2025-09-19 21:08:19 -04:00
Keith Smiley
7c004f224e ignore/types: detect WORKSPACE.bzlmod for bazel file type
This file came alongside MODULE.bazel and I should have added it here
previously.

Closes #2726
2025-09-19 21:08:19 -04:00
William Johnson
52115ab633 globset: add opt-in Arbitrary trait implementations
This feature is mandatory when using `Glob` in fuzz testing.

Closes #2720
2025-09-19 21:08:19 -04:00
Andrew Gallant
bfe2def121 tests: add test for filtering hidden files
Note that this isn't a regression test. In particular, this didn't fail
with ripgrep 14.1.1. I couldn't figure out how to turn what the OP gave
me into a failing test.

With #829 fixed, if the OP can provide a better regression test, it
might make sense to re-investigate this.

Closes #2711
2025-09-19 21:08:19 -04:00
ChristopherYoung
14f4957b3d ignore: fix filtering searching subdir or .ignore in parent dir
The previous code deleted too many parts of the path when constructing
the absolute path, resulting in a shortened final path. This patch
creates the correct absolute path by only removing the necessary parts.

Fixes #829, Fixes #2731, Fixes #2747, Fixes #2778, Fixes #2836, Fixes #2933, Fixes #3144
Closes #2933
2025-09-19 21:08:19 -04:00
Jan Verbeek
f722268814 complete/fish: Take RIPGREP_CONFIG_PATH into account
The fish completions now also pay attention to the configuration file
to determine whether to suggest negation options and not just to the
current command line.

This doesn't cover all edge cases. For example the config file is
cached, and so changes may not take effect until the next shell
session. But the cases it doesn't cover are hopefully very rare.

Closes #2708
2025-09-19 21:08:19 -04:00
wang384670111
90a680ab45 impl: switch most atomic ops to Relaxed ordering
These all seem pretty straight-forward. Compared with #2706, I dropped
the changes to the atomic orderings used in `ignore` because I haven't
had time to think through that carefully. But the ops in this PR seem
fine.

Closes #2706
2025-09-19 21:08:19 -04:00
Andrew Gallant
119a58a400 msrv: bump to Rust 1.88
This is to prep for the next release. I don't know if the requirement
will actually be for Rust 1.88, but it is intended to support the latest
version of stable Rust.
2025-07-26 10:41:47 -04:00
Andrew Gallant
3b7fd442a6 deps: update everything
It looks like a new dependency on `getrandom` was added (which brings in
a few more dependencies itself) because of `jobserver`. Thankfully,
`jobserver` is only used when ripgrep's `pcre2` feature is enabled, so
this still keeps the default set of dependencies very small.
2025-07-04 10:12:38 -04:00
wm
cbc598f245 doc: update version number in dpkg installation
PR #3058
2025-05-30 08:30:52 -04:00
dependabot[bot]
6dfaec03e8 deps: bump crossbeam-channel from 0.5.13 to 0.5.15
Bumps [crossbeam-channel](https://github.com/crossbeam-rs/crossbeam) from 0.5.13 to 0.5.15.
- [Release notes](https://github.com/crossbeam-rs/crossbeam/releases)
- [Changelog](https://github.com/crossbeam-rs/crossbeam/blob/master/CHANGELOG.md)
- [Commits](https://github.com/crossbeam-rs/crossbeam/compare/crossbeam-channel-0.5.13...crossbeam-channel-0.5.15)

---
updated-dependencies:
- dependency-name: crossbeam-channel
  dependency-version: 0.5.15
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-10 10:55:32 -04:00
Pierre Rouleau
5fbc4fee64 ignore/types: fix Seed7 file extension
PR #3023
2025-04-07 10:53:32 -04:00
Pierre Rouleau
004370bd16 ignore/types: add support for Seed7 files
For more info on the Seed7 programming Language see:

- on Wikipedia: https://en.wikipedia.org/wiki/Seed7
- Seed7 home:   https://seed7.sourceforge.net/
- Seed7 repo:   https://github.com/ThomasMertes/seed7

PR #3022
2025-04-07 08:51:22 -04:00
Andrew Gallant
de4baa1002 globset-0.4.16 2025-02-27 12:46:58 -05:00
Andrew Gallant
163ac157d3 globset: escape { and } in escape
This appears to be an oversight from when `escape` was
implemented in #2061.
2025-02-27 12:46:48 -05:00
Andrew Gallant
e2362d4d51 searcher: add log message noting detected encoding
This helps improve diagnostics. Otherwise it can be easy to miss that
ripgrep is doing transcoding.

Fixes #2979
2025-01-25 14:27:00 -05:00
Kizhyk
d6b59feff8 github: update WASI compilation job
Ref https://blog.rust-lang.org/2024/04/09/updates-to-rusts-wasi-targets.html

PR #2970
2025-01-13 10:16:09 -05:00
Max Coplan
94305125ef zsh: support sourcing zsh completion dynamically
Previously, you needed to save the completion script to a file and
then source it. Now, you can dynamically source completions in zsh by
running

    $ source <(rg --generate complete-zsh)

Before this commit, you would get an error after step 1.
After this commit, it should work as expected.

We also improve the FAQ item for zsh completions.

Fixes #2956
2024-12-31 08:23:13 -05:00
Andrew Gallant
79cbe89deb doc: tweak wording for stdin detection
This makes it slightly more precise to cover weird cases like trying to
pass a directory on stdin.

Closes #2906
2024-09-30 07:38:05 -04:00
Thayne McCombs
bf63fe8f25 regex: add as_match method to Captures trait
Ref https://github.com/rust-lang/regex/issues/1146

PR #2898
2024-09-19 09:30:31 -04:00
Andrew Gallant
8bd5950296 changelog: add next section 2024-09-08 22:32:09 -04:00
Andrew Gallant
6e0539ab91 pkg/brew: update tap 2024-09-08 22:32:02 -04:00
Andrew Gallant
4649aa9700 14.1.1
Some checks failed
release / create-release (push) Has been cancelled
release / build-release (linux, ubuntu-latest, nightly, x86_64-linux-musl-strip, x86_64-unknown-linux-musl) (push) Has been cancelled
release / build-release (macos, macos-latest, nightly, x86_64-apple-darwin) (push) Has been cancelled
release / build-release (stable-aarch64, ubuntu-latest, qemu-aarch64, stable, aarch64-linux-gnu-strip, aarch64-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-arm-gnueabihf, ubuntu-latest, qemu-arm, stable, arm-linux-gnueabihf-strip, armv7-unknown-linux-gnueabihf) (push) Has been cancelled
release / build-release (stable-arm-musleabi, ubuntu-latest, qemu-arm, stable, arm-linux-musleabi-strip, armv7-unknown-linux-musleabi) (push) Has been cancelled
release / build-release (stable-arm-musleabihf, ubuntu-latest, qemu-arm, stable, arm-linux-musleabihf-strip, armv7-unknown-linux-musleabihf) (push) Has been cancelled
release / build-release (stable-powerpc64, ubuntu-latest, qemu-ppc64, stable, powerpc64-linux-gnu-strip, powerpc64-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-s390x, ubuntu-latest, qemu-s390x, stable, s390x-linux-gnu-strip, s390x-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-x86, ubuntu-latest, i386, stable, x86_64-linux-gnu-strip, i686-unknown-linux-gnu) (push) Has been cancelled
release / build-release (win-gnu, windows-latest, nightly-x86_64-gnu, x86_64-pc-windows-gnu) (push) Has been cancelled
release / build-release (win-msvc, windows-latest, nightly, x86_64-pc-windows-msvc) (push) Has been cancelled
release / build-release (win32-msvc, windows-latest, nightly, i686-pc-windows-msvc) (push) Has been cancelled
release / build-release-deb (push) Has been cancelled
2024-09-08 22:15:00 -04:00
Andrew Gallant
c009652e77 changelog: 14.1.1 2024-09-08 22:13:53 -04:00
Andrew Gallant
b9f7a9ba2b deps: bump grep to 0.3.2 2024-09-08 22:11:17 -04:00
Andrew Gallant
a1960877cf grep-0.3.2 2024-09-08 22:11:00 -04:00
Andrew Gallant
bb0925af91 deps: bump grep-printer to 0.2.2 2024-09-08 22:10:49 -04:00
Andrew Gallant
be117dbafa grep-printer-0.2.2 2024-09-08 22:10:29 -04:00
Andrew Gallant
06dc13ad2d deps: bump grep-searcher to 0.1.14 2024-09-08 22:09:55 -04:00
Andrew Gallant
c6c2e69b8f grep-searcher-0.1.14 2024-09-08 22:09:27 -04:00
Andrew Gallant
e67c868ddd deps: bump grep-pcre2 to 0.1.8 2024-09-08 22:09:23 -04:00
Andrew Gallant
d33f2e2f70 grep-pcre2-0.1.8 2024-09-08 22:08:41 -04:00
Andrew Gallant
082edafffa deps: bump grep-regex to 0.1.13 2024-09-08 22:08:22 -04:00
Andrew Gallant
7c8dc332b3 grep-regex-0.1.13 2024-09-08 22:07:52 -04:00
Andrew Gallant
ea961915b5 deps: bump grep-cli to 0.1.11 2024-09-08 22:07:30 -04:00
Andrew Gallant
7943bdfe82 grep-cli-0.1.11 2024-09-08 22:06:59 -04:00
Andrew Gallant
312a7884fc deps: bump ignore to 0.4.23 2024-09-08 22:06:39 -04:00
96 changed files with 5358 additions and 1703 deletions

View File

@@ -53,7 +53,7 @@ jobs:
include:
- build: pinned
os: ubuntu-latest
rust: 1.74.0
rust: 1.85.0
- build: stable
os: ubuntu-latest
rust: stable
@@ -95,15 +95,22 @@ jobs:
os: ubuntu-latest
rust: stable
target: s390x-unknown-linux-gnu
- build: stable-riscv64
os: ubuntu-latest
rust: stable
target: riscv64gc-unknown-linux-gnu
- build: macos
os: macos-latest
rust: nightly
- build: win-msvc
os: windows-2022
os: windows-latest
rust: nightly
- build: win-gnu
os: windows-2022
os: windows-latest
rust: nightly-x86_64-gnu
- build: winaarch64-msvc
os: windows-11-arm
rust: nightly
steps:
- name: Checkout repository
uses: actions/checkout@v4
@@ -177,7 +184,7 @@ jobs:
# 'rg' binary (done in test-complete) with qemu, which is a pain and
# doesn't really gain us much. If shell completion works in one place,
# it probably works everywhere.
if: matrix.target == '' && matrix.os != 'windows-2022'
if: matrix.target == '' && !startsWith(matrix.os, 'windows')
shell: bash
run: ci/test-complete
@@ -187,9 +194,9 @@ jobs:
- name: Print available short flags
shell: bash
run: ${{ env.CARGO }} test --bin rg ${{ env.TARGET_FLAGS }} flags::defs::tests::available_shorts -- --nocapture
run: ${{ env.CARGO }} test --bin rgs ${{ env.TARGET_FLAGS }} flags::defs::tests::available_shorts -- --nocapture
# Setup and compile on the wasm32-wasi target
# Setup and compile on the wasm32-wasip1 target
wasm:
runs-on: ubuntu-latest
steps:
@@ -199,8 +206,8 @@ jobs:
uses: dtolnay/rust-toolchain@master
with:
toolchain: stable
- name: Add wasm32-wasi target
run: rustup target add wasm32-wasi
- name: Add wasm32-wasip1 target
run: rustup target add wasm32-wasip1
- name: Basic build
run: cargo build --verbose
@@ -230,3 +237,28 @@ jobs:
env:
RUSTDOCFLAGS: -D warnings
run: cargo doc --no-deps --document-private-items --workspace
fuzz_testing:
name: Compile Fuzz Test Targets
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install required packages (Ubuntu)
run: |
sudo apt-get update
sudo apt-get install g++ --yes
- name: Install Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: stable
- name: Install Fuzzer
run: cargo install cargo-fuzz
working-directory: fuzz
- name: Verify fuzz targets build
run: cargo check
working-directory: fuzz

View File

@@ -98,12 +98,6 @@ jobs:
target: armv7-unknown-linux-musleabi
strip: arm-linux-musleabi-strip
qemu: qemu-arm
- build: stable-powerpc64
os: ubuntu-latest
rust: stable
target: powerpc64-unknown-linux-gnu
strip: powerpc64-linux-gnu-strip
qemu: qemu-ppc64
- build: stable-s390x
os: ubuntu-latest
rust: stable
@@ -114,6 +108,10 @@ jobs:
os: macos-latest
rust: nightly
target: x86_64-apple-darwin
- build: macos
os: macos-latest
rust: nightly
target: aarch64-apple-darwin
- build: win-msvc
os: windows-latest
rust: nightly
@@ -122,6 +120,10 @@ jobs:
os: windows-latest
rust: nightly-x86_64-gnu
target: x86_64-pc-windows-gnu
- build: winaarch64-msvc
os: windows-11-arm
rust: nightly
target: aarch64-pc-windows-msvc
- build: win32-msvc
os: windows-latest
rust: nightly
@@ -174,11 +176,11 @@ jobs:
- name: Build release binary
shell: bash
run: |
${{ env.CARGO }} build --verbose --release --features pcre2 ${{ env.TARGET_FLAGS }}
if [ "${{ matrix.os }}" = "windows-latest" ]; then
bin="target/${{ matrix.target }}/release/rg.exe"
${{ env.CARGO }} build --verbose --profile release-lto --features pcre2 ${{ env.TARGET_FLAGS }}
if [[ "${{ matrix.os }}" == windows-* ]]; then
bin="target/${{ matrix.target }}/release-lto/rgs.exe"
else
bin="target/${{ matrix.target }}/release/rg"
bin="target/${{ matrix.target }}/release-lto/rgs"
fi
echo "BIN=$bin" >> $GITHUB_ENV
@@ -201,14 +203,14 @@ jobs:
shell: bash
run: |
version="${{ needs.create-release.outputs.version }}"
echo "ARCHIVE=ripgrep-$version-${{ matrix.target }}" >> $GITHUB_ENV
echo "ARCHIVE=rgs-$version-${{ matrix.target }}" >> $GITHUB_ENV
- name: Creating directory for archive
shell: bash
run: |
mkdir -p "$ARCHIVE"/{complete,doc}
cp "$BIN" "$ARCHIVE"/
cp {README.md,COPYING,UNLICENSE,LICENSE-MIT} "$ARCHIVE"/
cp {README.md,README-ripgrep.md,COPYING,UNLICENSE,LICENSE-MIT} "$ARCHIVE"/
cp {CHANGELOG.md,FAQ.md,GUIDE.md} "$ARCHIVE"/doc/
- name: Generate man page and completions (no emulation)
@@ -216,11 +218,11 @@ jobs:
shell: bash
run: |
"$BIN" --version
"$BIN" --generate complete-bash > "$ARCHIVE/complete/rg.bash"
"$BIN" --generate complete-fish > "$ARCHIVE/complete/rg.fish"
"$BIN" --generate complete-powershell > "$ARCHIVE/complete/_rg.ps1"
"$BIN" --generate complete-zsh > "$ARCHIVE/complete/_rg"
"$BIN" --generate man > "$ARCHIVE/doc/rg.1"
"$BIN" --generate complete-bash > "$ARCHIVE/complete/rgs.bash"
"$BIN" --generate complete-fish > "$ARCHIVE/complete/rgs.fish"
"$BIN" --generate complete-powershell > "$ARCHIVE/complete/_rgs.ps1"
"$BIN" --generate complete-zsh > "$ARCHIVE/complete/_rgs"
"$BIN" --generate man > "$ARCHIVE/doc/rgs.1"
- name: Generate man page and completions (emulation)
if: matrix.qemu != ''
@@ -234,31 +236,31 @@ jobs:
"$PWD/target:/target:Z" \
"ghcr.io/cross-rs/${{ matrix.target }}:main" \
"${{ matrix.qemu }}" "/$BIN" \
--generate complete-bash > "$ARCHIVE/complete/rg.bash"
--generate complete-bash > "$ARCHIVE/complete/rgs.bash"
docker run --rm -v \
"$PWD/target:/target:Z" \
"ghcr.io/cross-rs/${{ matrix.target }}:main" \
"${{ matrix.qemu }}" "/$BIN" \
--generate complete-fish > "$ARCHIVE/complete/rg.fish"
--generate complete-fish > "$ARCHIVE/complete/rgs.fish"
docker run --rm -v \
"$PWD/target:/target:Z" \
"ghcr.io/cross-rs/${{ matrix.target }}:main" \
"${{ matrix.qemu }}" "/$BIN" \
--generate complete-powershell > "$ARCHIVE/complete/_rg.ps1"
--generate complete-powershell > "$ARCHIVE/complete/_rgs.ps1"
docker run --rm -v \
"$PWD/target:/target:Z" \
"ghcr.io/cross-rs/${{ matrix.target }}:main" \
"${{ matrix.qemu }}" "/$BIN" \
--generate complete-zsh > "$ARCHIVE/complete/_rg"
--generate complete-zsh > "$ARCHIVE/complete/_rgs"
docker run --rm -v \
"$PWD/target:/target:Z" \
"ghcr.io/cross-rs/${{ matrix.target }}:main" \
"${{ matrix.qemu }}" "/$BIN" \
--generate man > "$ARCHIVE/doc/rg.1"
--generate man > "$ARCHIVE/doc/rgs.1"
- name: Build archive (Windows)
shell: bash
if: matrix.os == 'windows-latest'
if: startsWith(matrix.os, 'windows')
run: |
7z a "$ARCHIVE.zip" "$ARCHIVE"
certutil -hashfile "$ARCHIVE.zip" SHA256 > "$ARCHIVE.zip.sha256"
@@ -267,7 +269,7 @@ jobs:
- name: Build archive (Unix)
shell: bash
if: matrix.os != 'windows-latest'
if: ${{ !startsWith(matrix.os, 'windows') }}
run: |
tar czf "$ARCHIVE.tar.gz" "$ARCHIVE"
shasum -a 256 "$ARCHIVE.tar.gz" > "$ARCHIVE.tar.gz.sha256"
@@ -323,7 +325,7 @@ jobs:
shell: bash
run: |
cargo build --target ${{ env.TARGET }}
bin="target/${{ env.TARGET }}/debug/rg"
bin="target/${{ env.TARGET }}/debug/rgs"
echo "BIN=$bin" >> $GITHUB_ENV
- name: Create deployment directory
@@ -336,14 +338,14 @@ jobs:
- name: Generate man page
shell: bash
run: |
"$BIN" --generate man > "$DEPLOY_DIR/rg.1"
"$BIN" --generate man > "$DEPLOY_DIR/rgs.1"
- name: Generate shell completions
shell: bash
run: |
"$BIN" --generate complete-bash > "$DEPLOY_DIR/rg.bash"
"$BIN" --generate complete-fish > "$DEPLOY_DIR/rg.fish"
"$BIN" --generate complete-zsh > "$DEPLOY_DIR/_rg"
"$BIN" --generate complete-bash > "$DEPLOY_DIR/rgs.bash"
"$BIN" --generate complete-fish > "$DEPLOY_DIR/rgs.fish"
"$BIN" --generate complete-zsh > "$DEPLOY_DIR/_rgs"
- name: Build release binary
shell: bash
@@ -351,7 +353,7 @@ jobs:
cargo deb --profile deb --target ${{ env.TARGET }}
version="${{ needs.create-release.outputs.version }}"
echo "DEB_DIR=target/${{ env.TARGET }}/debian" >> $GITHUB_ENV
echo "DEB_NAME=ripgrep_$version-1_amd64.deb" >> $GITHUB_ENV
echo "DEB_NAME=rgs_$version-1_amd64.deb" >> $GITHUB_ENV
- name: Create sha256 sum of deb file
shell: bash

View File

@@ -1,6 +1,147 @@
TBD
===
Unreleased changes. Release notes have not yet been written.
Bug fixes:
* [BUG #3212](https://github.com/BurntSushi/ripgrep/pull/3212):
Don't check for the existence of `.jj` when `--no-ignore` is used.
15.1.0
======
This is a small release that fixes a bug with how ripgrep handles line
buffering. This might manifest as ripgrep printing output later than you
expect or not working correctly with `tail -f` (even if you're using the
`--line-buffered` flag).
Bug fixes:
* [BUG #3194](https://github.com/BurntSushi/ripgrep/issues/3194):
Fix a regression with `--line-buffered` introduced in ripgrep 15.0.0.
Feature enhancements:
* [FEATURE #3192](https://github.com/BurntSushi/ripgrep/pull/3192):
Add hyperlink alias for Cursor.
15.0.0 (2025-10-15)
===================
ripgrep 15 is a new major version release of ripgrep that mostly has bug fixes,
some minor performance improvements and minor new features. Here are some
highlights:
* Several bugs around gitignore matching have been fixed. This includes
a commonly reported bug related to applying gitignore rules from parent
directories.
* A memory usage regression when handling very large gitignore files has been
fixed.
* `rg -vf file`, where `file` is empty, now matches everything.
* The `-r/--replace` flag now works with `--json`.
* A subset of Jujutsu (`jj`) repositories are now treated as if they were git
repositories. That is, ripgrep will respect `jj`'s gitignores.
* Globs can now use nested curly braces.
Platform support:
* `aarch64` for Windows now has release artifacts.
* `powerpc64` no longer has release artifacts generated for it. The CI
release workflow stopped working, and I didn't deem it worth my time to
debug it. If someone wants this and can test it, I'd be happy to add it
back.
* ripgrep binaries are now compiled with full LTO enabled. You may notice
small performance improvements from this and a modest decrease in binary
size.
Performance improvements:
* [PERF #2111](https://github.com/BurntSushi/ripgrep/issues/2111):
Don't resolve helper binaries on Windows when `-z/--search-zip` isn't used.
* [PERF #2865](https://github.com/BurntSushi/ripgrep/pull/2865):
Avoid using path canonicalization on Windows when emitting hyperlinks.
Bug fixes:
* [BUG #829](https://github.com/BurntSushi/ripgrep/issues/829),
[BUG #2731](https://github.com/BurntSushi/ripgrep/issues/2731),
[BUG #2747](https://github.com/BurntSushi/ripgrep/issues/2747),
[BUG #2770](https://github.com/BurntSushi/ripgrep/issues/2770),
[BUG #2778](https://github.com/BurntSushi/ripgrep/issues/2778),
[BUG #2836](https://github.com/BurntSushi/ripgrep/issues/2836),
[BUG #2933](https://github.com/BurntSushi/ripgrep/pull/2933),
[BUG #3067](https://github.com/BurntSushi/ripgrep/pull/3067):
Fix bug related to gitignores from parent directories.
* [BUG #1332](https://github.com/BurntSushi/ripgrep/issues/1332),
[BUG #3001](https://github.com/BurntSushi/ripgrep/issues/3001):
Make `rg -vf file` where `file` is empty match everything.
* [BUG #2177](https://github.com/BurntSushi/ripgrep/issues/2177):
Ignore a UTF-8 BOM marker at the start of `.gitignore` (and similar files).
* [BUG #2750](https://github.com/BurntSushi/ripgrep/issues/2750):
Fix memory usage regression for some truly large gitignore files.
* [BUG #2944](https://github.com/BurntSushi/ripgrep/pull/2944):
Fix a bug where the "bytes searched" in `--stats` output could be incorrect.
* [BUG #2990](https://github.com/BurntSushi/ripgrep/issues/2990):
Fix a bug where ripgrep would mishandle globs that ended with a `.`.
* [BUG #2094](https://github.com/BurntSushi/ripgrep/issues/2094),
[BUG #3076](https://github.com/BurntSushi/ripgrep/issues/3076):
Fix bug with `-m/--max-count` and `-U/--multiline` showing too many matches.
* [BUG #3100](https://github.com/BurntSushi/ripgrep/pull/3100):
Preserve line terminators when using `-r/--replace` flag.
* [BUG #3108](https://github.com/BurntSushi/ripgrep/issues/3108):
Fix a bug where `-q --files-without-match` inverted the exit code.
* [BUG #3131](https://github.com/BurntSushi/ripgrep/issues/3131):
Document inconsistency between `-c/--count` and `--files-with-matches`.
* [BUG #3135](https://github.com/BurntSushi/ripgrep/issues/3135):
Fix rare panic for some classes of large regexes on large haystacks.
* [BUG #3140](https://github.com/BurntSushi/ripgrep/issues/3140):
Ensure hyphens in flag names are escaped in the roff text for the man page.
* [BUG #3155](https://github.com/BurntSushi/ripgrep/issues/3155):
Statically compile PCRE2 into macOS release artifacts on `aarch64`.
* [BUG #3173](https://github.com/BurntSushi/ripgrep/issues/3173):
Fix ancestor ignore filter bug when searching whitelisted hidden files.
* [BUG #3178](https://github.com/BurntSushi/ripgrep/discussions/3178):
Fix bug causing incorrect summary statistics with `--json` flag.
* [BUG #3179](https://github.com/BurntSushi/ripgrep/issues/3179):
Fix gitignore bug when searching absolute paths with global gitignores.
* [BUG #3180](https://github.com/BurntSushi/ripgrep/issues/3180):
Fix a panicking bug when using `-U/--multiline` and `-r/--replace`.
Feature enhancements:
* Many enhancements to the default set of file types available for filtering.
* [FEATURE #1872](https://github.com/BurntSushi/ripgrep/issues/1872):
Make `-r/--replace` work with `--json`.
* [FEATURE #2708](https://github.com/BurntSushi/ripgrep/pull/2708):
Completions for the fish shell take ripgrep's config file into account.
* [FEATURE #2841](https://github.com/BurntSushi/ripgrep/pull/2841):
Add `italic` to the list of available style attributes in `--color`.
* [FEATURE #2842](https://github.com/BurntSushi/ripgrep/pull/2842):
Directories containing `.jj` are now treated as git repositories.
* [FEATURE #2849](https://github.com/BurntSushi/ripgrep/pull/2849):
When using multithreading, schedule files to search in order given on CLI.
* [FEATURE #2943](https://github.com/BurntSushi/ripgrep/issues/2943):
Add `aarch64` release artifacts for Windows.
* [FEATURE #3024](https://github.com/BurntSushi/ripgrep/issues/3024):
Add `highlight` color type, for styling non-matching text in a matching line.
* [FEATURE #3048](https://github.com/BurntSushi/ripgrep/pull/3048):
Globs in ripgrep (and the `globset` crate) now support nested alternates.
* [FEATURE #3096](https://github.com/BurntSushi/ripgrep/pull/3096):
Improve completions for `--hyperlink-format` in bash and fish.
* [FEATURE #3102](https://github.com/BurntSushi/ripgrep/pull/3102):
Improve completions for `--hyperlink-format` in zsh.
14.1.1 (2024-09-08)
===================
Unreleased changes. Release notes have not yet been written.
This is a minor release with a bug fix for a matching bug. In particular, a bug
was found that could cause ripgrep to ignore lines that should match. That is,
false negatives. It is difficult to characterize the specific set of regexes
in which this occurs as it requires multiple different optimization strategies
to collide and produce an incorrect result. But as one reported example, in
ripgrep, the regex `(?i:e.x|ex)` does not match `e-x` when it should. (This
bug is a result of an inner literal optimization performed in the `grep-regex`
crate and not in the `regex` crate.)
Bug fixes:

340
Cargo.lock generated
View File

@@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
version = 4
[[package]]
name = "aho-corasick"
@@ -13,15 +13,24 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.87"
version = "1.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8"
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
[[package]]
name = "arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
dependencies = [
"derive_arbitrary",
]
[[package]]
name = "bstr"
version = "1.10.0"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [
"memchr",
"regex-automata",
@@ -30,10 +39,11 @@ dependencies = [
[[package]]
name = "cc"
version = "1.1.18"
version = "1.2.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b62ac837cdb5cb22e10a256099b4fc502b1dfe560cb282963a974d7abd80e476"
checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7"
dependencies = [
"find-msvc-tools",
"jobserver",
"libc",
"shlex",
@@ -41,24 +51,24 @@ dependencies = [
[[package]]
name = "cfg-if"
version = "1.0.0"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "crossbeam-channel"
version = "0.5.13"
version = "0.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2"
checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.5"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
@@ -75,15 +85,26 @@ dependencies = [
[[package]]
name = "crossbeam-utils"
version = "0.8.20"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "derive_arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "encoding_rs"
version = "0.8.34"
version = "0.8.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
dependencies = [
"cfg-if",
]
@@ -98,16 +119,35 @@ dependencies = [
]
[[package]]
name = "glob"
version = "0.3.1"
name = "find-msvc-tools"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127"
[[package]]
name = "getrandom"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasip2",
]
[[package]]
name = "glob"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]]
name = "globset"
version = "0.4.15"
version = "0.4.18"
dependencies = [
"aho-corasick",
"arbitrary",
"bstr",
"glob",
"log",
@@ -119,7 +159,7 @@ dependencies = [
[[package]]
name = "grep"
version = "0.3.1"
version = "0.4.1"
dependencies = [
"grep-cli",
"grep-matcher",
@@ -133,7 +173,7 @@ dependencies = [
[[package]]
name = "grep-cli"
version = "0.1.10"
version = "0.1.12"
dependencies = [
"bstr",
"globset",
@@ -145,7 +185,7 @@ dependencies = [
[[package]]
name = "grep-matcher"
version = "0.1.7"
version = "0.1.8"
dependencies = [
"memchr",
"regex",
@@ -153,7 +193,7 @@ dependencies = [
[[package]]
name = "grep-pcre2"
version = "0.1.7"
version = "0.1.9"
dependencies = [
"grep-matcher",
"log",
@@ -162,7 +202,7 @@ dependencies = [
[[package]]
name = "grep-printer"
version = "0.2.1"
version = "0.3.1"
dependencies = [
"bstr",
"grep-matcher",
@@ -176,7 +216,7 @@ dependencies = [
[[package]]
name = "grep-regex"
version = "0.1.12"
version = "0.1.14"
dependencies = [
"bstr",
"grep-matcher",
@@ -187,7 +227,7 @@ dependencies = [
[[package]]
name = "grep-searcher"
version = "0.1.13"
version = "0.1.16"
dependencies = [
"bstr",
"encoding_rs",
@@ -202,7 +242,7 @@ dependencies = [
[[package]]
name = "ignore"
version = "0.4.23"
version = "0.4.25"
dependencies = [
"bstr",
"crossbeam-channel",
@@ -218,77 +258,58 @@ dependencies = [
[[package]]
name = "itoa"
version = "1.0.11"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]]
name = "jemalloc-sys"
version = "0.5.4+5.3.0-patched"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac6c1946e1cea1788cbfde01c993b52a10e2da07f4bac608228d1bed20bfebf2"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "jemallocator"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0de374a9f8e63150e6f5e8a60cc14c668226d7a347d8aee1a45766e3c4dd3bc"
dependencies = [
"jemalloc-sys",
"libc",
]
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "jobserver"
version = "0.1.32"
version = "0.1.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0"
checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
dependencies = [
"getrandom",
"libc",
]
[[package]]
name = "lexopt"
version = "0.3.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baff4b617f7df3d896f97fe922b64817f6cd9a756bb81d40f8883f2f66dcb401"
checksum = "9fa0e2a1fcbe2f6be6c42e342259976206b383122fc152e872795338b5a3f3a7"
[[package]]
name = "libc"
version = "0.2.158"
version = "0.2.177"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439"
checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
[[package]]
name = "log"
version = "0.4.22"
version = "0.4.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
[[package]]
name = "memchr"
version = "2.7.4"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "memmap2"
version = "0.9.4"
version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322"
checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490"
dependencies = [
"libc",
]
[[package]]
name = "pcre2"
version = "0.2.9"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3be55c43ac18044541d58d897e8f4c55157218428953ebd39d86df3ba0286b2b"
checksum = "9e970b0fcce0c7ee6ef662744ff711f21ccd6f11b7cf03cd187a80e89797fc67"
dependencies = [
"libc",
"log",
@@ -297,9 +318,9 @@ dependencies = [
[[package]]
name = "pcre2-sys"
version = "0.2.9"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "550f5d18fb1b90c20b87e161852c10cde77858c3900c5059b5ad2a1449f11d8a"
checksum = "18b9073c1a2549bd409bf4a32c94d903bb1a09bf845bc306ae148897fa0760a4"
dependencies = [
"cc",
"libc",
@@ -308,33 +329,39 @@ dependencies = [
[[package]]
name = "pkg-config"
version = "0.3.30"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "proc-macro2"
version = "1.0.86"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.37"
version = "1.0.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.10.6"
name = "r-efi"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
@@ -344,9 +371,9 @@ dependencies = [
[[package]]
name = "regex-automata"
version = "0.4.7"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
@@ -355,19 +382,18 @@ dependencies = [
[[package]]
name = "regex-syntax"
version = "0.8.4"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "ripgrep"
version = "14.1.0"
version = "15.1.0"
dependencies = [
"anyhow",
"bstr",
"grep",
"ignore",
"jemallocator",
"lexopt",
"log",
"serde",
@@ -375,14 +401,15 @@ dependencies = [
"serde_json",
"termcolor",
"textwrap",
"tikv-jemallocator",
"walkdir",
]
[[package]]
name = "ryu"
version = "1.0.18"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
[[package]]
name = "same-file"
@@ -395,18 +422,27 @@ dependencies = [
[[package]]
name = "serde"
version = "1.0.210"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.210"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
@@ -415,14 +451,15 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.128"
version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8"
checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
dependencies = [
"itoa",
"memchr",
"ryu",
"serde",
"serde_core",
]
[[package]]
@@ -433,9 +470,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "syn"
version = "2.0.77"
version = "2.0.107"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed"
checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b"
dependencies = [
"proc-macro2",
"quote",
@@ -453,15 +490,35 @@ dependencies = [
[[package]]
name = "textwrap"
version = "0.16.1"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9"
checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057"
[[package]]
name = "tikv-jemalloc-sys"
version = "0.6.1+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd8aa5b2ab86a2cefa406d889139c162cbb230092f7d1d7cbc1716405d852a3b"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "tikv-jemallocator"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0359b4327f954e0567e69fb191cf1436617748813819c94b8cd4a431422d053a"
dependencies = [
"libc",
"tikv-jemalloc-sys",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06"
[[package]]
name = "walkdir"
@@ -474,83 +531,40 @@ dependencies = [
]
[[package]]
name = "winapi-util"
version = "0.1.9"
name = "wasip2"
version = "1.0.1+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
dependencies = [
"wit-bindgen",
]
[[package]]
name = "winapi-util"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys",
]
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-sys"
version = "0.59.0"
version = "0.61.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
dependencies = [
"windows-targets",
"windows-link",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
name = "wit-bindgen"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"

View File

@@ -1,7 +1,10 @@
[package]
name = "ripgrep"
version = "14.1.0" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
name = "rgs"
version = "0.1.0" #:version
authors = [
"Andrew Gallant <jamslam@gmail.com>",
"Peisong Xiao <peisong.xiao.xps@gmail.com>",
]
description = """
ripgrep is a line-oriented search tool that recursively searches the current
directory for a regex pattern while respecting gitignore rules. ripgrep has
@@ -20,16 +23,17 @@ exclude = [
"/pkg/brew",
"/benchsuite/",
"/scripts/",
"/crates/fuzz",
]
build = "build.rs"
autotests = false
edition = "2021"
rust-version = "1.72"
edition = "2024"
rust-version = "1.85"
[[bin]]
bench = false
path = "crates/core/main.rs"
name = "rg"
name = "rgs"
[[test]]
name = "integration"
@@ -51,16 +55,16 @@ members = [
[dependencies]
anyhow = "1.0.75"
bstr = "1.7.0"
grep = { version = "0.3.1", path = "crates/grep" }
ignore = { version = "0.4.22", path = "crates/ignore" }
grep = { version = "0.4.1", path = "crates/grep" }
ignore = { version = "0.4.24", path = "crates/ignore" }
lexopt = "0.3.0"
log = "0.4.5"
serde_json = "1.0.23"
termcolor = "1.1.0"
textwrap = { version = "0.16.0", default-features = false }
[target.'cfg(all(target_env = "musl", target_pointer_width = "64"))'.dependencies.jemallocator]
version = "0.5.0"
[target.'cfg(all(target_env = "musl", target_pointer_width = "64"))'.dependencies.tikv-jemallocator]
version = "0.6.0"
[dev-dependencies]
serde = "1.0.77"
@@ -85,36 +89,33 @@ panic = "abort"
incremental = false
codegen-units = 1
# This is the main way to strip binaries in the deb package created by
# 'cargo deb'. For other release binaries, we (currently) call 'strip'
# explicitly in the release process.
[profile.deb]
inherits = "release"
debug = false
inherits = "release-lto"
[package.metadata.deb]
features = ["pcre2"]
section = "utils"
assets = [
["target/release/rg", "usr/bin/", "755"],
["target/release/rgs", "usr/bin/", "755"],
["COPYING", "usr/share/doc/ripgrep/", "644"],
["LICENSE-MIT", "usr/share/doc/ripgrep/", "644"],
["UNLICENSE", "usr/share/doc/ripgrep/", "644"],
["CHANGELOG.md", "usr/share/doc/ripgrep/CHANGELOG", "644"],
["README.md", "usr/share/doc/ripgrep/README", "644"],
["README-ripgrep.md", "usr/share/doc/ripgrep/README-ripgrep", "644"],
["FAQ.md", "usr/share/doc/ripgrep/FAQ", "644"],
# The man page is automatically generated by ripgrep's build process, so
# this file isn't actually committed. Instead, to create a dpkg, either
# create a deployment/deb directory and copy the man page to it, or use the
# 'ci/build-deb' script.
["deployment/deb/rg.1", "usr/share/man/man1/rg.1", "644"],
["deployment/deb/rgs.1", "usr/share/man/man1/rgs.1", "644"],
# Similarly for shell completions.
["deployment/deb/rg.bash", "usr/share/bash-completion/completions/rg", "644"],
["deployment/deb/rg.fish", "usr/share/fish/vendor_completions.d/rg.fish", "644"],
["deployment/deb/_rg", "usr/share/zsh/vendor-completions/", "644"],
["deployment/deb/rgs.bash", "usr/share/bash-completion/completions/rgs", "644"],
["deployment/deb/rgs.fish", "usr/share/fish/vendor_completions.d/rgs.fish", "644"],
["deployment/deb/_rgs", "usr/share/zsh/vendor-completions/", "644"],
]
extended-description = """\
ripgrep (rg) recursively searches your current directory for a regex pattern.
rgs recursively searches your current directory for a regex pattern.
By default, ripgrep will respect your .gitignore and automatically skip hidden
files/directories and binary files.
"""

33
FAQ.md
View File

@@ -94,7 +94,7 @@ Does ripgrep have support for shell auto-completion?
Yes! If you installed ripgrep through a package manager on a Unix system, then
the shell completion files included in the release archive should have been
installed for you automatically. If not, you can generate completes using
installed for you automatically. If not, you can generate completions using
ripgrep's command line interface.
For **bash**:
@@ -113,14 +113,31 @@ $ mkdir -p "$dir"
$ rg --generate complete-fish > "$dir/rg.fish"
```
For **zsh**:
For **zsh**, the recommended approach is:
```
```zsh
$ dir="$HOME/.zsh-complete"
$ mkdir -p "$dir"
$ rg --generate complete-zsh > "$dir/_rg"
```
And then add `$HOME/.zsh-complete` to your `fpath` in, e.g., your
`$HOME/.zshrc` file:
```zsh
fpath=($HOME/.zsh-complete $fpath)
```
Or if you'd prefer to load and generate completions at the same time, you can
add the following to your `$HOME/.zshrc` file:
```zsh
$ source <(rg --generate complete-zsh)
```
Note though that while this approach is easier to set up, it is generally
slower than the previous method and will add more time to loading your shell
prompt.
For **PowerShell**, create the completions:
```
@@ -248,8 +265,8 @@ The `--colors` flag is a bit more complicated. The general format is:
to bold the output or not).
* `{value}` is determined by the value of `{attribute}`. If
`{attribute}` is `style`, then `{value}` should be one of `nobold`,
`bold`, `nointense`, `intense`, `nounderline` or `underline`. If
`{attribute}` is `fg` or `bg`, then `{value}` should be a color.
`bold`, `nointense`, `intense`, `nounderline`, `underline`, `noitalic` or
`italic`. If `{attribute}` is `fg` or `bg`, then `{value}` should be a color.
A color is specified by either one of eight English names, a single 8-bit
number (0-255, selecting one of 256 colors) or an RGB triple (with over 16
million possible values, or "true
@@ -268,8 +285,8 @@ As a special case, `--colors '{type}:none'` will clear all colors and styles
associated with `{type}`, which lets you start with a clean slate (instead of
building on top of ripgrep's default color settings).
Here's an example that makes highlights the matches with a nice blue background
with bolded white text:
Here's an example that highlights the matches with a nice blue background with
bolded white text:
```
$ rg somepattern \
@@ -1038,7 +1055,7 @@ How can I donate to ripgrep or its maintainers?
I welcome [sponsorship](https://github.com/sponsors/BurntSushi/).
Or if you'd prefer, donating to a charitably organization that you like would
Or if you'd prefer, donating to a charitable organization that you like would
also be most welcome. My favorites are:
* [The Internet Archive](https://archive.org/donate/)

View File

@@ -2,7 +2,7 @@
This guide is intended to give an elementary description of ripgrep and an
overview of its capabilities. This guide assumes that ripgrep is
[installed](README.md#installation)
[installed](README-ripgrep.md#installation)
and that readers have passing familiarity with using command line tools. This
also assumes a Unix-like system, although most commands are probably easily
translatable to any command line shell environment.
@@ -42,17 +42,17 @@ $ unzip 0.7.1.zip
$ cd ripgrep-0.7.1
$ ls
benchsuite grep tests Cargo.toml LICENSE-MIT
ci ignore wincolor CHANGELOG.md README.md
ci ignore wincolor CHANGELOG.md README-ripgrep.md
complete pkg appveyor.yml compile snapcraft.yaml
doc src build.rs COPYING UNLICENSE
globset termcolor Cargo.lock HomebrewFormula
```
Let's try our first search by looking for all occurrences of the word `fast`
in `README.md`:
in `README-ripgrep.md`:
```
$ rg fast README.md
$ rg fast README-ripgrep.md
75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
119:### Is it really faster than everything else?
@@ -64,7 +64,7 @@ $ rg fast README.md
search any files, then re-run ripgrep with the `--debug` flag. One likely cause
of this is that you have a `*` rule in a `$HOME/.gitignore` file.)
So what happened here? ripgrep read the contents of `README.md`, and for each
So what happened here? ripgrep read the contents of `README-ripgrep.md`, and for each
line that contained `fast`, ripgrep printed it to your terminal. ripgrep also
included the line number for each line by default. If your terminal supports
colors, then your output might actually look something like this screenshot:
@@ -79,7 +79,7 @@ what if we wanted to find all lines have a word that contains `fast` followed
by some number of other letters?
```
$ rg 'fast\w+' README.md
$ rg 'fast\w+' README-ripgrep.md
75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
119:### Is it really faster than everything else?
```
@@ -95,7 +95,7 @@ like `faster` will. `faste` would also match!
Here's a different variation on this same theme:
```
$ rg 'fast\w*' README.md
$ rg 'fast\w*' README-ripgrep.md
75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
119:### Is it really faster than everything else?
@@ -444,7 +444,7 @@ text with some other text. This is easiest to explain with an example. Remember
when we searched for the word `fast` in ripgrep's README?
```
$ rg fast README.md
$ rg fast README-ripgrep.md
75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
119:### Is it really faster than everything else?
@@ -456,7 +456,7 @@ What if we wanted to *replace* all occurrences of `fast` with `FAST`? That's
easy with ripgrep's `--replace` flag:
```
$ rg fast README.md --replace FAST
$ rg fast README-ripgrep.md --replace FAST
75: FASTer than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays FAST while
119:### Is it really FASTer than everything else?
@@ -467,7 +467,7 @@ $ rg fast README.md --replace FAST
or, more succinctly,
```
$ rg fast README.md -r FAST
$ rg fast README-ripgrep.md -r FAST
[snip]
```
@@ -476,7 +476,7 @@ in the output. If you instead wanted to replace an entire line of text, then
you need to include the entire line in your match. For example:
```
$ rg '^.*fast.*$' README.md -r FAST
$ rg '^.*fast.*$' README-ripgrep.md -r FAST
75:FAST
88:FAST
119:FAST
@@ -488,7 +488,7 @@ Alternatively, you can combine the `--only-matching` (or `-o` for short) with
the `--replace` flag to achieve the same result:
```
$ rg fast README.md --only-matching --replace FAST
$ rg fast README-ripgrep.md --only-matching --replace FAST
75:FAST
88:FAST
119:FAST
@@ -499,7 +499,7 @@ $ rg fast README.md --only-matching --replace FAST
or, more succinctly,
```
$ rg fast README.md -or FAST
$ rg fast README-ripgrep.md -or FAST
[snip]
```
@@ -512,7 +512,7 @@ group" (indicated by parentheses) so that we can reference it later in our
replacement string. For example:
```
$ rg 'fast\s+(\w+)' README.md -r 'fast-$1'
$ rg 'fast\s+(\w+)' README-ripgrep.md -r 'fast-$1'
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast-while
124:Summarizing, `ripgrep` is fast-because:
```
@@ -528,7 +528,7 @@ using the indices. For example, the following command is equivalent to the
above command:
```
$ rg 'fast\s+(?P<word>\w+)' README.md -r 'fast-$word'
$ rg 'fast\s+(?P<word>\w+)' README-ripgrep.md -r 'fast-$word'
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast-while
124:Summarizing, `ripgrep` is fast-because:
```

541
README-ripgrep.md Normal file
View File

@@ -0,0 +1,541 @@
ripgrep (rg)
------------
ripgrep is a line-oriented search tool that recursively searches the current
directory for a regex pattern. By default, ripgrep will respect gitignore rules
and automatically skip hidden files/directories and binary files. (To disable
all automatic filtering by default, use `rg -uuu`.) ripgrep has first class
support on Windows, macOS and Linux, with binary downloads available for [every
release](https://github.com/BurntSushi/ripgrep/releases). ripgrep is similar to
other popular search tools like The Silver Searcher, ack and grep.
[![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions)
[![Crates.io](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep)
[![Packaging status](https://repology.org/badge/tiny-repos/ripgrep.svg)](https://repology.org/project/ripgrep/badges)
Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org).
### CHANGELOG
Please see the [CHANGELOG](CHANGELOG.md) for a release history.
### Documentation quick links
* [Installation](#installation)
* [User Guide](GUIDE.md)
* [Frequently Asked Questions](FAQ.md)
* [Regex syntax](https://docs.rs/regex/1/regex/#syntax)
* [Configuration files](GUIDE.md#configuration-file)
* [Shell completions](FAQ.md#complete)
* [Building](#building)
* [Translations](#translations)
### Screenshot of search results
[![A screenshot of a sample search with ripgrep](https://burntsushi.net/stuff/ripgrep1.png)](https://burntsushi.net/stuff/ripgrep1.png)
### Quick examples comparing tools
This example searches the entire
[Linux kernel source tree](https://github.com/BurntSushi/linux)
(after running `make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where
all matches must be words. Timings were collected on a system with an Intel
i9-12900K 5.2 GHz.
Please remember that a single benchmark is never enough! See my
[blog post on ripgrep](https://blog.burntsushi.net/ripgrep/)
for a very detailed comparison with more benchmarks and analysis.
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 536 | **0.082s** (1.00x) |
| [hypergrep](https://github.com/p-ranav/hypergrep) | `hgrep -n -w '[A-Z]+_SUSPEND'` | 536 | 0.167s (2.04x) |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `git grep -P -n -w '[A-Z]+_SUSPEND'` | 536 | 0.273s (3.34x) |
| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 534 | 0.443s (5.43x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r --ignore-files --no-hidden -I -w '[A-Z]+_SUSPEND'` | 536 | 0.639s (7.82x) |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 536 | 0.727s (8.91x) |
| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 536 | 2.670s (32.70x) |
| [ack](https://github.com/beyondgrep/ack3) | `ack -w '[A-Z]+_SUSPEND'` | 2677 | 2.935s (35.94x) |
Here's another benchmark on the same corpus as above that disregards gitignore
files and searches with a whitelist instead. The corpus is the same as in the
previous benchmark, and the flags passed to each command ensure that they are
doing equivalent work:
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg -uuu -tc -n -w '[A-Z]+_SUSPEND'` | 447 | **0.063s** (1.00x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 447 | 0.607s (9.62x) |
| [GNU grep](https://www.gnu.org/software/grep/) | `grep -E -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 447 | 0.674s (10.69x) |
Now we'll move on to searching a single large file. Here is a straight-up
comparison between ripgrep, ugrep and GNU grep on a file cached in memory
(~13GB, [`OpenSubtitles.raw.en.gz`](http://opus.nlpl.eu/download.php?f=OpenSubtitles/v2018/mono/OpenSubtitles.raw.en.gz), decompressed):
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep (Unicode) | `rg -w 'Sherlock [A-Z]\w+'` | 7882 | **1.042s** (1.00x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w 'Sherlock [A-Z]\w+'` | 7882 | 1.339s (1.28x) |
| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 egrep -w 'Sherlock [A-Z]\w+'` | 7882 | 6.577s (6.31x) |
In the above benchmark, passing the `-n` flag (for showing line numbers)
increases the times to `1.664s` for ripgrep and `9.484s` for GNU grep. ugrep
times are unaffected by the presence or absence of `-n`.
Beware of performance cliffs though:
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep (Unicode) | `rg -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | **1.053s** (1.00x) |
| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 grep -E -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | 6.234s (5.92x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | 28.973s (27.51x) |
And performance can drop precipitously across the board when searching big
files for patterns without any opportunities for literal optimizations:
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg '[A-Za-z]{30}'` | 6749 | **15.569s** (1.00x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -E '[A-Za-z]{30}'` | 6749 | 21.857s (1.40x) |
| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C grep -E '[A-Za-z]{30}'` | 6749 | 32.409s (2.08x) |
| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 grep -E '[A-Za-z]{30}'` | 6795 | 8m30s (32.74x) |
Finally, high match counts also tend to both tank performance and smooth
out the differences between tools (because performance is dominated by how
quickly one can handle a match and not the algorithm used to detect the match,
generally speaking):
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg the` | 83499915 | **6.948s** (1.00x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep the` | 83499915 | 11.721s (1.69x) |
| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C grep the` | 83499915 | 15.217s (2.19x) |
### Why should I use ripgrep?
* It can replace many use cases served by other search tools
because it contains most of their features and is generally faster. (See
[the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly
replace grep.)
* Like other tools specialized to code search, ripgrep defaults to
[recursive search](GUIDE.md#recursive-search) and does [automatic
filtering](GUIDE.md#automatic-filtering). Namely, ripgrep won't search files
ignored by your `.gitignore`/`.ignore`/`.rgignore` files, it won't search
hidden files and it won't search binary files. Automatic filtering can be
disabled with `rg -uuu`.
* ripgrep can [search specific types of files](GUIDE.md#manual-filtering-file-types).
For example, `rg -tpy foo` limits your search to Python files and `rg -Tjs
foo` excludes JavaScript files from your search. ripgrep can be taught about
new file types with custom matching rules.
* ripgrep supports many features found in `grep`, such as showing the context
of search results, searching multiple patterns, highlighting matches with
color and full Unicode support. Unlike GNU grep, ripgrep stays fast while
supporting Unicode (which is always on).
* ripgrep has optional support for switching its regex engine to use PCRE2.
Among other things, this makes it possible to use look-around and
backreferences in your patterns, which are not supported in ripgrep's default
regex engine. PCRE2 support can be enabled with `-P/--pcre2` (use PCRE2
always) or `--auto-hybrid-regex` (use PCRE2 only if needed). An alternative
syntax is provided via the `--engine (default|pcre2|auto)` option.
* ripgrep has [rudimentary support for replacements](GUIDE.md#replacements),
which permit rewriting output based on what was matched.
* ripgrep supports [searching files in text encodings](GUIDE.md#file-encoding)
other than UTF-8, such as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more.
(Some support for automatically detecting UTF-16 is provided. Other text
encodings must be specifically specified with the `-E/--encoding` flag.)
* ripgrep supports searching files compressed in a common format (brotli,
bzip2, gzip, lz4, lzma, xz, or zstandard) with the `-z/--search-zip` flag.
* ripgrep supports
[arbitrary input preprocessing filters](GUIDE.md#preprocessor)
which could be PDF text extraction, less supported decompression, decrypting,
automatic encoding detection and so on.
* ripgrep can be configured via a
[configuration file](GUIDE.md#configuration-file).
In other words, use ripgrep if you like speed, filtering by default, fewer
bugs and Unicode support.
### Why shouldn't I use ripgrep?
Despite initially not wanting to add every feature under the sun to ripgrep,
over time, ripgrep has grown support for most features found in other file
searching tools. This includes searching for results spanning across multiple
lines, and opt-in support for PCRE2, which provides look-around and
backreference support.
At this point, the primary reasons not to use ripgrep probably consist of one
or more of the following:
* You need a portable and ubiquitous tool. While ripgrep works on Windows,
macOS and Linux, it is not ubiquitous and it does not conform to any
standard such as POSIX. The best tool for this job is good old grep.
* There still exists some other feature (or bug) not listed in this README that
you rely on that's in another tool that isn't in ripgrep.
* There is a performance edge case where ripgrep doesn't do well where another
tool does do well. (Please file a bug report!)
* ripgrep isn't possible to install on your machine or isn't available for your
platform. (Please file a bug report!)
### Is it really faster than everything else?
Generally, yes. A large number of benchmarks with detailed analysis for each is
[available on my blog](https://blog.burntsushi.net/ripgrep/).
Summarizing, ripgrep is fast because:
* It is built on top of
[Rust's regex engine](https://github.com/rust-lang/regex).
Rust's regex engine uses finite automata, SIMD and aggressive literal
optimizations to make searching very fast. (PCRE2 support can be opted into
with the `-P/--pcre2` flag.)
* Rust's regex library maintains performance with full Unicode support by
building UTF-8 decoding directly into its deterministic finite automaton
engine.
* It supports searching with either memory maps or by searching incrementally
with an intermediate buffer. The former is better for single files and the
latter is better for large directories. ripgrep chooses the best searching
strategy for you automatically.
* Applies your ignore patterns in `.gitignore` files using a
[`RegexSet`](https://docs.rs/regex/1/regex/struct.RegexSet.html).
That means a single file path can be matched against multiple glob patterns
simultaneously.
* It uses a lock-free parallel recursive directory iterator, courtesy of
[`crossbeam`](https://docs.rs/crossbeam) and
[`ignore`](https://docs.rs/ignore).
### Feature comparison
Andy Lester, author of [ack](https://beyondgrep.com/), has published an
excellent table comparing the features of ack, ag, git-grep, GNU grep and
ripgrep: https://beyondgrep.com/feature-comparison/
Note that ripgrep has grown a few significant new features recently that
are not yet present in Andy's table. This includes, but is not limited to,
configuration files, passthru, support for searching compressed files,
multiline search and opt-in fancy regex support via PCRE2.
### Playground
If you'd like to try ripgrep before installing, there's an unofficial
[playground](https://codapi.org/ripgrep/) and an [interactive
tutorial](https://codapi.org/try/ripgrep/).
If you have any questions about these, please open an issue in the [tutorial
repo](https://github.com/nalgeon/tryxinyminutes).
### Installation
The binary name for ripgrep is `rg`.
**[Archives of precompiled binaries for ripgrep are available for Windows,
macOS and Linux.](https://github.com/BurntSushi/ripgrep/releases)** Linux and
Windows binaries are static executables. Users of platforms not explicitly
mentioned below are advised to download one of these archives.
If you're a **macOS Homebrew** or a **Linuxbrew** user, then you can install
ripgrep from homebrew-core:
```
$ brew install ripgrep
```
If you're a **MacPorts** user, then you can install ripgrep from the
[official ports](https://www.macports.org/ports.php?by=name&substr=ripgrep):
```
$ sudo port install ripgrep
```
If you're a **Windows Chocolatey** user, then you can install ripgrep from the
[official repo](https://chocolatey.org/packages/ripgrep):
```
$ choco install ripgrep
```
If you're a **Windows Scoop** user, then you can install ripgrep from the
[official bucket](https://github.com/ScoopInstaller/Main/blob/master/bucket/ripgrep.json):
```
$ scoop install ripgrep
```
If you're a **Windows Winget** user, then you can install ripgrep from the
[winget-pkgs](https://github.com/microsoft/winget-pkgs/tree/master/manifests/b/BurntSushi/ripgrep)
repository:
```
$ winget install BurntSushi.ripgrep.MSVC
```
If you're an **Arch Linux** user, then you can install ripgrep from the official repos:
```
$ sudo pacman -S ripgrep
```
If you're a **Gentoo** user, you can install ripgrep from the
[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep):
```
$ sudo emerge sys-apps/ripgrep
```
If you're a **Fedora** user, you can install ripgrep from official
repositories.
```
$ sudo dnf install ripgrep
```
If you're an **openSUSE** user, ripgrep is included in **openSUSE Tumbleweed**
and **openSUSE Leap** since 15.1.
```
$ sudo zypper install ripgrep
```
If you're a **CentOS Stream 10** user, you can install ripgrep from the
[EPEL](https://docs.fedoraproject.org/en-US/epel/getting-started/) repository:
```
$ sudo dnf config-manager --set-enabled crb
$ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-10.noarch.rpm
$ sudo dnf install ripgrep
```
If you're a **Red Hat 10** user, you can install ripgrep from the
[EPEL](https://docs.fedoraproject.org/en-US/epel/getting-started/) repository:
```
$ sudo subscription-manager repos --enable codeready-builder-for-rhel-10-$(arch)-rpms
$ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-10.noarch.rpm
$ sudo dnf install ripgrep
```
If you're a **Rocky Linux 10** user, you can install ripgrep from the
[EPEL](https://docs.fedoraproject.org/en-US/epel/getting-started/) repository:
```
$ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-10.noarch.rpm
$ sudo dnf install ripgrep
```
If you're a **Nix** user, you can install ripgrep from
[nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/by-name/ri/ripgrep/package.nix):
```
$ nix-env --install ripgrep
```
If you're a **Flox** user, you can install ripgrep as follows:
```
$ flox install ripgrep
```
If you're a **Guix** user, you can install ripgrep from the official
package collection:
```
$ guix install ripgrep
```
If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**),
then ripgrep can be installed using a binary `.deb` file provided in each
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases).
```
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/14.1.1/ripgrep_14.1.1-1_amd64.deb
$ sudo dpkg -i ripgrep_14.1.1-1_amd64.deb
```
If you run Debian stable, ripgrep is [officially maintained by
Debian](https://tracker.debian.org/pkg/rust-ripgrep), although its version may
be older than the `deb` package available in the previous step.
```
$ sudo apt-get install ripgrep
```
If you're an **Ubuntu Cosmic (18.10)** (or newer) user, ripgrep is
[available](https://launchpad.net/ubuntu/+source/rust-ripgrep) using the same
packaging as Debian:
```
$ sudo apt-get install ripgrep
```
(N.B. Various snaps for ripgrep on Ubuntu are also available, but none of them
seem to work right and generate a number of very strange bug reports that I
don't know how to fix and don't have the time to fix. Therefore, it is no
longer a recommended installation option.)
If you're an **ALT** user, you can install ripgrep from the
[official repo](https://packages.altlinux.org/en/search?name=ripgrep):
```
$ sudo apt-get install ripgrep
```
If you're a **FreeBSD** user, then you can install ripgrep from the
[official ports](https://www.freshports.org/textproc/ripgrep/):
```
$ sudo pkg install ripgrep
```
If you're an **OpenBSD** user, then you can install ripgrep from the
[official ports](https://openports.se/textproc/ripgrep):
```
$ doas pkg_add ripgrep
```
If you're a **NetBSD** user, then you can install ripgrep from
[pkgsrc](https://pkgsrc.se/textproc/ripgrep):
```
$ sudo pkgin install ripgrep
```
If you're a **Haiku x86_64** user, then you can install ripgrep from the
[official ports](https://github.com/haikuports/haikuports/tree/master/sys-apps/ripgrep):
```
$ sudo pkgman install ripgrep
```
If you're a **Haiku x86_gcc2** user, then you can install ripgrep from the
same port as Haiku x86_64 using the x86 secondary architecture build:
```
$ sudo pkgman install ripgrep_x86
```
If you're a **Void Linux** user, then you can install ripgrep from the
[official repository](https://voidlinux.org/packages/?arch=x86_64&q=ripgrep):
```
$ sudo xbps-install -Syv ripgrep
```
If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
* Note that the minimum supported version of Rust for ripgrep is **1.85.0**,
although ripgrep may work with older versions.
* Note that the binary may be bigger than expected because it contains debug
symbols. This is intentional. To remove debug symbols and therefore reduce
the file size, run `strip` on the binary.
```
$ cargo install ripgrep
```
Alternatively, one can use [`cargo
binstall`](https://github.com/cargo-bins/cargo-binstall) to install a ripgrep
binary directly from GitHub:
```
$ cargo binstall ripgrep
```
### Building
ripgrep is written in Rust, so you'll need to grab a
[Rust installation](https://www.rust-lang.org/) in order to compile it.
ripgrep compiles with Rust 1.85.0 (stable) or newer. In general, ripgrep tracks
the latest stable release of the Rust compiler.
To build ripgrep:
```
$ git clone https://github.com/BurntSushi/ripgrep
$ cd ripgrep
$ cargo build --release
$ ./target/release/rg --version
0.1.3
```
**NOTE:** In the past, ripgrep supported a `simd-accel` Cargo feature when
using a Rust nightly compiler. This only benefited UTF-16 transcoding.
Since it required unstable features, this build mode was prone to breakage.
Because of that, support for it has been removed. If you want SIMD
optimizations for UTF-16 transcoding, then you'll have to petition the
[`encoding_rs`](https://github.com/hsivonen/encoding_rs) project to use stable
APIs.
Finally, optional PCRE2 support can be built with ripgrep by enabling the
`pcre2` feature:
```
$ cargo build --release --features 'pcre2'
```
Enabling the PCRE2 feature works with a stable Rust compiler and will
attempt to automatically find and link with your system's PCRE2 library via
`pkg-config`. If one doesn't exist, then ripgrep will build PCRE2 from source
using your system's C compiler and then statically link it into the final
executable. Static linking can be forced even when there is an available PCRE2
system library by either building ripgrep with the MUSL target or by setting
`PCRE2_SYS_STATIC=1`.
ripgrep can be built with the MUSL target on Linux by first installing the MUSL
library on your system (consult your friendly neighborhood package manager).
Then you just need to add MUSL support to your Rust toolchain and rebuild
ripgrep, which yields a fully static executable:
```
$ rustup target add x86_64-unknown-linux-musl
$ cargo build --release --target x86_64-unknown-linux-musl
```
Applying the `--features` flag from above works as expected. If you want to
build a static executable with MUSL and with PCRE2, then you will need to have
`musl-gcc` installed, which might be in a separate package from the actual
MUSL library, depending on your Linux distribution.
### Running tests
ripgrep is relatively well-tested, including both unit tests and integration
tests. To run the full test suite, use:
```
$ cargo test --all
```
from the repository root.
### Related tools
* [delta](https://github.com/dandavison/delta) is a syntax highlighting
pager that supports the `rg --json` output format. So all you need to do to
make it work is `rg --json pattern | delta`. See [delta's manual section on
grep](https://dandavison.github.io/delta/grep.html) for more details.
### Vulnerability reporting
For reporting a security vulnerability, please
[contact Andrew Gallant](https://blog.burntsushi.net/about/).
The contact page has my email address and PGP public key if you wish to send an
encrypted message.
### Translations
The following is a list of known translations of ripgrep's documentation. These
are unofficially maintained and may not be up to date.
* [Chinese](https://github.com/chinanf-boy/ripgrep-zh#%E6%9B%B4%E6%96%B0-)
* [Spanish](https://github.com/UltiRequiem/traducciones/tree/master/ripgrep)

540
README.md
View File

@@ -1,524 +1,42 @@
ripgrep (rg)
------------
ripgrep is a line-oriented search tool that recursively searches the current
directory for a regex pattern. By default, ripgrep will respect gitignore rules
and automatically skip hidden files/directories and binary files. (To disable
all automatic filtering by default, use `rg -uuu`.) ripgrep has first class
support on Windows, macOS and Linux, with binary downloads available for [every
release](https://github.com/BurntSushi/ripgrep/releases). ripgrep is similar to
other popular search tools like The Silver Searcher, ack and grep.
# rgs
[![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions)
[![Crates.io](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep)
[![Packaging status](https://repology.org/badge/tiny-repos/ripgrep.svg)](https://repology.org/project/ripgrep/badges)
This repository is a fork of ripgrep with additional features. The original
ripgrep documentation is in README-ripgrep.md:
Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org).
- README-ripgrep.md
## Additional features in this fork
### CHANGELOG
### Multiline windowing
Please see the [CHANGELOG](CHANGELOG.md) for a release history.
- `--multiline-window=N` (short: `-W N`) limits multiline matches to a sliding
window of N lines while still using multiline matching semantics.
- `--multiline-window` implicitly enables `--multiline` and cannot be used with
`--no-multiline`.
### Documentation quick links
### Per-file match indexing
* [Installation](#installation)
* [User Guide](GUIDE.md)
* [Frequently Asked Questions](FAQ.md)
* [Regex syntax](https://docs.rs/regex/1/regex/#syntax)
* [Configuration files](GUIDE.md#configuration-file)
* [Shell completions](FAQ.md#complete)
* [Building](#building)
* [Translations](#translations)
- `--in-file-index` / `--no-in-file-index` control indexing of matches within a
file to disambiguate overlapping multiline results.
- When enabled, output is formatted as `filename[index]:line:`.
- When searching a single file, the output is formatted as `[index]:line:` (no
filename).
### Squashed output
### Screenshot of search results
- `--squash` collapses contiguous Unicode whitespace (including newlines) into a
single ASCII space in output.
- `--squash-nl-only` collapses newlines into spaces while preserving other
whitespace.
- When multiple lines are squashed into one, line numbers are printed as
`start-end:`.
[![A screenshot of a sample search with ripgrep](https://burntsushi.net/stuff/ripgrep1.png)](https://burntsushi.net/stuff/ripgrep1.png)
### Binary name
- The target binary name is `rgs` (not `rg`).
### Quick examples comparing tools
## Acknowledgements
This example searches the entire
[Linux kernel source tree](https://github.com/BurntSushi/linux)
(after running `make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where
all matches must be words. Timings were collected on a system with an Intel
i9-12900K 5.2 GHz.
Please remember that a single benchmark is never enough! See my
[blog post on ripgrep](https://blog.burntsushi.net/ripgrep/)
for a very detailed comparison with more benchmarks and analysis.
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 536 | **0.082s** (1.00x) |
| [hypergrep](https://github.com/p-ranav/hypergrep) | `hgrep -n -w '[A-Z]+_SUSPEND'` | 536 | 0.167s (2.04x) |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `git grep -P -n -w '[A-Z]+_SUSPEND'` | 536 | 0.273s (3.34x) |
| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 534 | 0.443s (5.43x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r --ignore-files --no-hidden -I -w '[A-Z]+_SUSPEND'` | 536 | 0.639s (7.82x) |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 536 | 0.727s (8.91x) |
| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 536 | 2.670s (32.70x) |
| [ack](https://github.com/beyondgrep/ack3) | `ack -w '[A-Z]+_SUSPEND'` | 2677 | 2.935s (35.94x) |
Here's another benchmark on the same corpus as above that disregards gitignore
files and searches with a whitelist instead. The corpus is the same as in the
previous benchmark, and the flags passed to each command ensure that they are
doing equivalent work:
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg -uuu -tc -n -w '[A-Z]+_SUSPEND'` | 447 | **0.063s** (1.00x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 447 | 0.607s (9.62x) |
| [GNU grep](https://www.gnu.org/software/grep/) | `grep -E -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 447 | 0.674s (10.69x) |
Now we'll move on to searching a single large file. Here is a straight-up
comparison between ripgrep, ugrep and GNU grep on a file cached in memory
(~13GB, [`OpenSubtitles.raw.en.gz`](http://opus.nlpl.eu/download.php?f=OpenSubtitles/v2018/mono/OpenSubtitles.raw.en.gz), decompressed):
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep (Unicode) | `rg -w 'Sherlock [A-Z]\w+'` | 7882 | **1.042s** (1.00x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w 'Sherlock [A-Z]\w+'` | 7882 | 1.339s (1.28x) |
| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 egrep -w 'Sherlock [A-Z]\w+'` | 7882 | 6.577s (6.31x) |
In the above benchmark, passing the `-n` flag (for showing line numbers)
increases the times to `1.664s` for ripgrep and `9.484s` for GNU grep. ugrep
times are unaffected by the presence or absence of `-n`.
Beware of performance cliffs though:
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep (Unicode) | `rg -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | **1.053s** (1.00x) |
| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 grep -E -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | 6.234s (5.92x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | 28.973s (27.51x) |
And performance can drop precipitously across the board when searching big
files for patterns without any opportunities for literal optimizations:
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg '[A-Za-z]{30}'` | 6749 | **15.569s** (1.00x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -E '[A-Za-z]{30}'` | 6749 | 21.857s (1.40x) |
| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C grep -E '[A-Za-z]{30}'` | 6749 | 32.409s (2.08x) |
| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 grep -E '[A-Za-z]{30}'` | 6795 | 8m30s (32.74x) |
Finally, high match counts also tend to both tank performance and smooth
out the differences between tools (because performance is dominated by how
quickly one can handle a match and not the algorithm used to detect the match,
generally speaking):
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg the` | 83499915 | **6.948s** (1.00x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep the` | 83499915 | 11.721s (1.69x) |
| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C grep the` | 83499915 | 15.217s (2.19x) |
### Why should I use ripgrep?
* It can replace many use cases served by other search tools
because it contains most of their features and is generally faster. (See
[the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly
replace grep.)
* Like other tools specialized to code search, ripgrep defaults to
[recursive search](GUIDE.md#recursive-search) and does [automatic
filtering](GUIDE.md#automatic-filtering). Namely, ripgrep won't search files
ignored by your `.gitignore`/`.ignore`/`.rgignore` files, it won't search
hidden files and it won't search binary files. Automatic filtering can be
disabled with `rg -uuu`.
* ripgrep can [search specific types of files](GUIDE.md#manual-filtering-file-types).
For example, `rg -tpy foo` limits your search to Python files and `rg -Tjs
foo` excludes JavaScript files from your search. ripgrep can be taught about
new file types with custom matching rules.
* ripgrep supports many features found in `grep`, such as showing the context
of search results, searching multiple patterns, highlighting matches with
color and full Unicode support. Unlike GNU grep, ripgrep stays fast while
supporting Unicode (which is always on).
* ripgrep has optional support for switching its regex engine to use PCRE2.
Among other things, this makes it possible to use look-around and
backreferences in your patterns, which are not supported in ripgrep's default
regex engine. PCRE2 support can be enabled with `-P/--pcre2` (use PCRE2
always) or `--auto-hybrid-regex` (use PCRE2 only if needed). An alternative
syntax is provided via the `--engine (default|pcre2|auto)` option.
* ripgrep has [rudimentary support for replacements](GUIDE.md#replacements),
which permit rewriting output based on what was matched.
* ripgrep supports [searching files in text encodings](GUIDE.md#file-encoding)
other than UTF-8, such as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more.
(Some support for automatically detecting UTF-16 is provided. Other text
encodings must be specifically specified with the `-E/--encoding` flag.)
* ripgrep supports searching files compressed in a common format (brotli,
bzip2, gzip, lz4, lzma, xz, or zstandard) with the `-z/--search-zip` flag.
* ripgrep supports
[arbitrary input preprocessing filters](GUIDE.md#preprocessor)
which could be PDF text extraction, decompression of less common formats,
decrypting,
automatic encoding detection and so on.
* ripgrep can be configured via a
[configuration file](GUIDE.md#configuration-file).
In other words, use ripgrep if you like speed, filtering by default, fewer
bugs and Unicode support.
### Why shouldn't I use ripgrep?
Despite initially not wanting to add every feature under the sun to ripgrep,
over time, ripgrep has grown support for most features found in other file
searching tools. This includes searching for results spanning across multiple
lines, and opt-in support for PCRE2, which provides look-around and
backreference support.
At this point, the primary reasons not to use ripgrep probably consist of one
or more of the following:
* You need a portable and ubiquitous tool. While ripgrep works on Windows,
macOS and Linux, it is not ubiquitous and it does not conform to any
standard such as POSIX. The best tool for this job is good old grep.
* There still exists some other feature (or bug) not listed in this README that
you rely on that's in another tool that isn't in ripgrep.
* There is a performance edge case where ripgrep doesn't do well where another
tool does do well. (Please file a bug report!)
* ripgrep isn't possible to install on your machine or isn't available for your
platform. (Please file a bug report!)
### Is it really faster than everything else?
Generally, yes. A large number of benchmarks with detailed analysis for each is
[available on my blog](https://blog.burntsushi.net/ripgrep/).
Summarizing, ripgrep is fast because:
* It is built on top of
[Rust's regex engine](https://github.com/rust-lang/regex).
Rust's regex engine uses finite automata, SIMD and aggressive literal
optimizations to make searching very fast. (PCRE2 support can be opted into
with the `-P/--pcre2` flag.)
* Rust's regex library maintains performance with full Unicode support by
building UTF-8 decoding directly into its deterministic finite automaton
engine.
* It supports searching with either memory maps or by searching incrementally
with an intermediate buffer. The former is better for single files and the
latter is better for large directories. ripgrep chooses the best searching
strategy for you automatically.
* It applies your ignore patterns in `.gitignore` files using a
[`RegexSet`](https://docs.rs/regex/1/regex/struct.RegexSet.html).
That means a single file path can be matched against multiple glob patterns
simultaneously.
* It uses a lock-free parallel recursive directory iterator, courtesy of
[`crossbeam`](https://docs.rs/crossbeam) and
[`ignore`](https://docs.rs/ignore).
### Feature comparison
Andy Lester, author of [ack](https://beyondgrep.com/), has published an
excellent table comparing the features of ack, ag, git-grep, GNU grep and
ripgrep: https://beyondgrep.com/feature-comparison/
Note that ripgrep has grown a few significant new features recently that
are not yet present in Andy's table. This includes, but is not limited to,
configuration files, passthru, support for searching compressed files,
multiline search and opt-in fancy regex support via PCRE2.
### Playground
If you'd like to try ripgrep before installing, there's an unofficial
[playground](https://codapi.org/ripgrep/) and an [interactive
tutorial](https://codapi.org/try/ripgrep/).
If you have any questions about these, please open an issue in the [tutorial
repo](https://github.com/nalgeon/tryxinyminutes).
### Installation
The binary name for ripgrep is `rg`.
**[Archives of precompiled binaries for ripgrep are available for Windows,
macOS and Linux.](https://github.com/BurntSushi/ripgrep/releases)** Linux and
Windows binaries are static executables. Users of platforms not explicitly
mentioned below are advised to download one of these archives.
If you're a **macOS Homebrew** or a **Linuxbrew** user, then you can install
ripgrep from homebrew-core:
```
$ brew install ripgrep
```
If you're a **MacPorts** user, then you can install ripgrep from the
[official ports](https://www.macports.org/ports.php?by=name&substr=ripgrep):
```
$ sudo port install ripgrep
```
If you're a **Windows Chocolatey** user, then you can install ripgrep from the
[official repo](https://chocolatey.org/packages/ripgrep):
```
$ choco install ripgrep
```
If you're a **Windows Scoop** user, then you can install ripgrep from the
[official bucket](https://github.com/ScoopInstaller/Main/blob/master/bucket/ripgrep.json):
```
$ scoop install ripgrep
```
If you're a **Windows Winget** user, then you can install ripgrep from the
[winget-pkgs](https://github.com/microsoft/winget-pkgs/tree/master/manifests/b/BurntSushi/ripgrep)
repository:
```
$ winget install BurntSushi.ripgrep.MSVC
```
If you're an **Arch Linux** user, then you can install ripgrep from the official repos:
```
$ sudo pacman -S ripgrep
```
If you're a **Gentoo** user, you can install ripgrep from the
[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep):
```
$ sudo emerge sys-apps/ripgrep
```
If you're a **Fedora** user, you can install ripgrep from official
repositories.
```
$ sudo dnf install ripgrep
```
If you're an **openSUSE** user, ripgrep is included in **openSUSE Tumbleweed**
and **openSUSE Leap** since 15.1.
```
$ sudo zypper install ripgrep
```
If you're a **RHEL/CentOS 7/8** user, you can install ripgrep from
[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
```
$ sudo yum install -y yum-utils
$ sudo yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo
$ sudo yum install ripgrep
```
If you're a **Nix** user, you can install ripgrep from
[nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/text/ripgrep/default.nix):
```
$ nix-env --install ripgrep
```
If you're a **Flox** user, you can install ripgrep as follows:
```
$ flox install ripgrep
```
If you're a **Guix** user, you can install ripgrep from the official
package collection:
```
$ guix install ripgrep
```
If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**),
then ripgrep can be installed using a binary `.deb` file provided in each
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases).
```
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/14.1.0/ripgrep_14.1.0-1_amd64.deb
$ sudo dpkg -i ripgrep_14.1.0-1_amd64.deb
```
If you run Debian stable, ripgrep is [officially maintained by
Debian](https://tracker.debian.org/pkg/rust-ripgrep), although its version may
be older than the `deb` package available in the previous step.
```
$ sudo apt-get install ripgrep
```
If you're an **Ubuntu Cosmic (18.10)** (or newer) user, ripgrep is
[available](https://launchpad.net/ubuntu/+source/rust-ripgrep) using the same
packaging as Debian:
```
$ sudo apt-get install ripgrep
```
(N.B. Various snaps for ripgrep on Ubuntu are also available, but none of them
seem to work right and generate a number of very strange bug reports that I
don't know how to fix and don't have the time to fix. Therefore, it is no
longer a recommended installation option.)
If you're an **ALT** user, you can install ripgrep from the
[official repo](https://packages.altlinux.org/en/search?name=ripgrep):
```
$ sudo apt-get install ripgrep
```
If you're a **FreeBSD** user, then you can install ripgrep from the
[official ports](https://www.freshports.org/textproc/ripgrep/):
```
$ sudo pkg install ripgrep
```
If you're an **OpenBSD** user, then you can install ripgrep from the
[official ports](https://openports.se/textproc/ripgrep):
```
$ doas pkg_add ripgrep
```
If you're a **NetBSD** user, then you can install ripgrep from
[pkgsrc](https://pkgsrc.se/textproc/ripgrep):
```
$ sudo pkgin install ripgrep
```
If you're a **Haiku x86_64** user, then you can install ripgrep from the
[official ports](https://github.com/haikuports/haikuports/tree/master/sys-apps/ripgrep):
```
$ sudo pkgman install ripgrep
```
If you're a **Haiku x86_gcc2** user, then you can install ripgrep from the
same port as Haiku x86_64 using the x86 secondary architecture build:
```
$ sudo pkgman install ripgrep_x86
```
If you're a **Void Linux** user, then you can install ripgrep from the
[official repository](https://voidlinux.org/packages/?arch=x86_64&q=ripgrep):
```
$ sudo xbps-install -Syv ripgrep
```
If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
* Note that the minimum supported version of Rust for ripgrep is **1.72.0**,
although ripgrep may work with older versions.
* Note that the binary may be bigger than expected because it contains debug
symbols. This is intentional. To remove debug symbols and therefore reduce
the file size, run `strip` on the binary.
```
$ cargo install ripgrep
```
Alternatively, one can use [`cargo
binstall`](https://github.com/cargo-bins/cargo-binstall) to install a ripgrep
binary directly from GitHub:
```
$ cargo binstall ripgrep
```
### Building
ripgrep is written in Rust, so you'll need to grab a
[Rust installation](https://www.rust-lang.org/) in order to compile it.
ripgrep compiles with Rust 1.72.0 (stable) or newer. In general, ripgrep tracks
the latest stable release of the Rust compiler.
To build ripgrep:
```
$ git clone https://github.com/BurntSushi/ripgrep
$ cd ripgrep
$ cargo build --release
$ ./target/release/rg --version
0.1.3
```
**NOTE:** In the past, ripgrep supported a `simd-accel` Cargo feature when
using a Rust nightly compiler. This only benefited UTF-16 transcoding.
Since it required unstable features, this build mode was prone to breakage.
Because of that, support for it has been removed. If you want SIMD
optimizations for UTF-16 transcoding, then you'll have to petition the
[`encoding_rs`](https://github.com/hsivonen/encoding_rs) project to use stable
APIs.
Finally, optional PCRE2 support can be built with ripgrep by enabling the
`pcre2` feature:
```
$ cargo build --release --features 'pcre2'
```
Enabling the PCRE2 feature works with a stable Rust compiler and will
attempt to automatically find and link with your system's PCRE2 library via
`pkg-config`. If one doesn't exist, then ripgrep will build PCRE2 from source
using your system's C compiler and then statically link it into the final
executable. Static linking can be forced even when there is an available PCRE2
system library by either building ripgrep with the MUSL target or by setting
`PCRE2_SYS_STATIC=1`.
ripgrep can be built with the MUSL target on Linux by first installing the MUSL
library on your system (consult your friendly neighborhood package manager).
Then you just need to add MUSL support to your Rust toolchain and rebuild
ripgrep, which yields a fully static executable:
```
$ rustup target add x86_64-unknown-linux-musl
$ cargo build --release --target x86_64-unknown-linux-musl
```
Applying the `--features` flag from above works as expected. If you want to
build a static executable with MUSL and with PCRE2, then you will need to have
`musl-gcc` installed, which might be in a separate package from the actual
MUSL library, depending on your Linux distribution.
### Running tests
ripgrep is relatively well-tested, including both unit tests and integration
tests. To run the full test suite, use:
```
$ cargo test --all
```
from the repository root.
### Related tools
* [delta](https://github.com/dandavison/delta) is a syntax highlighting
pager that supports the `rg --json` output format. So all you need to do to
make it work is `rg --json pattern | delta`. See [delta's manual section on
grep](https://dandavison.github.io/delta/grep.html) for more details.
### Vulnerability reporting
For reporting a security vulnerability, please
[contact Andrew Gallant](https://blog.burntsushi.net/about/).
The contact page has my email address and PGP public key if you wish to send an
encrypted message.
### Translations
The following is a list of known translations of ripgrep's documentation. These
are unofficially maintained and may not be up to date.
* [Chinese](https://github.com/chinanf-boy/ripgrep-zh#%E6%9B%B4%E6%96%B0-)
* [Spanish](https://github.com/UltiRequiem/traducciones/tree/master/ripgrep)
This project is built on top of ripgrep by Andrew Gallant and contributors.
All credit for the original tool, documentation, and design belongs to the
ripgrep project. See README-ripgrep.md and the upstream licenses for details.

View File

@@ -7,6 +7,7 @@
a strong motivation otherwise, review and update every dependency. Also
run `--aggressive`, but don't update to crates that are still in beta.
* Update date in `crates/core/flags/doc/template.rg.1`.
* Update the CHANGELOG as appropriate.
* Review changes for every crate in `crates` since the last ripgrep release.
If the set of changes is non-empty, issue a new release for that crate. Check
crates in the following order. After updating a crate, ensure minimal
@@ -22,7 +23,6 @@
* crates/printer
* crates/grep (bump minimal versions as necessary)
* crates/core (do **not** bump version, but update dependencies as needed)
* Update the CHANGELOG as appropriate.
* Edit the `Cargo.toml` to set the new ripgrep version. Run
`cargo update -p ripgrep` so that the `Cargo.lock` is updated. Commit the
changes and create a new signed tag. Alternatively, use
@@ -41,8 +41,6 @@
> tool that recursively searches the current directory for a regex pattern.
> By default, ripgrep will respect gitignore rules and automatically skip
> hidden files/directories and binary files.
* Run `git checkout {VERSION} && ci/build-and-publish-m2 {VERSION}` on a macOS
system with Apple silicon.
* Run `cargo publish`.
* Run `ci/sha256-releases {VERSION} >> pkg/brew/ripgrep-bin.rb`. Then edit
`pkg/brew/ripgrep-bin.rb` to update the version number and sha256 hashes.

View File

@@ -22,13 +22,13 @@ fn set_windows_exe_options() {
manifest.push(MANIFEST);
let Some(manifest) = manifest.to_str() else { return };
println!("cargo:rerun-if-changed={}", MANIFEST);
println!("cargo:rerun-if-changed={MANIFEST}");
// Embed the Windows application manifest file.
println!("cargo:rustc-link-arg-bin=rg=/MANIFEST:EMBED");
println!("cargo:rustc-link-arg-bin=rg=/MANIFESTINPUT:{manifest}");
println!("cargo:rustc-link-arg-bin=rgs=/MANIFEST:EMBED");
println!("cargo:rustc-link-arg-bin=rgs=/MANIFESTINPUT:{manifest}");
// Turn linker warnings into errors. Helps debugging, otherwise the
// warnings get squashed (I believe).
println!("cargo:rustc-link-arg-bin=rg=/WX");
println!("cargo:rustc-link-arg-bin=rgs=/WX");
}
/// Make the current git hash available to the build as the environment
@@ -37,10 +37,25 @@ fn set_git_revision_hash() {
use std::process::Command;
let args = &["rev-parse", "--short=10", "HEAD"];
let Ok(output) = Command::new("git").args(args).output() else { return };
let rev = String::from_utf8_lossy(&output.stdout).trim().to_string();
if rev.is_empty() {
return;
let output = Command::new("git").args(args).output();
match output {
Ok(output) => {
let rev =
String::from_utf8_lossy(&output.stdout).trim().to_string();
if rev.is_empty() {
println!(
"cargo:warning=output from `git rev-parse` is empty, \
so skipping embedding of commit hash"
);
return;
}
println!("cargo:rustc-env=RIPGREP_BUILD_GIT_HASH={rev}");
}
Err(e) => {
println!(
"cargo:warning=failed to run `git rev-parse`, \
so skipping embedding of commit hash: {e}"
);
}
}
println!("cargo:rustc-env=RIPGREP_BUILD_GIT_HASH={}", rev);
}

View File

@@ -1,43 +0,0 @@
#!/bin/bash
# This script builds a ripgrep release for the aarch64-apple-darwin target.
# At time of writing (2023-11-21), GitHub Actions does not offer free Apple silicon
# runners. Since I have somewhat recently acquired an M2 mac mini, I just use
# this script to build the release tarball and upload it with `gh`.
#
# Once GitHub Actions has proper support for Apple silicon, we should add it
# to our release workflow and drop this script.
set -e
version="$1"
if [ -z "$version" ]; then
echo "missing version" >&2
echo "Usage: "$(basename "$0")" <version>" >&2
exit 1
fi
if ! grep -q "version = \"$version\"" Cargo.toml; then
echo "version does not match Cargo.toml" >&2
exit 1
fi
target=aarch64-apple-darwin
cargo build --release --features pcre2 --target $target
BIN=target/$target/release/rg
NAME=ripgrep-$version-$target
ARCHIVE="deployment/m2/$NAME"
mkdir -p "$ARCHIVE"/{complete,doc}
cp "$BIN" "$ARCHIVE"/
strip "$ARCHIVE/rg"
cp {README.md,COPYING,UNLICENSE,LICENSE-MIT} "$ARCHIVE"/
cp {CHANGELOG.md,FAQ.md,GUIDE.md} "$ARCHIVE"/doc/
"$BIN" --generate complete-bash > "$ARCHIVE/complete/rg.bash"
"$BIN" --generate complete-fish > "$ARCHIVE/complete/rg.fish"
"$BIN" --generate complete-powershell > "$ARCHIVE/complete/_rg.ps1"
"$BIN" --generate complete-zsh > "$ARCHIVE/complete/_rg"
"$BIN" --generate man > "$ARCHIVE/doc/rg.1"
tar c -C deployment/m2 -z -f "$ARCHIVE.tar.gz" "$NAME"
shasum -a 256 "$ARCHIVE.tar.gz" > "$ARCHIVE.tar.gz.sha256"
gh release upload "$version" "$ARCHIVE.tar.gz" "$ARCHIVE.tar.gz.sha256"

View File

@@ -11,7 +11,7 @@ version="$1"
# Linux and Darwin builds.
for arch in i686 x86_64; do
for target in apple-darwin unknown-linux-musl; do
url="https://github.com/BurntSushi/ripgrep/releases/download/$version/ripgrep-$version-$arch-$target.tar.gz"
url="https://git.peisongxiao.com/peisongxiao/rgs/releases/download/$version/rgs-$version-$arch-$target.tar.gz"
sha=$(curl -sfSL "$url" | sha256sum)
echo "$version-$arch-$target $sha"
done
@@ -19,7 +19,7 @@ done
# Source.
for ext in zip tar.gz; do
url="https://github.com/BurntSushi/ripgrep/archive/$version.$ext"
url="https://git.peisongxiao.com/peisongxiao/rgs/archive/$version.$ext"
sha=$(curl -sfSL "$url" | sha256sum)
echo "source.$ext $sha"
done

View File

@@ -18,11 +18,11 @@ get_comp_args() {
main() {
local diff
local rg="${0:a:h}/../${TARGET_DIR:-target}/release/rg"
local _rg="${0:a:h}/../crates/core/flags/complete/rg.zsh"
local rg="${0:a:h}/../${TARGET_DIR:-target}/release/rgs"
local _rg="${0:a:h}/../crates/core/flags/complete/rgs.zsh"
local -a help_args comp_args
[[ -e $rg ]] || rg=${rg/%\/release\/rg/\/debug\/rg}
[[ -e $rg ]] || rg=${rg/%\/release\/rgs/\/debug\/rgs}
rg=${rg:a}
_rg=${_rg:a}

View File

@@ -11,4 +11,4 @@ if ! command -V sudo; then
fi
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
zsh xz-utils liblz4-tool musl-tools brotli zstd
zsh xz-utils liblz4-tool musl-tools brotli zstd g++

View File

@@ -1,6 +1,6 @@
[package]
name = "grep-cli"
version = "0.1.10" #:version
version = "0.1.12" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Utilities for search oriented command line applications.
@@ -11,11 +11,11 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli"
readme = "README.md"
keywords = ["regex", "grep", "cli", "utility", "util"]
license = "Unlicense OR MIT"
edition = "2021"
edition = "2024"
[dependencies]
bstr = { version = "1.6.2", features = ["std"] }
globset = { version = "0.4.15", path = "../globset" }
globset = { version = "0.4.18", path = "../globset" }
log = "0.4.20"
termcolor = "1.3.0"

View File

@@ -177,7 +177,7 @@ impl DecompressionMatcher {
/// If there are multiple possible commands matching the given path, then
/// the command added last takes precedence.
pub fn command<P: AsRef<Path>>(&self, path: P) -> Option<Command> {
for i in self.globs.matches(path).into_iter().rev() {
if let Some(i) = self.globs.matches(path).into_iter().next_back() {
let decomp_cmd = &self.commands[i];
let mut cmd = Command::new(&decomp_cmd.bin);
cmd.args(&decomp_cmd.args);
@@ -414,6 +414,8 @@ impl io::Read for DecompressionReader {
/// relative path. We permit this since it is assumed that the user has set
/// this explicitly, and thus, desires this behavior.
///
/// # Platform behavior
///
/// On non-Windows, this is a no-op.
pub fn resolve_binary<P: AsRef<Path>>(
prog: P,

View File

@@ -16,7 +16,7 @@ use std::{ffi::OsString, io};
pub fn hostname() -> io::Result<OsString> {
#[cfg(windows)]
{
use winapi_util::sysinfo::{get_computer_name, ComputerNameKind};
use winapi_util::sysinfo::{ComputerNameKind, get_computer_name};
get_computer_name(ComputerNameKind::PhysicalDnsHostname)
}
#[cfg(unix)]

View File

@@ -133,19 +133,19 @@ mod wtr;
pub use crate::{
decompress::{
resolve_binary, DecompressionMatcher, DecompressionMatcherBuilder,
DecompressionReader, DecompressionReaderBuilder,
DecompressionMatcher, DecompressionMatcherBuilder,
DecompressionReader, DecompressionReaderBuilder, resolve_binary,
},
escape::{escape, escape_os, unescape, unescape_os},
hostname::hostname,
human::{parse_human_readable_size, ParseSizeError},
human::{ParseSizeError, parse_human_readable_size},
pattern::{
pattern_from_bytes, pattern_from_os, patterns_from_path,
patterns_from_reader, patterns_from_stdin, InvalidPatternError,
InvalidPatternError, pattern_from_bytes, pattern_from_os,
patterns_from_path, patterns_from_reader, patterns_from_stdin,
},
process::{CommandError, CommandReader, CommandReaderBuilder},
wtr::{
stdout, stdout_buffered_block, stdout_buffered_line, StandardStream,
StandardStream, stdout, stdout_buffered_block, stdout_buffered_line,
},
};

View File

@@ -2,15 +2,18 @@
Provides completions for ripgrep's CLI for the fish shell.
*/
use crate::flags::{defs::FLAGS, CompletionType};
use crate::flags::{CompletionType, defs::FLAGS};
const TEMPLATE: &'static str = "complete -c rg !SHORT! -l !LONG! -d '!DOC!'";
const TEMPLATE_NEGATED: &'static str =
"complete -c rg -l !NEGATED! -n '__fish_contains_opt !SHORT! !LONG!' -d '!DOC!'\n";
const TEMPLATE_NEGATED: &'static str = "complete -c rg -l !NEGATED! -n '__rg_contains_opt !LONG! !SHORT!' -d '!DOC!'\n";
/// Generate completions for Fish.
///
/// Reference: <https://fishshell.com/docs/current/completions.html>
pub(crate) fn generate() -> String {
let mut out = String::new();
out.push_str(include_str!("prelude.fish"));
out.push('\n');
for flag in FLAGS.iter() {
let short = match flag.name_short() {
None => "".to_string(),
@@ -55,6 +58,10 @@ pub(crate) fn generate() -> String {
out.push_str(&completion);
if let Some(negated) = flag.name_negated() {
let short = match flag.name_short() {
None => "".to_string(),
Some(byte) => char::from(byte).to_string(),
};
out.push_str(
&TEMPLATE_NEGATED
.replace("!NEGATED!", &negated)

View File

@@ -34,8 +34,7 @@ Register-ArgumentCompleter -Native -CommandName 'rg' -ScriptBlock {
}
";
const TEMPLATE_FLAG: &'static str =
"[CompletionResult]::new('!DASH_NAME!', '!NAME!', [CompletionResultType]::ParameterName, '!DOC!')";
const TEMPLATE_FLAG: &'static str = "[CompletionResult]::new('!DASH_NAME!', '!NAME!', [CompletionResultType]::ParameterName, '!DOC!')";
/// Generate completions for PowerShell.
///
@@ -72,7 +71,7 @@ pub(crate) fn generate() -> String {
}
if let Some(negated) = flag.name_negated() {
let dash_name = format!("--{}", negated);
let dash_name = format!("--{negated}");
flags.push_str("\n ");
flags.push_str(
&TEMPLATE_FLAG

View File

@@ -0,0 +1,31 @@
# Usage: __rg_contains_opt LONG [SHORT]
#
# Returns success (status 0) if the long flag --LONG, or a short-flag bundle
# containing SHORT, appears either on the command line typed so far or in the
# user's ripgrep config file. A specialized, faster stand-in for fish's stock
# __fish_contains_opt helper.
function __rg_contains_opt --description 'Specialized __fish_contains_opt'
    # Cache the (filtered) config file contents in a global variable because
    # this function is called many times per completion attempt.
    #
    # NOTE: the cache persists for the entire shell session, even if the
    # RIPGREP_CONFIG_PATH variable or the file contents change afterwards.
    if not set -q __rg_config
        set -g __rg_config
        if set -qx RIPGREP_CONFIG_PATH
            # Keep only non-empty, non-comment lines from the config file.
            set __rg_config (
                cat -- $RIPGREP_CONFIG_PATH 2>/dev/null \
                | string trim \
                | string match -rv '^$|^#'
            )
        end
    end
    # Tokens to search: the command line up to the cursor, the token being
    # completed, and any arguments supplied via the config file.
    set -l commandline (commandline -cpo) (commandline -ct) $__rg_config
    # Long form: look for an exact --LONG token.
    if contains -- "--$argv[1]" $commandline
        return 0
    end
    # Short form (if given): SHORT anywhere inside a short-flag bundle
    # such as -abc.
    if set -q argv[2]
        if string match -qr -- "^-[^-]*$argv[2]" $commandline
            return 0
        end
    end
    return 1
end

View File

@@ -1,7 +1,7 @@
#compdef rg
#compdef rgs
##
# zsh completion function for ripgrep
# zsh completion function for rgs
#
# Run ci/test-complete after building to ensure that the options supported by
# this function stay in sync with the `rg` binary.
@@ -96,6 +96,8 @@ _rg() {
+ '(file-name)' # File-name options
{-H,--with-filename}'[show file name for matches]'
{-I,--no-filename}"[don't show file name for matches]"
'--in-file-index[show per-file match index in output]'
'--no-in-file-index[hide per-file match index in output]'
+ '(file-system)' # File system options
"--one-file-system[don't descend into directories on other file systems]"
@@ -210,6 +212,7 @@ _rg() {
+ '(multiline)' # Multiline options
{-U,--multiline}'[permit matching across multiple lines]'
{-W+,--multiline-window=}'[limit multiline matches to NUM lines (with -U enabled implicitly)]:number of lines'
$no'(multiline-dotall)--no-multiline[restrict matches to at most one line each]'
+ '(multiline-dotall)' # Multiline DOTALL options
@@ -279,6 +282,10 @@ _rg() {
+ '(threads)' # Thread-count options
'(sort)'{-j+,--threads=}'[specify approximate number of threads to use]:number of threads'
+ '(squash)' # Squash options
'--squash[squash contiguous whitespace into a single space]'
'--squash-nl-only[squash new lines into a single space]'
+ '(trim)' # Trim options
'--trim[trim any ASCII whitespace prefix from each line]'
$no"--no-trim[don't trim ASCII whitespace prefix from each line]"
@@ -319,7 +326,7 @@ _rg() {
'--field-context-separator[set string to delimit fields in context lines]'
'--field-match-separator[set string to delimit fields in matching lines]'
'--hostname-bin=[executable for getting system hostname]:hostname executable:_command_names -e'
'--hyperlink-format=[specify pattern for hyperlinks]:pattern'
'--hyperlink-format=[specify pattern for hyperlinks]: :_rg_hyperlink_formats'
'--trace[show more verbose debug messages]'
'--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)'
"(1 stats)--files[show each file that would be searched (but don't search)]"
@@ -363,10 +370,11 @@ _rg() {
'column:specify coloring for column numbers'
'line:specify coloring for line numbers'
'match:specify coloring for match text'
'highlight:specify coloring for matching lines'
'path:specify coloring for file names'
)
descr='color/style type'
elif [[ ${IPREFIX#--*=}$PREFIX == (column|line|match|path):[^:]# ]]; then
elif [[ ${IPREFIX#--*=}$PREFIX == (column|line|match|highlight|path):[^:]# ]]; then
suf=( -qS: )
tmp=(
'none:clear color/style for type'
@@ -409,6 +417,7 @@ _rg() {
}
# Complete encodings
(( $+functions[_rg_encodings] )) ||
_rg_encodings() {
local -a expl
local -aU _encodings
@@ -421,6 +430,7 @@ _rg_encodings() {
}
# Complete file types
(( $+functions[_rg_types] )) ||
_rg_types() {
local -a expl
local -aU _types
@@ -434,7 +444,58 @@ _rg_types() {
fi
}
_rg "$@"
# Complete hyperlink format-string aliases
(( $+functions[_rg_hyperlink_format_aliases] )) ||
_rg_hyperlink_format_aliases() {
_describe -t format-aliases 'hyperlink format alias' '(
!HYPERLINK_ALIASES!
)'
}
# Complete custom hyperlink format strings
(( $+functions[_rg_hyperlink_format_strings] )) ||
_rg_hyperlink_format_strings() {
local op='{' ed='}'
local -a pfx sfx rmv
compquote op ed
sfx=( -S $ed )
rmv=( -r ${(q)ed[1]} )
compset -S "$op*"
compset -S "$ed*" && sfx=( -S '' )
compset -P "*$ed"
compset -p ${#PREFIX%$op*}
compset -P $op || pfx=( -P $op )
WSL_DISTRO_NAME=${WSL_DISTRO_NAME:-\$WSL_DISTRO_NAME} \
_describe -t format-variables 'hyperlink format variable' '(
path:"absolute path to file containing match (required)"
host:"system host name or output of --hostname-bin executable"
line:"line number of match"
column:"column of match (requires {line})"
wslprefix:"\"wsl$/$WSL_DISTRO_NAME\" (for WSL share)"
)' "${(@)pfx}" "${(@)sfx}" "${(@)rmv}"
}
# Complete hyperlink formats
(( $+functions[_rg_hyperlink_formats] )) ||
_rg_hyperlink_formats() {
_alternative \
'format-string-aliases: :_rg_hyperlink_format_aliases' \
'format-strings: :_rg_hyperlink_format_strings'
}
# Don't run the completion function when being sourced by itself.
#
# See https://github.com/BurntSushi/ripgrep/issues/2956
# See https://github.com/BurntSushi/ripgrep/pull/2957
if [[ $funcstack[1] == _rg ]] || (( ! $+functions[compdef] )); then
_rg "$@"
else
compdef _rg rg
fi
################################################################################
# ZSH COMPLETION REFERENCE

View File

@@ -19,5 +19,14 @@ long as it meets criteria 3 and 4 above.
/// Generate completions for zsh.
pub(crate) fn generate() -> String {
include_str!("rg.zsh").replace("!ENCODINGS!", super::ENCODINGS.trim_end())
let hyperlink_alias_descriptions = grep::printer::hyperlink_aliases()
.iter()
.map(|alias| {
format!(r#" {}:"{}""#, alias.name(), alias.description())
})
.collect::<Vec<String>>()
.join("\n");
include_str!("rgs.zsh")
.replace("!ENCODINGS!", super::ENCODINGS.trim_end())
.replace("!HYPERLINK_ALIASES!", &hyperlink_alias_descriptions)
}

View File

@@ -10,7 +10,7 @@ use std::{
path::{Path, PathBuf},
};
use bstr::{io::BufReadExt, ByteSlice};
use bstr::{ByteSlice, io::BufReadExt};
/// Return a sequence of arguments derived from ripgrep rc configuration files.
pub fn args() -> Vec<OsString> {

View File

@@ -17,18 +17,18 @@ ripgrep. For example, `-E`, `--encoding` and `--no-encoding` all manipulate the
same encoding state in ripgrep.
*/
use std::path::PathBuf;
use std::{path::PathBuf, sync::LazyLock};
use {anyhow::Context as AnyhowContext, bstr::ByteVec};
use crate::flags::{
Category, Flag, FlagValue,
lowargs::{
BinaryMode, BoundaryMode, BufferMode, CaseMode, ColorChoice,
ContextMode, EncodingMode, EngineChoice, GenerateMode, LoggingMode,
LowArgs, MmapMode, Mode, PatternSource, SearchMode, SortMode,
SortModeKind, SpecialMode, TypeChange,
},
Category, Flag, FlagValue,
};
#[cfg(test)]
@@ -97,6 +97,7 @@ pub(super) const FLAGS: &[&dyn Flag] = &[
&MaxFilesize,
&Mmap,
&Multiline,
&MultilineWindow,
&MultilineDotall,
&NoConfig,
&NoIgnore,
@@ -133,6 +134,8 @@ pub(super) const FLAGS: &[&dyn Flag] = &[
&Text,
&Threads,
&Trace,
&Squash,
&SquashNlOnly,
&Trim,
&Type,
&TypeNot,
@@ -142,6 +145,7 @@ pub(super) const FLAGS: &[&dyn Flag] = &[
&Unrestricted,
&Version,
&Vimgrep,
&InFileIndex,
&WithFilename,
&WithFilenameNo,
&WordRegexp,
@@ -751,7 +755,8 @@ the \flag{colors} flag to manually set all color styles to \fBnone\fP:
\-\-colors 'path:none' \\
\-\-colors 'line:none' \\
\-\-colors 'column:none' \\
\-\-colors 'match:none'
\-\-colors 'match:none' \\
\-\-colors 'highlight:none'
.EE
.sp
"
@@ -829,21 +834,21 @@ impl Flag for Colors {
"Configure color settings and styles."
}
fn doc_long(&self) -> &'static str {
r"
r#"
This flag specifies color settings for use in the output. This flag may be
provided multiple times. Settings are applied iteratively. Pre-existing color
labels are limited to one of eight choices: \fBred\fP, \fBblue\fP, \fBgreen\fP,
\fBcyan\fP, \fBmagenta\fP, \fByellow\fP, \fBwhite\fP and \fBblack\fP. Styles
are limited to \fBnobold\fP, \fBbold\fP, \fBnointense\fP, \fBintense\fP,
\fBnounderline\fP or \fBunderline\fP.
\fBnounderline\fP, \fBunderline\fP, \fBnoitalic\fP or \fBitalic\fP.
.sp
The format of the flag is
\fB{\fP\fItype\fP\fB}:{\fP\fIattribute\fP\fB}:{\fP\fIvalue\fP\fB}\fP.
\fItype\fP should be one of \fBpath\fP, \fBline\fP, \fBcolumn\fP or
\fBmatch\fP. \fIattribute\fP can be \fBfg\fP, \fBbg\fP or \fBstyle\fP.
\fIvalue\fP is either a color (for \fBfg\fP and \fBbg\fP) or a text style. A
special format, \fB{\fP\fItype\fP\fB}:none\fP, will clear all color settings
for \fItype\fP.
\fItype\fP should be one of \fBpath\fP, \fBline\fP, \fBcolumn\fP,
\fBhighlight\fP or \fBmatch\fP. \fIattribute\fP can be \fBfg\fP, \fBbg\fP or
\fBstyle\fP. \fIvalue\fP is either a color (for \fBfg\fP and \fBbg\fP) or a
text style. A special format, \fB{\fP\fItype\fP\fB}:none\fP, will clear all
color settings for \fItype\fP.
.sp
For example, the following command will change the match color to magenta and
the background color for line numbers to yellow:
@@ -852,6 +857,17 @@ the background color for line numbers to yellow:
rg \-\-colors 'match:fg:magenta' \-\-colors 'line:bg:yellow'
.EE
.sp
Another example, the following command will "highlight" the non-matching text
in matching lines:
.sp
.EX
rg \-\-colors 'highlight:bg:yellow' \-\-colors 'highlight:fg:black'
.EE
.sp
The "highlight" color type is particularly useful for contrasting matching
lines with surrounding context printed by the \flag{before-context},
\flag{after-context}, \flag{context} or \flag{passthru} flags.
.sp
Extended colors can be used for \fIvalue\fP when the tty supports ANSI color
sequences. These are specified as either \fIx\fP (256-color) or
.IB x , x , x
@@ -874,7 +890,7 @@ or, equivalently,
.sp
Note that the \fBintense\fP and \fBnointense\fP styles will have no effect when
used alongside these extended color codes.
"
"#
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
@@ -908,6 +924,24 @@ fn test_colors() {
"line:bg:yellow".parse().unwrap()
]
);
let args = parse_low_raw(["--colors", "highlight:bg:240"]).unwrap();
assert_eq!(args.colors, vec!["highlight:bg:240".parse().unwrap()]);
let args = parse_low_raw([
"--colors",
"match:fg:magenta",
"--colors",
"highlight:bg:blue",
])
.unwrap();
assert_eq!(
args.colors,
vec![
"match:fg:magenta".parse().unwrap(),
"highlight:bg:blue".parse().unwrap()
]
);
}
/// --column
@@ -1234,18 +1268,27 @@ impl Flag for Count {
}
fn doc_long(&self) -> &'static str {
r"
This flag suppresses normal output and shows the number of lines that match the
given patterns for each file searched. Each file containing a match has its
path and count printed on each line. Note that unless \flag{multiline}
is enabled, this reports the number of lines that match and not the total
number of matches. In multiline mode, \flag{count} is equivalent to
\flag{count-matches}.
This flag suppresses normal output and shows the number of lines that match
the given patterns for each file searched. Each file containing a match has
its path and count printed on each line. Note that unless \flag{multiline} is
enabled and the pattern(s) given can match over multiple lines, this reports
the number of lines that match and not the total number of matches. When
multiline mode is enabled and the pattern(s) given can match over multiple
lines, \flag{count} is equivalent to \flag{count-matches}.
.sp
If only one file is given to ripgrep, then only the count is printed if there
is a match. The \flag{with-filename} flag can be used to force printing the
file path in this case. If you need a count to be printed regardless of whether
there is a match, then use \flag{include-zero}.
.sp
Note that it is possible for this flag to have results inconsistent with
the output of \flag{files-with-matches}. Notably, by default, ripgrep tries
to avoid searching files with binary data. With this flag, ripgrep needs to
search the entire content of files, which may include binary data. But with
\flag{files-with-matches}, ripgrep can stop as soon as a match is observed,
which may come well before any binary data. To avoid this inconsistency without
disabling binary detection, use the \flag{binary} flag.
.sp
This overrides the \flag{count-matches} flag. Note that when \flag{count}
is combined with \flag{only-matching}, then ripgrep behaves as if
\flag{count-matches} was given.
@@ -2154,6 +2197,14 @@ impl Flag for FilesWithMatches {
r"
Print only the paths with at least one match and suppress match contents.
.sp
Note that it is possible for this flag to have results inconsistent with the
output of \flag{count}. Notably, by default, ripgrep tries to avoid searching
files with binary data. With this flag, ripgrep might stop searching before
the binary data is observed. But with \flag{count}, ripgrep has to search the
entire contents to determine the match count, which means it might see binary
data that causes it to skip searching that file. To avoid this inconsistency
without disabling binary detection, use the \flag{binary} flag.
.sp
This overrides \flag{files-without-match}.
"
}
@@ -2752,6 +2803,11 @@ ripgrep.
A file or directory is considered hidden if its base name starts with a dot
character (\fB.\fP). On operating systems which support a "hidden" file
attribute, like Windows, files with this attribute are also considered hidden.
.sp
Note that \flag{hidden} will include files and folders like \fB.git\fP
regardless of \flag{no-ignore-vcs}. To exclude such paths when using
\flag{hidden}, you must explicitly ignore them using another flag or ignore
file.
"#
}
@@ -2862,7 +2918,10 @@ impl Flag for HyperlinkFormat {
r"Set the format of hyperlinks."
}
fn doc_long(&self) -> &'static str {
r#"
static DOC: LazyLock<String> = LazyLock::new(|| {
let mut doc = String::new();
doc.push_str(
r#"
Set the format of hyperlinks to use when printing results. Hyperlinks make
certain elements of ripgrep's output, such as file paths, clickable. This
generally only works in terminal emulators that support OSC-8 hyperlinks. For
@@ -2870,10 +2929,23 @@ example, the format \fBfile://{host}{path}\fP will emit an RFC 8089 hyperlink.
To see the format that ripgrep is using, pass the \flag{debug} flag.
.sp
Alternatively, a format string may correspond to one of the following aliases:
\fBdefault\fP, \fBnone\fP, \fBfile\fP, \fBgrep+\fP, \fBkitty\fP, \fBmacvim\fP,
\fBtextmate\fP, \fBvscode\fP, \fBvscode-insiders\fP, \fBvscodium\fP. The
alias will be replaced with a format string that is intended to work for the
corresponding application.
"#,
);
let mut aliases = grep::printer::hyperlink_aliases();
aliases.sort_by_key(|alias| {
alias.display_priority().unwrap_or(i16::MAX)
});
for (i, alias) in aliases.iter().enumerate() {
doc.push_str(r"\fB");
doc.push_str(alias.name());
doc.push_str(r"\fP");
doc.push_str(if i < aliases.len() - 1 { ", " } else { "." });
}
doc.push_str(
r#"
The alias will be replaced with a format string that is intended to work for
the corresponding application.
.sp
The following variables are available in the format string:
.sp
@@ -2950,7 +3022,24 @@ in the output. To make the path appear, and thus also a hyperlink, use the
.sp
For more information on hyperlinks in terminal emulators, see:
https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda
"#
"#,
);
doc
});
&DOC
}
fn doc_choices(&self) -> &'static [&'static str] {
static CHOICES: LazyLock<Vec<String>> = LazyLock::new(|| {
let mut aliases = grep::printer::hyperlink_aliases();
aliases.sort_by_key(|alias| {
alias.display_priority().unwrap_or(i16::MAX)
});
aliases.iter().map(|alias| alias.name().to_string()).collect()
});
static BORROWED: LazyLock<Vec<&'static str>> =
LazyLock::new(|| CHOICES.iter().map(|name| &**name).collect());
&*BORROWED
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
@@ -3144,9 +3233,11 @@ impl Flag for IgnoreFile {
Specifies a path to one or more \fBgitignore\fP formatted rules files.
These patterns are applied after the patterns found in \fB.gitignore\fP,
\fB.rgignore\fP and \fB.ignore\fP are applied and are matched relative to the
current working directory. Multiple additional ignore files can be specified
by using this flag repeatedly. When specifying multiple ignore files, earlier
files have lower precedence than later files.
current working directory. That is, files specified via this flag have lower
precedence than files automatically found in the directory tree. Multiple
additional ignore files can be specified by using this flag repeatedly. When
specifying multiple ignore files, earlier files have lower precedence than
later files.
.sp
If you are looking for a way to include or exclude files and directories
directly on the command line, then use \flag{glob} instead.
@@ -3809,6 +3900,14 @@ impl Flag for MaxCount {
r"
Limit the number of matching lines per file searched to \fINUM\fP.
.sp
When \flag{multiline} is used, a single match that spans multiple lines is only
counted once for the purposes of this limit. Multiple matches in a single line
are counted only once, as they would be in non-multiline mode.
.sp
When combined with \flag{after-context} or \flag{context}, it's possible for
more matches than the maximum to be printed if contextual lines contain a
match.
.sp
Note that \fB0\fP is a legal value but not likely to be useful. When used,
ripgrep won't search anything.
"
@@ -4090,7 +4189,14 @@ This overrides the \flag{stop-on-nonmatch} flag.
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
args.multiline = v.unwrap_switch();
let enabled = v.unwrap_switch();
if !enabled && args.multiline_window.is_some() {
anyhow::bail!(
"--no-multiline cannot be used with --multiline-window \
(which implicitly enables --multiline)"
);
}
args.multiline = enabled;
if args.multiline {
args.stop_on_nonmatch = false;
}
@@ -4114,6 +4220,68 @@ fn test_multiline() {
assert_eq!(false, args.multiline);
}
/// --multiline-window
#[derive(Debug)]
struct MultilineWindow;

impl Flag for MultilineWindow {
    fn is_switch(&self) -> bool {
        // This flag carries a value (NUM), so it isn't a boolean switch.
        false
    }
    fn name_long(&self) -> &'static str {
        "multiline-window"
    }
    fn name_short(&self) -> Option<u8> {
        Some(b'W')
    }
    fn doc_variable(&self) -> Option<&'static str> {
        Some("NUM")
    }
    fn doc_category(&self) -> Category {
        Category::Search
    }
    fn doc_short(&self) -> &'static str {
        r"Limit multiline matches to a fixed number of lines."
    }
    fn doc_long(&self) -> &'static str {
        r#"
Limit the maximum number of lines that a multiline match may span to
\fINUM\fP (use \fB--multiline-window=\fP\fINUM\fP).
.sp
This flag implicitly enables \flag{multiline}. Matches are found as if the file being
searched were limited to \fINUM\fP lines at a time, which can prevent
unintended long matches while still enabling multi-line searching.
.sp
The value of \fINUM\fP must be at least 1.
"#
    }
    fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
        // Parse the window size and reject zero, which would make every
        // multiline search impossible.
        let window = convert::usize(&v.unwrap_value())?;
        anyhow::ensure!(window != 0, "--multiline-window must be at least 1");
        // A window only makes sense with multiline searching, so setting it
        // implicitly turns --multiline on.
        args.multiline_window = Some(window);
        args.multiline = true;
        Ok(())
    }
}
#[cfg(test)]
#[test]
fn test_multiline_window() {
    // By default, no window is configured.
    let args = parse_low_raw(None::<&str>).unwrap();
    assert_eq!(args.multiline_window, None);

    // The long form records the window and implicitly enables --multiline.
    let args = parse_low_raw(["--multiline-window=2"]).unwrap();
    assert_eq!(args.multiline_window, Some(2));
    assert!(args.multiline);

    // The short form -W behaves the same way.
    let args = parse_low_raw(["-W", "3"]).unwrap();
    assert_eq!(args.multiline_window, Some(3));
    assert!(args.multiline);
}
/// --multiline-dotall
#[derive(Debug)]
struct MultilineDotall;
@@ -4603,11 +4771,15 @@ impl Flag for NoIgnoreVcs {
}
fn doc_long(&self) -> &'static str {
r"
When given, filter rules from source control ignore files (e.g., \fB.gitignore\fP)
are not respected. By default, ripgrep respects \fBgit\fP's ignore rules for
automatic filtering. In some cases, it may not be desirable to respect the
source control's ignore rules and instead only respect rules in \fB.ignore\fP
or \fB.rgignore\fP.
When given, filter rules from source control ignore files (e.g.,
\fB.gitignore\fP) are not respected. By default, ripgrep respects \fBgit\fP's
ignore rules for automatic filtering. In some cases, it may not be desirable
to respect the source control's ignore rules and instead only respect rules in
\fB.ignore\fP or \fB.rgignore\fP.
.sp
Note that this flag does not directly affect the filtering of source control
files or folders that start with a dot (\fB.\fP), like \fB.git\fP. These are
affected by \flag{hidden} and its related flags instead.
.sp
This flag implies \flag{no-ignore-parent} for source control ignore files as
well.
@@ -5504,9 +5676,9 @@ don't need preprocessing. For example, given the following shell script,
pdftotext "$1" -
.EE
.sp
then it is possible to use \fB\-\-pre\fP \fIpre-pdftotext\fP \fB--pre-glob
'\fP\fI*.pdf\fP\fB'\fP to make it so ripgrep only executes the
\fIpre-pdftotext\fP command on files with a \fI.pdf\fP extension.
then it is possible to use \fB\-\-pre\fP \fIpre-pdftotext\fP
\fB\-\-pre\-glob\fP '\fI*.pdf\fP' to make it so ripgrep only executes
the \fIpre-pdftotext\fP command on files with a \fI.pdf\fP extension.
.sp
Multiple \flag{pre-glob} flags may be used. Globbing rules match
\fBgitignore\fP globs. Precede a glob with a \fB!\fP to exclude it.
@@ -6712,6 +6884,88 @@ fn test_trace() {
assert_eq!(Some(LoggingMode::Trace), args.logging);
}
/// --squash
#[derive(Debug)]
struct Squash;

impl Flag for Squash {
    fn is_switch(&self) -> bool {
        true
    }
    fn name_long(&self) -> &'static str {
        "squash"
    }
    fn doc_category(&self) -> Category {
        Category::Output
    }
    fn doc_short(&self) -> &'static str {
        r"Squash contiguous whitespace in output to a single space."
    }
    fn doc_long(&self) -> &'static str {
        r#"
Squash any contiguous Unicode whitespace (including new lines) into a single
ASCII space when printing matches.
"#
    }
    fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
        // There is no negated form of this flag, so it can only ever be
        // switched on.
        let enabled = v.unwrap_switch();
        assert!(enabled, "--squash can only be enabled");
        args.squash = grep::printer::SquashMode::Whitespace;
        Ok(())
    }
}
#[cfg(test)]
#[test]
fn test_squash() {
    // Squashing is off by default.
    let args = parse_low_raw(None::<&str>).unwrap();
    assert_eq!(args.squash, grep::printer::SquashMode::None);

    // --squash selects whitespace squashing.
    let args = parse_low_raw(["--squash"]).unwrap();
    assert_eq!(args.squash, grep::printer::SquashMode::Whitespace);
}
/// --squash-nl-only
#[derive(Debug)]
struct SquashNlOnly;

impl Flag for SquashNlOnly {
    fn is_switch(&self) -> bool {
        true
    }
    fn name_long(&self) -> &'static str {
        "squash-nl-only"
    }
    fn doc_category(&self) -> Category {
        Category::Output
    }
    fn doc_short(&self) -> &'static str {
        r"Squash new lines into spaces in output."
    }
    fn doc_long(&self) -> &'static str {
        r#"
Squash contiguous line terminators into a single ASCII space when printing
matches. Other whitespace is preserved.
"#
    }
    fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
        // There is no negated form of this flag, so it can only ever be
        // switched on.
        let enabled = v.unwrap_switch();
        assert!(enabled, "--squash-nl-only can only be enabled");
        args.squash = grep::printer::SquashMode::Newlines;
        Ok(())
    }
}
#[cfg(test)]
#[test]
fn test_squash_nl_only() {
    // Squashing is off by default.
    let args = parse_low_raw(None::<&str>).unwrap();
    assert_eq!(args.squash, grep::printer::SquashMode::None);

    // --squash-nl-only selects newline-only squashing.
    let args = parse_low_raw(["--squash-nl-only"]).unwrap();
    assert_eq!(args.squash, grep::printer::SquashMode::Newlines);
}
/// --trim
#[derive(Debug)]
struct Trim;
@@ -7302,6 +7556,53 @@ fn test_vimgrep() {
assert_eq!(true, args.vimgrep);
}
/// --in-file-index
#[derive(Debug)]
struct InFileIndex;

impl Flag for InFileIndex {
    fn is_switch(&self) -> bool {
        true
    }
    fn name_long(&self) -> &'static str {
        "in-file-index"
    }
    fn name_negated(&self) -> Option<&'static str> {
        Some("no-in-file-index")
    }
    fn doc_category(&self) -> Category {
        Category::Output
    }
    fn doc_short(&self) -> &'static str {
        r"Prefix matches with an index per file."
    }
    fn doc_long(&self) -> &'static str {
        r"
When enabled, ripgrep prefixes each matching line with an index that is
incremented per file. The format is \fIFILE\fP[\fIN\fP]:\fILINE\fP:, which can
disambiguate multi-line matches that print the same line multiple times.
"
    }
    fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
        // This flag has a negated form, so honor whichever state the switch
        // carries rather than assuming "on".
        let enabled = v.unwrap_switch();
        args.in_file_index = enabled;
        Ok(())
    }
}
#[cfg(test)]
#[test]
fn test_in_file_index() {
    // Off by default.
    let args = parse_low_raw(None::<&str>).unwrap();
    assert!(!args.in_file_index);

    // The positive form turns it on.
    let args = parse_low_raw(["--in-file-index"]).unwrap();
    assert!(args.in_file_index);

    // The negated form wins when it comes last.
    let args = parse_low_raw(["--in-file-index", "--no-in-file-index"]).unwrap();
    assert!(!args.in_file_index);
}
/// --with-filename
#[derive(Debug)]
struct WithFilename;
@@ -7665,9 +7966,10 @@ mod tests {
assert!(
choice.chars().all(|c| c.is_ascii_alphanumeric()
|| c == '-'
|| c == ':'),
|| c == ':'
|| c == '+'),
"choice '{choice}' for flag '{long}' does not match \
^[-:0-9A-Za-z]+$",
^[-+:0-9A-Za-z]+$",
)
}
}

View File

@@ -8,7 +8,7 @@ is used when the `--help` flag is given.
use std::{collections::BTreeMap, fmt::Write};
use crate::flags::{defs::FLAGS, doc::version, Category, Flag};
use crate::flags::{Category, Flag, defs::FLAGS, doc::version};
const TEMPLATE_SHORT: &'static str = include_str!("template.short.help");
const TEMPLATE_LONG: &'static str = include_str!("template.long.help");

View File

@@ -4,7 +4,7 @@ Provides routines for generating ripgrep's man page in `roff` format.
use std::{collections::BTreeMap, fmt::Write};
use crate::flags::{defs::FLAGS, doc::version, Flag};
use crate::flags::{Flag, defs::FLAGS, doc::version};
const TEMPLATE: &'static str = include_str!("template.rg.1");
@@ -53,7 +53,7 @@ fn generate_flag(flag: &'static dyn Flag, out: &mut String) {
write!(out, r", ");
}
let name = flag.name_long();
let name = flag.name_long().replace("-", r"\-");
write!(out, r"\fB\-\-{name}\fP");
if let Some(var) = flag.doc_variable() {
write!(out, r"=\fI{var}\fP");
@@ -71,7 +71,7 @@ fn generate_flag(flag: &'static dyn Flag, out: &mut String) {
if let Some(name) = flag.name_short() {
write!(out, r"\-{}/", char::from(name));
}
write!(out, r"\-\-{}", flag.name_long());
write!(out, r"\-\-{}", flag.name_long().replace("-", r"\-"));
out.push_str(r"\fP");
});
// Convert \flag-negate{foo} into something nicer.

View File

@@ -1,4 +1,4 @@
.TH RG 1 2024-09-08 "!!VERSION!!" "User Commands"
.TH RG 1 2025-10-22 "!!VERSION!!" "User Commands"
.
.
.SH NAME
@@ -43,10 +43,10 @@ configuration file. The file can specify one shell argument per line. Lines
starting with \fB#\fP are ignored. For more details, see \fBCONFIGURATION
FILES\fP below.
.sp
ripgrep will automatically detect if stdin exists and search stdin for a regex
pattern, e.g. \fBls | rg foo\fP. In some environments, stdin may exist when
it shouldn't. To turn off stdin detection, one can explicitly specify the
directory to search, e.g. \fBrg foo ./\fP.
ripgrep will automatically detect if stdin is a readable file and search stdin
for a regex pattern, e.g. \fBls | rg foo\fP. In some environments, stdin may
exist when it shouldn't. To turn off stdin detection, one can explicitly
specify the directory to search, e.g. \fBrg foo ./\fP.
.sp
Like other tools such as \fBls\fP, ripgrep will alter its output depending on
whether stdout is connected to a tty. By default, when printing a tty, ripgrep

View File

@@ -169,9 +169,5 @@ fn features() -> Vec<String> {
/// Returns `+` when `enabled` is `true` and `-` otherwise.
fn sign(enabled: bool) -> &'static str {
if enabled {
"+"
} else {
"-"
}
if enabled { "+" } else { "-" }
}

View File

@@ -9,7 +9,7 @@ use std::{
use {
bstr::BString,
grep::printer::{ColorSpecs, SummaryKind},
grep::printer::{ColorSpecs, SquashMode, SummaryKind},
};
use crate::{
@@ -45,6 +45,7 @@ pub(crate) struct HiArgs {
context: ContextMode,
context_separator: ContextSeparator,
crlf: bool,
cwd: PathBuf,
dfa_size_limit: Option<usize>,
encoding: EncodingMode,
engine: EngineChoice,
@@ -60,6 +61,7 @@ pub(crate) struct HiArgs {
ignore_file_case_insensitive: bool,
ignore_file: Vec<PathBuf>,
include_zero: bool,
in_file_index: bool,
invert_match: bool,
is_terminal_stdout: bool,
line_number: bool,
@@ -72,6 +74,7 @@ pub(crate) struct HiArgs {
mode: Mode,
multiline: bool,
multiline_dotall: bool,
multiline_window: Option<usize>,
no_ignore_dot: bool,
no_ignore_exclude: bool,
no_ignore_files: bool,
@@ -97,6 +100,7 @@ pub(crate) struct HiArgs {
sort: Option<SortMode>,
stats: Option<grep::printer::Stats>,
stop_on_nonmatch: bool,
squash: SquashMode,
threads: usize,
trim: bool,
types: ignore::types::Types,
@@ -262,6 +266,7 @@ impl HiArgs {
context: low.context,
context_separator: low.context_separator,
crlf: low.crlf,
cwd: state.cwd,
dfa_size_limit: low.dfa_size_limit,
encoding: low.encoding,
engine: low.engine,
@@ -276,6 +281,7 @@ impl HiArgs {
ignore_file: low.ignore_file,
ignore_file_case_insensitive: low.ignore_file_case_insensitive,
include_zero: low.include_zero,
in_file_index: low.in_file_index,
invert_match: low.invert_match,
is_terminal_stdout: state.is_terminal_stdout,
line_number,
@@ -287,6 +293,7 @@ impl HiArgs {
mmap_choice,
multiline: low.multiline,
multiline_dotall: low.multiline_dotall,
multiline_window: low.multiline_window,
no_ignore_dot: low.no_ignore_dot,
no_ignore_exclude: low.no_ignore_exclude,
no_ignore_files: low.no_ignore_files,
@@ -311,6 +318,7 @@ impl HiArgs {
sort: low.sort,
stats,
stop_on_nonmatch: low.stop_on_nonmatch,
squash: low.squash,
threads,
trim: low.trim,
types,
@@ -517,7 +525,7 @@ impl HiArgs {
/// When this returns false, it is impossible for ripgrep to ever report
/// a match.
pub(crate) fn matches_possible(&self) -> bool {
if self.patterns.patterns.is_empty() {
if self.patterns.patterns.is_empty() && !self.invert_match {
return false;
}
if self.max_count == Some(0) {
@@ -562,7 +570,16 @@ impl HiArgs {
wtr: W,
) -> Printer<W> {
let summary_kind = if self.quiet {
SummaryKind::Quiet
match search_mode {
SearchMode::FilesWithMatches
| SearchMode::Count
| SearchMode::CountMatches
| SearchMode::JSON
| SearchMode::Standard => SummaryKind::QuietWithMatch,
SearchMode::FilesWithoutMatch => {
SummaryKind::QuietWithoutMatch
}
}
} else {
match search_mode {
SearchMode::FilesWithMatches => SummaryKind::PathWithMatch,
@@ -570,10 +587,10 @@ impl HiArgs {
SearchMode::Count => SummaryKind::Count,
SearchMode::CountMatches => SummaryKind::CountMatches,
SearchMode::JSON => {
return Printer::JSON(self.printer_json(wtr))
return Printer::JSON(self.printer_json(wtr));
}
SearchMode::Standard => {
return Printer::Standard(self.printer_standard(wtr))
return Printer::Standard(self.printer_standard(wtr));
}
}
};
@@ -587,8 +604,8 @@ impl HiArgs {
) -> grep::printer::JSON<W> {
grep::printer::JSONBuilder::new()
.pretty(false)
.max_matches(self.max_count)
.always_begin_end(false)
.replacement(self.replace.clone().map(|r| r.into()))
.build(wtr)
}
@@ -605,15 +622,16 @@ impl HiArgs {
.column(self.column)
.heading(self.heading)
.hyperlink(self.hyperlink_config.clone())
.in_file_index(self.in_file_index)
.max_columns_preview(self.max_columns_preview)
.max_columns(self.max_columns)
.max_matches(self.max_count)
.only_matching(self.only_matching)
.path(self.with_filename)
.path_terminator(self.path_terminator.clone())
.per_match_one_line(true)
.per_match(self.vimgrep)
.replacement(self.replace.clone().map(|r| r.into()))
.squash(self.squash)
.separator_context(self.context_separator.clone().into_bytes())
.separator_field_context(
self.field_context_separator.clone().into_bytes(),
@@ -647,7 +665,6 @@ impl HiArgs {
.exclude_zero(!self.include_zero)
.hyperlink(self.hyperlink_config.clone())
.kind(kind)
.max_matches(self.max_count)
.path(self.with_filename)
.path_terminator(self.path_terminator.clone())
.separator_field(b":".to_vec())
@@ -709,10 +726,12 @@ impl HiArgs {
};
let mut builder = grep::searcher::SearcherBuilder::new();
builder
.max_matches(self.max_count)
.line_terminator(line_term)
.invert_match(self.invert_match)
.line_number(self.line_number)
.multi_line(self.multiline)
.multiline_window(self.multiline_window)
.memory_map(self.mmap_choice.clone())
.stop_on_nonmatch(self.stop_on_nonmatch);
match self.context {
@@ -788,7 +807,7 @@ impl HiArgs {
attach_timestamps(haystacks, |md| md.created()).collect()
}
};
with_timestamps.sort_by(|(_, ref t1), (_, ref t2)| {
with_timestamps.sort_by(|(_, t1), (_, t2)| {
let ordering = match (*t1, *t2) {
// Both have metadata, do the obvious thing.
(Some(t1), Some(t2)) => t1.cmp(&t2),
@@ -799,11 +818,7 @@ impl HiArgs {
// When both error, we can't distinguish, so treat as equal.
(None, None) => Ordering::Equal,
};
if sort.reverse {
ordering.reverse()
} else {
ordering
}
if sort.reverse { ordering.reverse() } else { ordering }
});
Box::new(with_timestamps.into_iter().map(|(s, _)| s))
}
@@ -893,7 +908,8 @@ impl HiArgs {
.git_ignore(!self.no_ignore_vcs)
.git_exclude(!self.no_ignore_vcs && !self.no_ignore_exclude)
.require_git(!self.no_require_git)
.ignore_case_insensitive(self.ignore_file_case_insensitive);
.ignore_case_insensitive(self.ignore_file_case_insensitive)
.current_dir(&self.cwd);
if !self.no_ignore_dot {
builder.add_custom_ignore_filename(".rgignore");
}
@@ -943,10 +959,12 @@ impl State {
fn new() -> anyhow::Result<State> {
use std::io::IsTerminal;
let cwd = current_dir()?;
log::debug!("read CWD from environment: {}", cwd.display());
Ok(State {
is_terminal_stdout: std::io::stdout().is_terminal(),
stdin_consumed: false,
cwd: current_dir()?,
cwd,
})
}
}
@@ -1178,7 +1196,7 @@ fn types(low: &LowArgs) -> anyhow::Result<ignore::types::Types> {
let mut builder = ignore::types::TypesBuilder::new();
builder.add_defaults();
for tychange in low.type_changes.iter() {
match tychange {
match *tychange {
TypeChange::Clear { ref name } => {
builder.clear(name);
}

View File

@@ -9,7 +9,7 @@ use std::{
use {
bstr::{BString, ByteVec},
grep::printer::{HyperlinkFormat, UserColorSpec},
grep::printer::{HyperlinkFormat, SquashMode, UserColorSpec},
};
/// A collection of "low level" arguments.
@@ -65,6 +65,7 @@ pub(crate) struct LowArgs {
pub(crate) ignore_file: Vec<PathBuf>,
pub(crate) ignore_file_case_insensitive: bool,
pub(crate) include_zero: bool,
pub(crate) in_file_index: bool,
pub(crate) invert_match: bool,
pub(crate) line_number: Option<bool>,
pub(crate) logging: Option<LoggingMode>,
@@ -76,6 +77,7 @@ pub(crate) struct LowArgs {
pub(crate) mmap: MmapMode,
pub(crate) multiline: bool,
pub(crate) multiline_dotall: bool,
pub(crate) multiline_window: Option<usize>,
pub(crate) no_config: bool,
pub(crate) no_ignore_dot: bool,
pub(crate) no_ignore_exclude: bool,
@@ -101,6 +103,7 @@ pub(crate) struct LowArgs {
pub(crate) sort: Option<SortMode>,
pub(crate) stats: bool,
pub(crate) stop_on_nonmatch: bool,
pub(crate) squash: SquashMode,
pub(crate) threads: Option<usize>,
pub(crate) trim: bool,
pub(crate) type_changes: Vec<TypeChange>,
@@ -229,13 +232,14 @@ pub(crate) enum GenerateMode {
}
/// Indicates how ripgrep should treat binary data.
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) enum BinaryMode {
/// Automatically determine the binary mode to use. Essentially, when
/// a file is searched explicitly, then it will be searched using the
/// `SearchAndSuppress` strategy. Otherwise, it will be searched in a way
/// that attempts to skip binary files as much as possible. That is, once
/// a file is classified as binary, searching will immediately stop.
#[default]
Auto,
/// Search files even when they have binary data, but if a match is found,
/// suppress it and emit a warning.
@@ -251,12 +255,6 @@ pub(crate) enum BinaryMode {
AsText,
}
impl Default for BinaryMode {
fn default() -> BinaryMode {
BinaryMode::Auto
}
}
/// Indicates what kind of boundary mode to use (line or word).
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum BoundaryMode {
@@ -269,10 +267,11 @@ pub(crate) enum BoundaryMode {
/// Indicates the buffer mode that ripgrep should use when printing output.
///
/// The default is `Auto`.
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) enum BufferMode {
/// Select the buffer mode, 'line' or 'block', automatically based on
/// whether stdout is connected to a tty.
#[default]
Auto,
/// Flush the output buffer whenever a line terminator is seen.
///
@@ -287,18 +286,13 @@ pub(crate) enum BufferMode {
Block,
}
impl Default for BufferMode {
fn default() -> BufferMode {
BufferMode::Auto
}
}
/// Indicates the case mode for how to interpret all patterns given to ripgrep.
///
/// The default is `Sensitive`.
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) enum CaseMode {
/// Patterns are matched case sensitively. i.e., `a` does not match `A`.
#[default]
Sensitive,
/// Patterns are matched case insensitively. i.e., `a` does match `A`.
Insensitive,
@@ -308,21 +302,16 @@ pub(crate) enum CaseMode {
Smart,
}
impl Default for CaseMode {
fn default() -> CaseMode {
CaseMode::Sensitive
}
}
/// Indicates whether ripgrep should include color/hyperlinks in its output.
///
/// The default is `Auto`.
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) enum ColorChoice {
/// Color and hyperlinks will never be used.
Never,
/// Color and hyperlinks will be used only when stdout is connected to a
/// tty.
#[default]
Auto,
/// Color will always be used.
Always,
@@ -335,12 +324,6 @@ pub(crate) enum ColorChoice {
Ansi,
}
impl Default for ColorChoice {
fn default() -> ColorChoice {
ColorChoice::Auto
}
}
impl ColorChoice {
/// Convert this color choice to the corresponding termcolor type.
pub(crate) fn to_termcolor(&self) -> termcolor::ColorChoice {
@@ -529,9 +512,10 @@ impl ContextSeparator {
/// The encoding mode the searcher will use.
///
/// The default is `Auto`.
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) enum EncodingMode {
/// Use only BOM sniffing to auto-detect an encoding.
#[default]
Auto,
/// Use an explicit encoding forcefully, but let BOM sniffing override it.
Some(grep::searcher::Encoding),
@@ -541,21 +525,16 @@ pub(crate) enum EncodingMode {
Disabled,
}
impl Default for EncodingMode {
fn default() -> EncodingMode {
EncodingMode::Auto
}
}
/// The regex engine to use.
///
/// The default is `Default`.
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) enum EngineChoice {
/// Uses the default regex engine: Rust's `regex` crate.
///
/// (Well, technically it uses `regex-automata`, but `regex-automata` is
/// the implementation of the `regex` crate.)
#[default]
Default,
/// Dynamically select the right engine to use.
///
@@ -566,12 +545,6 @@ pub(crate) enum EngineChoice {
PCRE2,
}
impl Default for EngineChoice {
fn default() -> EngineChoice {
EngineChoice::Default
}
}
/// The field context separator to use to between metadata for each contextual
/// line.
///
@@ -651,10 +624,11 @@ pub(crate) enum LoggingMode {
/// Indicates when to use memory maps.
///
/// The default is `Auto`.
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) enum MmapMode {
/// This instructs ripgrep to use heuristics for selecting when to and not
/// to use memory maps for searching.
#[default]
Auto,
/// This instructs ripgrep to always try memory maps when possible. (Memory
/// maps are not possible to use in all circumstances, for example, for
@@ -666,12 +640,6 @@ pub(crate) enum MmapMode {
Never,
}
impl Default for MmapMode {
fn default() -> MmapMode {
MmapMode::Auto
}
}
/// Represents a source of patterns that ripgrep should search for.
///
/// The reason to unify these is so that we can retain the order of `-f/--flag`

View File

@@ -36,7 +36,7 @@ pub(crate) use crate::flags::{
},
hiargs::HiArgs,
lowargs::{GenerateMode, Mode, SearchMode, SpecialMode},
parse::{parse, ParseResult},
parse::{ParseResult, parse},
};
mod complete;

View File

@@ -7,10 +7,10 @@ use std::{borrow::Cow, collections::BTreeSet, ffi::OsString};
use anyhow::Context;
use crate::flags::{
Flag, FlagValue,
defs::FLAGS,
hiargs::HiArgs,
lowargs::{LoggingMode, LowArgs, SpecialMode},
Flag, FlagValue,
};
/// The result of parsing CLI arguments.

View File

@@ -37,7 +37,7 @@ mod search;
// i686.
#[cfg(all(target_env = "musl", target_pointer_width = "64"))]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
/// Then, as it was, then again it will be.
fn main() -> ExitCode {
@@ -468,7 +468,7 @@ fn print_stats<W: Write>(
{bytes_printed} bytes printed
{bytes_searched} bytes searched
{search_time:0.6} seconds spent searching
{process_time:0.6} seconds
{process_time:0.6} seconds total
",
matches = stats.matches(),
lines = stats.matched_lines(),

View File

@@ -99,19 +99,19 @@ macro_rules! ignore_message {
/// Returns true if and only if messages should be shown.
pub(crate) fn messages() -> bool {
MESSAGES.load(Ordering::SeqCst)
MESSAGES.load(Ordering::Relaxed)
}
/// Set whether messages should be shown or not.
///
/// By default, they are not shown.
pub(crate) fn set_messages(yes: bool) {
MESSAGES.store(yes, Ordering::SeqCst)
MESSAGES.store(yes, Ordering::Relaxed)
}
/// Returns true if and only if "ignore" related messages should be shown.
pub(crate) fn ignore_messages() -> bool {
IGNORE_MESSAGES.load(Ordering::SeqCst)
IGNORE_MESSAGES.load(Ordering::Relaxed)
}
/// Set whether "ignore" related messages should be shown or not.
@@ -122,12 +122,12 @@ pub(crate) fn ignore_messages() -> bool {
/// `messages` is disabled, then "ignore" messages are never shown, regardless
/// of this setting.
pub(crate) fn set_ignore_messages(yes: bool) {
IGNORE_MESSAGES.store(yes, Ordering::SeqCst)
IGNORE_MESSAGES.store(yes, Ordering::Relaxed)
}
/// Returns true if and only if ripgrep came across a non-fatal error.
pub(crate) fn errored() -> bool {
ERRORED.load(Ordering::SeqCst)
ERRORED.load(Ordering::Relaxed)
}
/// Indicate that ripgrep has come across a non-fatal error.
@@ -135,5 +135,5 @@ pub(crate) fn errored() -> bool {
/// Callers should not use this directly. Instead, it is called automatically
/// via the `err_message` macro.
pub(crate) fn set_errored() {
ERRORED.store(true, Ordering::SeqCst);
ERRORED.store(true, Ordering::Relaxed);
}

View File

@@ -41,7 +41,6 @@ impl Default for Config {
pub(crate) struct SearchWorkerBuilder {
config: Config,
command_builder: grep::cli::CommandReaderBuilder,
decomp_builder: grep::cli::DecompressionReaderBuilder,
}
impl Default for SearchWorkerBuilder {
@@ -53,17 +52,10 @@ impl Default for SearchWorkerBuilder {
impl SearchWorkerBuilder {
/// Create a new builder for configuring and constructing a search worker.
pub(crate) fn new() -> SearchWorkerBuilder {
let mut cmd_builder = grep::cli::CommandReaderBuilder::new();
cmd_builder.async_stderr(true);
let mut command_builder = grep::cli::CommandReaderBuilder::new();
command_builder.async_stderr(true);
let mut decomp_builder = grep::cli::DecompressionReaderBuilder::new();
decomp_builder.async_stderr(true);
SearchWorkerBuilder {
config: Config::default(),
command_builder: cmd_builder,
decomp_builder,
}
SearchWorkerBuilder { config: Config::default(), command_builder }
}
/// Create a new search worker using the given searcher, matcher and
@@ -76,7 +68,12 @@ impl SearchWorkerBuilder {
) -> SearchWorker<W> {
let config = self.config.clone();
let command_builder = self.command_builder.clone();
let decomp_builder = self.decomp_builder.clone();
let decomp_builder = config.search_zip.then(|| {
let mut decomp_builder =
grep::cli::DecompressionReaderBuilder::new();
decomp_builder.async_stderr(true);
decomp_builder
});
SearchWorker {
config,
command_builder,
@@ -233,7 +230,11 @@ impl<W: WriteColor> Printer<W> {
pub(crate) struct SearchWorker<W> {
config: Config,
command_builder: grep::cli::CommandReaderBuilder,
decomp_builder: grep::cli::DecompressionReaderBuilder,
/// This is `None` when `search_zip` is not enabled, since in this case it
/// can never be used. We do this because building the reader can sometimes
/// do non-trivial work (like resolving the paths of decompression binaries
/// on Windows).
decomp_builder: Option<grep::cli::DecompressionReaderBuilder>,
matcher: PatternMatcher,
searcher: grep::searcher::Searcher,
printer: Printer<W>,
@@ -273,10 +274,9 @@ impl<W: WriteColor> SearchWorker<W> {
/// Returns true if and only if the given file path should be
/// decompressed before searching.
fn should_decompress(&self, path: &Path) -> bool {
if !self.config.search_zip {
return false;
}
self.decomp_builder.get_matcher().has_command(path)
self.decomp_builder.as_ref().is_some_and(|decomp_builder| {
decomp_builder.get_matcher().has_command(path)
})
}
/// Returns true if and only if the given file path should be run through
@@ -307,15 +307,14 @@ impl<W: WriteColor> SearchWorker<W> {
io::Error::new(
io::ErrorKind::Other,
format!(
"preprocessor command could not start: '{:?}': {}",
cmd, err,
"preprocessor command could not start: '{cmd:?}': {err}",
),
)
})?;
let result = self.search_reader(path, &mut rdr).map_err(|err| {
io::Error::new(
io::ErrorKind::Other,
format!("preprocessor command failed: '{:?}': {}", cmd, err),
format!("preprocessor command failed: '{cmd:?}': {err}"),
)
});
let close_result = rdr.close();
@@ -328,7 +327,10 @@ impl<W: WriteColor> SearchWorker<W> {
/// result. If the given file path isn't recognized as a compressed file,
/// then search it without doing any decompression.
fn search_decompress(&mut self, path: &Path) -> io::Result<SearchResult> {
let mut rdr = self.decomp_builder.build(path)?;
let Some(ref decomp_builder) = self.decomp_builder else {
return self.search_path(path);
};
let mut rdr = decomp_builder.build(path)?;
let result = self.search_reader(path, &mut rdr);
let close_result = rdr.close();
let search_result = result?;

View File

@@ -1,6 +1,6 @@
[package]
name = "globset"
version = "0.4.15" #:version
version = "0.4.18" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Cross platform single glob and glob set matching. Glob set matching is the
@@ -13,7 +13,7 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/globset"
readme = "README.md"
keywords = ["regex", "glob", "multiple", "set", "pattern"]
license = "Unlicense OR MIT"
edition = "2021"
edition = "2024"
[lib]
name = "globset"
@@ -21,6 +21,7 @@ bench = false
[dependencies]
aho-corasick = "1.1.1"
arbitrary = { version = "1.3.2", optional = true, features = ["derive"] }
bstr = { version = "1.6.2", default-features = false, features = ["std"] }
log = { version = "0.4.20", optional = true }
serde = { version = "1.0.188", optional = true }
@@ -41,6 +42,7 @@ serde_json = "1.0.107"
[features]
default = ["log"]
arbitrary = ["dep:arbitrary"]
# DEPRECATED. It is a no-op. SIMD is done automatically through runtime
# dispatch.
simd-accel = []

View File

@@ -1,9 +1,9 @@
use std::fmt::Write;
use std::path::{is_separator, Path};
use std::path::{Path, is_separator};
use regex_automata::meta::Regex;
use crate::{new_regex, Candidate, Error, ErrorKind};
use crate::{Candidate, Error, ErrorKind, new_regex};
/// Describes a matching strategy for a particular pattern.
///
@@ -71,7 +71,8 @@ impl MatchStrategy {
///
/// It cannot be used directly to match file paths, but it can be converted
/// to a regular expression string or a matcher.
#[derive(Clone, Debug, Eq)]
#[derive(Clone, Eq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Glob {
glob: String,
re: String,
@@ -79,6 +80,12 @@ pub struct Glob {
tokens: Tokens,
}
impl AsRef<Glob> for Glob {
fn as_ref(&self) -> &Glob {
self
}
}
impl PartialEq for Glob {
fn eq(&self, other: &Glob) -> bool {
self.glob == other.glob && self.opts == other.opts
@@ -92,6 +99,21 @@ impl std::hash::Hash for Glob {
}
}
impl std::fmt::Debug for Glob {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if f.alternate() {
f.debug_struct("Glob")
.field("glob", &self.glob)
.field("re", &self.re)
.field("opts", &self.opts)
.field("tokens", &self.tokens)
.finish()
} else {
f.debug_tuple("Glob").field(&self.glob).finish()
}
}
}
impl std::fmt::Display for Glob {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.glob.fmt(f)
@@ -194,6 +216,7 @@ pub struct GlobBuilder<'a> {
}
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
struct GlobOptions {
/// Whether to match case insensitively.
case_insensitive: bool,
@@ -206,6 +229,11 @@ struct GlobOptions {
/// Whether or not an empty case in an alternate will be removed.
/// e.g., when enabled, `{,a}` will match "" and "a".
empty_alternates: bool,
/// Whether or not an unclosed character class is allowed. When an unclosed
/// character class is found, the opening `[` is treated as a literal `[`.
/// When this isn't enabled, an opening `[` without a corresponding `]` is
/// treated as an error.
allow_unclosed_class: bool,
}
impl GlobOptions {
@@ -215,11 +243,13 @@ impl GlobOptions {
literal_separator: false,
backslash_escape: !is_separator('\\'),
empty_alternates: false,
allow_unclosed_class: false,
}
}
}
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
struct Tokens(Vec<Token>);
impl std::ops::Deref for Tokens {
@@ -236,6 +266,7 @@ impl std::ops::DerefMut for Tokens {
}
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
enum Token {
Literal(char),
Any,
@@ -309,11 +340,7 @@ impl Glob {
let Token::Literal(c) = *t else { return None };
lit.push(c);
}
if lit.is_empty() {
None
} else {
Some(lit)
}
if lit.is_empty() { None } else { Some(lit) }
}
/// Returns an extension if this pattern matches a file path if and only
@@ -354,11 +381,7 @@ impl Glob {
_ => return None,
}
}
if lit.is_empty() {
None
} else {
Some(lit)
}
if lit.is_empty() { None } else { Some(lit) }
}
/// This is like `ext`, but returns an extension even if it isn't sufficient
@@ -421,11 +444,7 @@ impl Glob {
if need_sep {
lit.push('/');
}
if lit.is_empty() {
None
} else {
Some(lit)
}
if lit.is_empty() { None } else { Some(lit) }
}
/// Returns a literal suffix of this pattern if the entire pattern matches
@@ -474,11 +493,7 @@ impl Glob {
let Token::Literal(c) = *t else { return None };
lit.push(c);
}
if lit.is_empty() || lit == "/" {
None
} else {
Some((lit, entire))
}
if lit.is_empty() || lit == "/" { None } else { Some((lit, entire)) }
}
/// If this pattern only needs to inspect the basename of a file path,
@@ -564,25 +579,27 @@ impl<'a> GlobBuilder<'a> {
pub fn build(&self) -> Result<Glob, Error> {
let mut p = Parser {
glob: &self.glob,
stack: vec![Tokens::default()],
alternates_stack: Vec::new(),
branches: vec![Tokens::default()],
chars: self.glob.chars().peekable(),
prev: None,
cur: None,
found_unclosed_class: false,
opts: &self.opts,
};
p.parse()?;
if p.stack.is_empty() {
Err(Error {
glob: Some(self.glob.to_string()),
kind: ErrorKind::UnopenedAlternates,
})
} else if p.stack.len() > 1 {
if p.branches.is_empty() {
// OK because of how the branches/alternates_stack are managed.
// If we end up here, then there *must* be a bug in the parser
// somewhere.
unreachable!()
} else if p.branches.len() > 1 {
Err(Error {
glob: Some(self.glob.to_string()),
kind: ErrorKind::UnclosedAlternates,
})
} else {
let tokens = p.stack.pop().unwrap();
let tokens = p.branches.pop().unwrap();
Ok(Glob {
glob: self.glob.to_string(),
re: tokens.to_regex_with(&self.opts),
@@ -631,6 +648,22 @@ impl<'a> GlobBuilder<'a> {
self.opts.empty_alternates = yes;
self
}
/// Toggle whether unclosed character classes are allowed. When allowed,
/// a `[` without a matching `]` is treated literally instead of resulting
/// in a parse error.
///
/// For example, if this is set then the glob `[abc` will be treated as the
/// literal string `[abc` instead of returning an error.
///
/// By default, this is false. Generally speaking, enabling this leads to
/// worse failure modes since the glob parser becomes more permissive. You
/// might want to enable this when compatibility (e.g., with POSIX glob
/// implementations) is more important than good error messages.
pub fn allow_unclosed_class(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
self.opts.allow_unclosed_class = yes;
self
}
}
impl Tokens {
@@ -756,11 +789,29 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String {
}
struct Parser<'a> {
/// The glob to parse.
glob: &'a str,
stack: Vec<Tokens>,
/// Marks the index in `stack` where the alternation started.
alternates_stack: Vec<usize>,
/// The set of active alternation branches being parsed.
/// Tokens are added to the end of the last one.
branches: Vec<Tokens>,
/// A character iterator over the glob pattern to parse.
chars: std::iter::Peekable<std::str::Chars<'a>>,
/// The previous character seen.
prev: Option<char>,
/// The current character.
cur: Option<char>,
/// Whether we failed to find a closing `]` for a character
/// class. This can only be true when `GlobOptions::allow_unclosed_class`
/// is enabled. When enabled, it is impossible to ever parse another
/// character class with this glob. That's because classes cannot be
/// nested *and* the only way this happens is when there is never a `]`.
///
/// We track this state so that we don't end up spending quadratic time
/// trying to parse something like `[[[[[[[[[[[[[[[[[[[[[[[...`.
found_unclosed_class: bool,
/// Glob options, which may influence parsing.
opts: &'a GlobOptions,
}
@@ -774,7 +825,7 @@ impl<'a> Parser<'a> {
match c {
'?' => self.push_token(Token::Any)?,
'*' => self.parse_star()?,
'[' => self.parse_class()?,
'[' if !self.found_unclosed_class => self.parse_class()?,
'{' => self.push_alternate()?,
'}' => self.pop_alternate()?,
',' => self.parse_comma()?,
@@ -786,36 +837,37 @@ impl<'a> Parser<'a> {
}
fn push_alternate(&mut self) -> Result<(), Error> {
if self.stack.len() > 1 {
return Err(self.error(ErrorKind::NestedAlternates));
}
Ok(self.stack.push(Tokens::default()))
self.alternates_stack.push(self.branches.len());
self.branches.push(Tokens::default());
Ok(())
}
fn pop_alternate(&mut self) -> Result<(), Error> {
let mut alts = vec![];
while self.stack.len() >= 2 {
alts.push(self.stack.pop().unwrap());
}
self.push_token(Token::Alternates(alts))
let Some(start) = self.alternates_stack.pop() else {
return Err(self.error(ErrorKind::UnopenedAlternates));
};
assert!(start <= self.branches.len());
let alts = Token::Alternates(self.branches.drain(start..).collect());
self.push_token(alts)?;
Ok(())
}
fn push_token(&mut self, tok: Token) -> Result<(), Error> {
if let Some(ref mut pat) = self.stack.last_mut() {
if let Some(ref mut pat) = self.branches.last_mut() {
return Ok(pat.push(tok));
}
Err(self.error(ErrorKind::UnopenedAlternates))
}
fn pop_token(&mut self) -> Result<Token, Error> {
if let Some(ref mut pat) = self.stack.last_mut() {
if let Some(ref mut pat) = self.branches.last_mut() {
return Ok(pat.pop().unwrap());
}
Err(self.error(ErrorKind::UnopenedAlternates))
}
fn have_tokens(&self) -> Result<bool, Error> {
match self.stack.last() {
match self.branches.last() {
None => Err(self.error(ErrorKind::UnopenedAlternates)),
Some(ref pat) => Ok(!pat.is_empty()),
}
@@ -824,11 +876,11 @@ impl<'a> Parser<'a> {
fn parse_comma(&mut self) -> Result<(), Error> {
// If we aren't inside a group alternation, then don't
// treat commas specially. Otherwise, we need to start
// a new alternate.
if self.stack.len() <= 1 {
// a new alternate branch.
if self.alternates_stack.is_empty() {
self.push_token(Token::Literal(','))
} else {
Ok(self.stack.push(Tokens::default()))
Ok(self.branches.push(Tokens::default()))
}
}
@@ -865,7 +917,7 @@ impl<'a> Parser<'a> {
}
if !prev.map(is_separator).unwrap_or(false) {
if self.stack.len() <= 1
if self.branches.len() <= 1
|| (prev != Some(',') && prev != Some('{'))
{
self.push_token(Token::ZeroOrMore)?;
@@ -878,7 +930,7 @@ impl<'a> Parser<'a> {
assert!(self.bump().is_none());
true
}
Some(',') | Some('}') if self.stack.len() >= 2 => true,
Some(',') | Some('}') if self.branches.len() >= 2 => true,
Some(c) if is_separator(c) => {
assert!(self.bump().map(is_separator).unwrap_or(false));
false
@@ -908,6 +960,11 @@ impl<'a> Parser<'a> {
}
fn parse_class(&mut self) -> Result<(), Error> {
// Save parser state for potential rollback to literal '[' parsing.
let saved_chars = self.chars.clone();
let saved_prev = self.prev;
let saved_cur = self.cur;
fn add_to_last_range(
glob: &str,
r: &mut (char, char),
@@ -935,11 +992,17 @@ impl<'a> Parser<'a> {
let mut first = true;
let mut in_range = false;
loop {
let c = match self.bump() {
Some(c) => c,
// The only way to successfully break this loop is to observe
// a ']'.
None => return Err(self.error(ErrorKind::UnclosedClass)),
let Some(c) = self.bump() else {
return if self.opts.allow_unclosed_class == true {
self.chars = saved_chars;
self.cur = saved_cur;
self.prev = saved_prev;
self.found_unclosed_class = true;
self.push_token(Token::Literal('['))
} else {
Err(self.error(ErrorKind::UnclosedClass))
};
};
match c {
']' => {
@@ -1024,6 +1087,7 @@ mod tests {
litsep: Option<bool>,
bsesc: Option<bool>,
ealtre: Option<bool>,
unccls: Option<bool>,
}
macro_rules! syntax {
@@ -1066,6 +1130,10 @@ mod tests {
if let Some(ealtre) = $options.ealtre {
builder.empty_alternates(ealtre);
}
if let Some(unccls) = $options.unccls {
builder.allow_unclosed_class(unccls);
}
let pat = builder.build().unwrap();
assert_eq!(format!("(?-u){}", $re), pat.regex());
}
@@ -1206,25 +1274,80 @@ mod tests {
syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
syntaxerr!(err_alt1, "{a,b", ErrorKind::UnclosedAlternates);
syntaxerr!(err_alt2, "{a,{b,c}", ErrorKind::UnclosedAlternates);
syntaxerr!(err_alt3, "a,b}", ErrorKind::UnopenedAlternates);
syntaxerr!(err_alt4, "{a,b}}", ErrorKind::UnopenedAlternates);
const CASEI: Options =
Options { casei: Some(true), litsep: None, bsesc: None, ealtre: None };
const SLASHLIT: Options =
Options { casei: None, litsep: Some(true), bsesc: None, ealtre: None };
const CASEI: Options = Options {
casei: Some(true),
litsep: None,
bsesc: None,
ealtre: None,
unccls: None,
};
const SLASHLIT: Options = Options {
casei: None,
litsep: Some(true),
bsesc: None,
ealtre: None,
unccls: None,
};
const NOBSESC: Options = Options {
casei: None,
litsep: None,
bsesc: Some(false),
ealtre: None,
unccls: None,
};
const BSESC: Options = Options {
casei: None,
litsep: None,
bsesc: Some(true),
ealtre: None,
unccls: None,
};
const BSESC: Options =
Options { casei: None, litsep: None, bsesc: Some(true), ealtre: None };
const EALTRE: Options = Options {
casei: None,
litsep: None,
bsesc: Some(true),
ealtre: Some(true),
unccls: None,
};
const UNCCLS: Options = Options {
casei: None,
litsep: None,
bsesc: None,
ealtre: None,
unccls: Some(true),
};
toregex!(allow_unclosed_class_single, r"[", r"^\[$", &UNCCLS);
toregex!(allow_unclosed_class_many, r"[abc", r"^\[abc$", &UNCCLS);
toregex!(allow_unclosed_class_empty1, r"[]", r"^\[\]$", &UNCCLS);
toregex!(allow_unclosed_class_empty2, r"[][", r"^\[\]\[$", &UNCCLS);
toregex!(allow_unclosed_class_negated_unclosed, r"[!", r"^\[!$", &UNCCLS);
toregex!(allow_unclosed_class_negated_empty, r"[!]", r"^\[!\]$", &UNCCLS);
toregex!(
allow_unclosed_class_brace1,
r"{[abc,xyz}",
r"^(?:\[abc|xyz)$",
&UNCCLS
);
toregex!(
allow_unclosed_class_brace2,
r"{[abc,[xyz}",
r"^(?:\[abc|\[xyz)$",
&UNCCLS
);
toregex!(
allow_unclosed_class_brace3,
r"{[abc],[xyz}",
r"^(?:[abc]|\[xyz)$",
&UNCCLS
);
toregex!(re_empty, "", "^$");
toregex!(re_casei, "a", "(?i)^a$", &CASEI);
@@ -1265,7 +1388,9 @@ mod tests {
toregex!(re32, "/a**", r"^/a.*.*$");
toregex!(re33, "/**a", r"^/.*.*a$");
toregex!(re34, "/a**b", r"^/a.*.*b$");
toregex!(re35, "{a,b}", r"^(?:b|a)$");
toregex!(re35, "{a,b}", r"^(?:a|b)$");
toregex!(re36, "{a,{b,c}}", r"^(?:a|(?:b|c))$");
toregex!(re37, "{{a,b},{c,d}}", r"^(?:(?:a|b)|(?:c|d))$");
matches!(match1, "a", "a");
matches!(match2, "a*b", "a_b");
@@ -1353,6 +1478,9 @@ mod tests {
matches!(matchalt14, "foo{,.txt}", "foo.txt");
nmatches!(matchalt15, "foo{,.txt}", "foo");
matches!(matchalt16, "foo{,.txt}", "foo", EALTRE);
matches!(matchalt17, "{a,b{c,d}}", "bc");
matches!(matchalt18, "{a,b{c,d}}", "bd");
matches!(matchalt19, "{a,b{c,d}}", "a");
matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
#[cfg(unix)]

View File

@@ -94,6 +94,19 @@ Standard Unix-style glob syntax is supported:
A `GlobBuilder` can be used to prevent wildcards from matching path separators,
or to enable case insensitive matching.
# Crate Features
This crate includes optional features that can be enabled if necessary.
These features are not required but may be useful depending on the use case.
The following features are available:
* **arbitrary** -
Enabling this feature introduces a public dependency on the
[`arbitrary`](https://crates.io/crates/arbitrary)
crate. Namely, it implements the `Arbitrary` trait from that crate for the
[`Glob`] type. This feature is disabled by default.
*/
#![deny(missing_docs)]
@@ -107,11 +120,11 @@ use std::{
use {
aho_corasick::AhoCorasick,
bstr::{ByteSlice, ByteVec, B},
bstr::{B, ByteSlice, ByteVec},
regex_automata::{
PatternSet,
meta::Regex,
util::pool::{Pool, PoolGuard},
PatternSet,
},
};
@@ -150,6 +163,7 @@ pub struct Error {
/// The kind of error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum ErrorKind {
/// **DEPRECATED**.
///
@@ -169,20 +183,16 @@ pub enum ErrorKind {
UnopenedAlternates,
/// Occurs when a `{` is found without a matching `}`.
UnclosedAlternates,
/// Occurs when an alternating group is nested inside another alternating
/// group, e.g., `{{a,b},{c,d}}`.
/// **DEPRECATED**.
///
/// This error used to occur when an alternating group was nested inside
/// another alternating group, e.g., `{{a,b},{c,d}}`. However, this is now
/// supported and as such this error cannot occur.
NestedAlternates,
/// Occurs when an unescaped '\' is found at the end of a glob.
DanglingEscape,
/// An error associated with parsing or compiling a regex.
Regex(String),
/// Hints that destructuring should not be exhaustive.
///
/// This enum may grow additional variants, so this makes sure clients
/// don't count on exhaustive matching. (Otherwise, adding a new variant
/// could break existing code.)
#[doc(hidden)]
__Nonexhaustive,
}
impl std::error::Error for Error {
@@ -226,7 +236,6 @@ impl ErrorKind {
}
ErrorKind::DanglingEscape => "dangling '\\'",
ErrorKind::Regex(ref err) => err,
ErrorKind::__Nonexhaustive => unreachable!(),
}
}
}
@@ -255,7 +264,6 @@ impl std::fmt::Display for ErrorKind {
ErrorKind::InvalidRange(s, e) => {
write!(f, "invalid range; '{}' > '{}'", s, e)
}
ErrorKind::__Nonexhaustive => unreachable!(),
}
}
}
@@ -314,7 +322,7 @@ impl GlobSet {
/// Create an empty `GlobSet`. An empty set matches nothing.
#[inline]
pub fn empty() -> GlobSet {
pub const fn empty() -> GlobSet {
GlobSet { len: 0, strats: vec![] }
}
@@ -351,6 +359,43 @@ impl GlobSet {
false
}
/// Returns true if all globs in this set match the path given.
///
/// This will return true if the set of globs is empty, as in that case all
/// `0` of the globs will match (vacuous truth).
///
/// # Example
///
/// ```
/// use globset::{Glob, GlobSetBuilder};
///
/// let mut builder = GlobSetBuilder::new();
/// builder.add(Glob::new("src/*").unwrap());
/// builder.add(Glob::new("**/*.rs").unwrap());
/// let set = builder.build().unwrap();
///
/// assert!(set.matches_all("src/foo.rs"));
/// assert!(!set.matches_all("src/bar.c"));
/// assert!(!set.matches_all("test.rs"));
/// ```
pub fn matches_all<P: AsRef<Path>>(&self, path: P) -> bool {
    self.matches_all_candidate(&Candidate::new(path.as_ref()))
}
/// Returns true if all globs in this set match the path given.
///
/// This takes a [`Candidate`] as input, which can be used to amortize the
/// cost of preparing a path for matching.
///
/// This will return true if the set of globs is empty, as in that case all
/// `0` of the globs will match.
pub fn matches_all_candidate(&self, path: &Candidate<'_>) -> bool {
    // Every strategy must report a match; bail out on the first miss.
    for strat in &self.strats {
        if !strat.is_match(path) {
            return false;
        }
    }
    true
}
/// Returns the sequence number of every glob pattern that matches the
/// given path.
pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
@@ -410,10 +455,20 @@ impl GlobSet {
into.dedup();
}
fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
if pats.is_empty() {
return Ok(GlobSet { len: 0, strats: vec![] });
/// Builds a new matcher from a collection of Glob patterns.
///
/// Once a matcher is built, no new patterns can be added to it.
pub fn new<I, G>(globs: I) -> Result<GlobSet, Error>
where
I: IntoIterator<Item = G>,
G: AsRef<Glob>,
{
let mut it = globs.into_iter().peekable();
if it.peek().is_none() {
return Ok(GlobSet::empty());
}
let mut len = 0;
let mut lits = LiteralStrategy::new();
let mut base_lits = BasenameLiteralStrategy::new();
let mut exts = ExtensionStrategy::new();
@@ -421,7 +476,10 @@ impl GlobSet {
let mut suffixes = MultiStrategyBuilder::new();
let mut required_exts = RequiredExtensionStrategyBuilder::new();
let mut regexes = MultiStrategyBuilder::new();
for (i, p) in pats.iter().enumerate() {
for (i, p) in it.enumerate() {
len += 1;
let p = p.as_ref();
match MatchStrategy::new(p) {
MatchStrategy::Literal(lit) => {
lits.add(i, lit);
@@ -445,7 +503,11 @@ impl GlobSet {
required_exts.add(i, ext, p.regex().to_owned());
}
MatchStrategy::Regex => {
debug!("glob converted to regex: {:?}", p);
debug!(
"glob `{:?}` converted to regex: `{:?}`",
p,
p.regex()
);
regexes.add(i, p.regex().to_owned());
}
}
@@ -461,20 +523,33 @@ impl GlobSet {
required_exts.0.len(),
regexes.literals.len()
);
Ok(GlobSet {
len: pats.len(),
strats: vec![
GlobSetMatchStrategy::Extension(exts),
GlobSetMatchStrategy::BasenameLiteral(base_lits),
GlobSetMatchStrategy::Literal(lits),
GlobSetMatchStrategy::Suffix(suffixes.suffix()),
GlobSetMatchStrategy::Prefix(prefixes.prefix()),
GlobSetMatchStrategy::RequiredExtension(
required_exts.build()?,
),
GlobSetMatchStrategy::Regex(regexes.regex_set()?),
],
})
let mut strats = Vec::with_capacity(7);
// Only add strategies that are populated
if !exts.0.is_empty() {
strats.push(GlobSetMatchStrategy::Extension(exts));
}
if !base_lits.0.is_empty() {
strats.push(GlobSetMatchStrategy::BasenameLiteral(base_lits));
}
if !lits.0.is_empty() {
strats.push(GlobSetMatchStrategy::Literal(lits));
}
if !suffixes.is_empty() {
strats.push(GlobSetMatchStrategy::Suffix(suffixes.suffix()));
}
if !prefixes.is_empty() {
strats.push(GlobSetMatchStrategy::Prefix(prefixes.prefix()));
}
if !required_exts.0.is_empty() {
strats.push(GlobSetMatchStrategy::RequiredExtension(
required_exts.build()?,
));
}
if !regexes.is_empty() {
strats.push(GlobSetMatchStrategy::Regex(regexes.regex_set()?));
}
Ok(GlobSet { len, strats })
}
}
@@ -504,7 +579,7 @@ impl GlobSetBuilder {
///
/// Once a matcher is built, no new patterns can be added to it.
pub fn build(&self) -> Result<GlobSet, Error> {
GlobSet::new(&self.pats)
GlobSet::new(self.pats.iter())
}
/// Add a new pattern to this set.
@@ -540,18 +615,30 @@ impl<'a> std::fmt::Debug for Candidate<'a> {
impl<'a> Candidate<'a> {
/// Create a new candidate for matching from the given path.
pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
Self::from_cow(Vec::from_path_lossy(path.as_ref()))
}
/// Create a new candidate for matching from the given path as a sequence
/// of bytes.
///
/// Generally speaking, this routine expects the bytes to be
/// _conventionally_ UTF-8. It is legal for the byte sequence to contain
/// invalid UTF-8. However, if the bytes are in some other encoding that
/// isn't ASCII compatible (for example, UTF-16), then the results of
/// matching are unspecified.
pub fn from_bytes<P: AsRef<[u8]> + ?Sized>(path: &'a P) -> Candidate<'a> {
Self::from_cow(Cow::Borrowed(path.as_ref()))
}
fn from_cow(path: Cow<'a, [u8]>) -> Candidate<'a> {
let path = normalize_path(path);
let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
Candidate { path, basename, ext }
}
fn path_prefix(&self, max: usize) -> &[u8] {
if self.path.len() <= max {
&*self.path
} else {
&self.path[..max]
}
if self.path.len() <= max { &*self.path } else { &self.path[..max] }
}
fn path_suffix(&self, max: usize) -> &[u8] {
@@ -892,6 +979,10 @@ impl MultiStrategyBuilder {
patset: Arc::new(Pool::new(create)),
})
}
/// Returns true if no literals have been added to this builder.
fn is_empty(&self) -> bool {
    self.literals.is_empty()
}
}
#[derive(Clone, Debug)]
@@ -928,13 +1019,26 @@ impl RequiredExtensionStrategyBuilder {
///
/// The escaping works by surrounding meta-characters with brackets. For
/// example, `*` becomes `[*]`.
///
/// # Example
///
/// ```
/// use globset::escape;
///
/// assert_eq!(escape("foo*bar"), "foo[*]bar");
/// assert_eq!(escape("foo?bar"), "foo[?]bar");
/// assert_eq!(escape("foo[bar"), "foo[[]bar");
/// assert_eq!(escape("foo]bar"), "foo[]]bar");
/// assert_eq!(escape("foo{bar"), "foo[{]bar");
/// assert_eq!(escape("foo}bar"), "foo[}]bar");
/// ```
pub fn escape(s: &str) -> String {
let mut escaped = String::with_capacity(s.len());
for c in s.chars() {
match c {
// note that ! does not need escaping because it is only special
// inside brackets
'?' | '*' | '[' | ']' => {
'?' | '*' | '[' | ']' | '{' | '}' => {
escaped.push('[');
escaped.push(c);
escaped.push(']');
@@ -979,6 +1083,7 @@ mod tests {
let set = GlobSetBuilder::new().build().unwrap();
assert!(!set.is_match(""));
assert!(!set.is_match("a"));
assert!(set.matches_all("a"));
}
#[test]
@@ -1019,4 +1124,16 @@ mod tests {
let matches = set.matches("nada");
assert_eq!(0, matches.len());
}
#[test]
fn debug() {
    // Build a set from a fixed list of patterns and check that the
    // builder's Debug output lists them verbatim, in insertion order.
    let mut builder = GlobSetBuilder::new();
    for pat in ["*foo*", "*bar*", "*quux*"] {
        builder.add(Glob::new(pat).unwrap());
    }
    assert_eq!(
        format!("{builder:?}"),
        "GlobSetBuilder { pats: [Glob(\"*foo*\"), Glob(\"*bar*\"), Glob(\"*quux*\")] }",
    );
}
}

View File

@@ -4,21 +4,25 @@ use bstr::{ByteSlice, ByteVec};
/// The final component of the path, if it is a normal file.
///
/// If the path terminates in `.`, `..`, or consists solely of a root of
/// prefix, file_name will return None.
/// If the path terminates in `..`, or consists solely of a root of prefix,
/// file_name will return `None`.
pub(crate) fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
if path.last_byte().map_or(true, |b| b == b'.') {
if path.is_empty() {
return None;
}
let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0);
Some(match *path {
let got = match *path {
Cow::Borrowed(path) => Cow::Borrowed(&path[last_slash..]),
Cow::Owned(ref path) => {
let mut path = path.clone();
path.drain_bytes(..last_slash);
Cow::Owned(path)
}
})
};
if got == &b".."[..] {
return None;
}
Some(got)
}
/// Return a file extension given a path's file name.
@@ -84,7 +88,7 @@ pub(crate) fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
mod tests {
use std::borrow::Cow;
use bstr::{ByteVec, B};
use bstr::{B, ByteVec};
use super::{file_name_ext, normalize_path};

View File

@@ -1,6 +1,6 @@
[package]
name = "grep"
version = "0.3.1" #:version
version = "0.4.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
@@ -11,15 +11,15 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/grep"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense OR MIT"
edition = "2021"
edition = "2024"
[dependencies]
grep-cli = { version = "0.1.10", path = "../cli" }
grep-matcher = { version = "0.1.7", path = "../matcher" }
grep-pcre2 = { version = "0.1.7", path = "../pcre2", optional = true }
grep-printer = { version = "0.2.1", path = "../printer" }
grep-regex = { version = "0.1.12", path = "../regex" }
grep-searcher = { version = "0.1.13", path = "../searcher" }
grep-cli = { version = "0.1.12", path = "../cli" }
grep-matcher = { version = "0.1.8", path = "../matcher" }
grep-pcre2 = { version = "0.1.9", path = "../pcre2", optional = true }
grep-printer = { version = "0.3.1", path = "../printer" }
grep-regex = { version = "0.1.14", path = "../regex" }
grep-searcher = { version = "0.1.16", path = "../searcher" }
[dev-dependencies]
termcolor = "1.0.4"

View File

@@ -1,6 +1,6 @@
[package]
name = "ignore"
version = "0.4.23" #:version
version = "0.4.25" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
A fast library for efficiently matching ignore files such as `.gitignore`
@@ -12,7 +12,7 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/ignore"
readme = "README.md"
keywords = ["glob", "ignore", "gitignore", "pattern", "file"]
license = "Unlicense OR MIT"
edition = "2021"
edition = "2024"
[lib]
name = "ignore"
@@ -20,7 +20,7 @@ bench = false
[dependencies]
crossbeam-deque = "0.8.3"
globset = { version = "0.4.15", path = "../globset" }
globset = { version = "0.4.18", path = "../globset" }
log = "0.4.20"
memchr = "2.6.3"
same-file = "1.0.6"
@@ -36,7 +36,7 @@ version = "0.1.2"
[dev-dependencies]
bstr = { version = "1.6.2", default-features = false, features = ["std"] }
crossbeam-channel = "0.5.8"
crossbeam-channel = "0.5.15"
[features]
# DEPRECATED. It is a no-op. SIMD is done automatically through runtime

View File

@@ -18,8 +18,8 @@ fn main() {
let stdout_thread = std::thread::spawn(move || {
let mut stdout = std::io::BufWriter::new(std::io::stdout());
for dent in rx {
stdout.write(&*Vec::from_path_lossy(dent.path())).unwrap();
stdout.write(b"\n").unwrap();
stdout.write_all(&Vec::from_path_lossy(dent.path())).unwrap();
stdout.write_all(b"\n").unwrap();
}
});

View File

@@ -27,9 +27,10 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["bat", "batch"], &["*.bat"]),
(&["bazel"], &[
"*.bazel", "*.bzl", "*.BUILD", "*.bazelrc", "BUILD", "MODULE.bazel",
"WORKSPACE", "WORKSPACE.bazel",
"WORKSPACE", "WORKSPACE.bazel", "WORKSPACE.bzlmod",
]),
(&["bitbake"], &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
(&["boxlang"], &["*.bx", "*.bxm", "*.bxs"]),
(&["brotli"], &["*.br"]),
(&["buildstream"], &["*.bst"]),
(&["bzip2"], &["*.bz2", "*.tbz2"]),
@@ -39,6 +40,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["carp"], &["*.carp"]),
(&["cbor"], &["*.cbor"]),
(&["ceylon"], &["*.ceylon"]),
(&["cfml"], &["*.cfc", "*.cfm"]),
(&["clojure"], &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
(&["cmake"], &["*.cmake", "CMakeLists.txt"]),
(&["cmd"], &["*.bat", "*.cmd"]),
@@ -62,7 +64,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["cython"], &["*.pyx", "*.pxi", "*.pxd"]),
(&["d"], &["*.d"]),
(&["dart"], &["*.dart"]),
(&["devicetree"], &["*.dts", "*.dtsi"]),
(&["devicetree"], &["*.dts", "*.dtsi", "*.dtso"]),
(&["dhall"], &["*.dhall"]),
(&["diff"], &["*.patch", "*.diff"]),
(&["dita"], &["*.dita", "*.ditamap", "*.ditaval"]),
@@ -88,6 +90,8 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["fsharp"], &["*.fs", "*.fsx", "*.fsi"]),
(&["fut"], &["*.fut"]),
(&["gap"], &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
(&["gdscript"], &["*.gd"]),
(&["gleam"], &["*.gleam"]),
(&["gn"], &["*.gn", "*.gni"]),
(&["go"], &["*.go"]),
(&["gprbuild"], &["*.gpr"]),
@@ -117,6 +121,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["julia"], &["*.jl"]),
(&["jupyter"], &["*.ipynb", "*.jpynb"]),
(&["k"], &["*.k"]),
(&["kconfig"], &["Kconfig", "Kconfig.*"]),
(&["kotlin"], &["*.kt", "*.kts"]),
(&["lean"], &["*.lean"]),
(&["less"], &["*.less"]),
@@ -149,6 +154,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
]),
(&["lilypond"], &["*.ly", "*.ily"]),
(&["lisp"], &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
(&["llvm"], &["*.ll"]),
(&["lock"], &["*.lock", "package-lock.json"]),
(&["log"], &["*.log"]),
(&["lua"], &["*.lua"]),
@@ -159,6 +165,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
"[Gg][Nn][Uu]makefile", "[Mm]akefile",
"[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am",
"[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in",
"Makefile.*",
"*.mk", "*.mak"
]),
(&["mako"], &["*.mako", "*.mao"]),
@@ -181,7 +188,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["motoko"], &["*.mo"]),
(&["msbuild"], &[
"*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets",
"*.sln",
"*.sln", "*.slnf"
]),
(&["nim"], &["*.nim", "*.nimf", "*.nimble", "*.nims"]),
(&["nix"], &["*.nix"]),
@@ -210,7 +217,9 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["py", "python"], &["*.py", "*.pyi"]),
(&["qmake"], &["*.pro", "*.pri", "*.prf"]),
(&["qml"], &["*.qml"]),
(&["r"], &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
(&["qrc"], &["*.qrc"]),
(&["qui"], &["*.ui"]),
(&["r"], &["*.R", "*.r", "*.Rmd", "*.rmd", "*.Rnw", "*.rnw"]),
(&["racket"], &["*.rkt"]),
(&["raku"], &[
"*.raku", "*.rakumod", "*.rakudoc", "*.rakutest",
@@ -227,14 +236,16 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
// Idiomatic files
"config.ru", "Gemfile", ".irbrc", "Rakefile",
// Extensions
"*.gemspec", "*.rb", "*.rbw"
"*.gemspec", "*.rb", "*.rbw", "*.rake"
]),
(&["rust"], &["*.rs"]),
(&["sass"], &["*.sass", "*.scss"]),
(&["scala"], &["*.scala", "*.sbt"]),
(&["scdoc"], &["*.scd", "*.scdoc"]),
(&["seed7"], &["*.sd7", "*.s7i"]),
(&["sh"], &[
// Portable/misc. init files
".login", ".logout", ".profile", "profile",
".env", ".login", ".logout", ".profile", "profile",
// bash-specific init files
".bash_login", "bash_login",
".bash_logout", "bash_logout",
@@ -253,7 +264,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
".zprofile", "zprofile",
".zshrc", "zshrc",
// Extensions
"*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh",
"*.bash", "*.csh", "*.env", "*.ksh", "*.sh", "*.tcsh", "*.zsh",
]),
(&["slim"], &["*.skim", "*.slim", "*.slime"]),
(&["smarty"], &["*.tpl"]),
@@ -263,9 +274,10 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["spark"], &["*.spark"]),
(&["spec"], &["*.spec"]),
(&["sql"], &["*.sql", "*.psql"]),
(&["ssa"], &["*.ssa"]),
(&["stylus"], &["*.styl"]),
(&["sv"], &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
(&["svelte"], &["*.svelte"]),
(&["svelte"], &["*.svelte", "*.svelte.ts"]),
(&["svg"], &["*.svg"]),
(&["swift"], &["*.swift"]),
(&["swig"], &["*.def", "*.i"]),
@@ -280,9 +292,8 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["texinfo"], &["*.texi"]),
(&["textile"], &["*.textile"]),
(&["tf"], &[
"*.tf", "*.auto.tfvars", "terraform.tfvars", "*.tf.json",
"*.auto.tfvars.json", "terraform.tfvars.json", "*.terraformrc",
"terraform.rc", "*.tfrc", "*.terraform.lock.hcl",
"*.tf", "*.tf.json", "*.tfvars", "*.tfvars.json",
"*.terraformrc", "terraform.rc", "*.tfrc", "*.terraform.lock.hcl",
]),
(&["thrift"], &["*.thrift"]),
(&["toml"], &["*.toml", "Cargo.lock"]),
@@ -290,6 +301,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["twig"], &["*.twig"]),
(&["txt"], &["*.txt"]),
(&["typoscript"], &["*.typoscript", "*.ts"]),
(&["typst"], &["*.typ"]),
(&["usd"], &["*.usd", "*.usda", "*.usdc"]),
(&["v"], &["*.v", "*.vsh"]),
(&["vala"], &["*.vala"]),

View File

@@ -118,6 +118,18 @@ struct IgnoreInner {
/// The absolute base path of this matcher. Populated only if parent
/// directories are added.
absolute_base: Option<Arc<PathBuf>>,
/// The directory that gitignores should be interpreted relative to.
///
/// Usually this is the directory containing the gitignore file. But in
/// some cases, like for global gitignores or for gitignores specified
/// explicitly, this should generally be set to the current working
/// directory. This is only used for global gitignores or "explicit"
/// gitignores.
///
/// When `None`, this means the CWD could not be determined or is unknown.
/// In this case, global gitignore files are ignored because they otherwise
/// cannot be matched correctly.
global_gitignores_relative_to: Option<PathBuf>,
/// Explicit global ignore matchers specified by the caller.
explicit_ignores: Arc<Vec<Gitignore>>,
/// Ignore files used in addition to `.ignore`
@@ -212,7 +224,7 @@ impl Ignore {
igtmp.absolute_base = Some(absolute_base.clone());
igtmp.has_git =
if self.0.opts.require_git && self.0.opts.git_ignore {
parent.join(".git").exists()
parent.join(".git").exists() || parent.join(".jj").exists()
} else {
false
};
@@ -244,14 +256,15 @@ impl Ignore {
/// Like add_child, but takes a full path and returns an IgnoreInner.
fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option<Error>) {
let git_type = if self.0.opts.require_git
&& (self.0.opts.git_ignore || self.0.opts.git_exclude)
{
let check_vcs_dir = self.0.opts.require_git
&& (self.0.opts.git_ignore || self.0.opts.git_exclude);
let git_type = if check_vcs_dir {
dir.join(".git").metadata().ok().map(|md| md.file_type())
} else {
None
};
let has_git = git_type.map(|_| true).unwrap_or(false);
let has_git =
check_vcs_dir && (git_type.is_some() || dir.join(".jj").exists());
let mut errs = PartialErrorBuilder::default();
let custom_ig_matcher = if self.0.custom_ignore_filenames.is_empty() {
@@ -290,6 +303,7 @@ impl Ignore {
errs.maybe_push(err);
m
};
let gi_exclude_matcher = if !self.0.opts.git_exclude {
Gitignore::empty()
} else {
@@ -318,6 +332,10 @@ impl Ignore {
parent: Some(self.clone()),
is_absolute_parent: false,
absolute_base: self.0.absolute_base.clone(),
global_gitignores_relative_to: self
.0
.global_gitignores_relative_to
.clone(),
explicit_ignores: self.0.explicit_ignores.clone(),
custom_ignore_filenames: self.0.custom_ignore_filenames.clone(),
custom_ignore_matcher: custom_ig_matcher,
@@ -461,21 +479,27 @@ impl Ignore {
// off of `path`. Overall, this seems a little ham-fisted, but
// it does fix a nasty bug. It should do fine until we overhaul
// this crate.
let dirpath = self.0.dir.as_path();
let path_prefix = match strip_prefix("./", dirpath) {
None => dirpath,
Some(stripped_dot_slash) => stripped_dot_slash,
};
let path = match strip_prefix(path_prefix, path) {
None => abs_parent_path.join(path),
Some(p) => {
let p = match strip_prefix("/", p) {
None => p,
Some(p) => p,
};
abs_parent_path.join(p)
}
};
let path = abs_parent_path.join(
self.parents()
.take_while(|ig| !ig.0.is_absolute_parent)
.last()
.map_or(path, |ig| {
// This is a weird special case when ripgrep users
// search with just a `.`, as some tools do
// automatically (like consult). In this case, if
// we don't bail out now, the code below will strip
// a leading `.` from `path`, which might mangle
// a hidden file name!
if ig.0.dir.as_path() == Path::new(".") {
return path;
}
let without_dot_slash =
strip_if_is_prefix("./", ig.0.dir.as_path());
let relative_base =
strip_if_is_prefix(without_dot_slash, path);
strip_if_is_prefix("/", relative_base)
}),
);
for ig in
self.parents().skip_while(|ig| !ig.0.is_absolute_parent)
@@ -575,6 +599,16 @@ pub(crate) struct IgnoreBuilder {
explicit_ignores: Vec<Gitignore>,
/// Ignore files in addition to .ignore.
custom_ignore_filenames: Vec<OsString>,
/// The directory that gitignores should be interpreted relative to.
///
/// Usually this is the directory containing the gitignore file. But in
/// some cases, like for global gitignores or for gitignores specified
/// explicitly, this should generally be set to the current working
/// directory. This is only used for global gitignores or "explicit"
/// gitignores.
///
/// When `None`, global gitignores are ignored.
global_gitignores_relative_to: Option<PathBuf>,
/// Ignore config.
opts: IgnoreOptions,
}
@@ -582,8 +616,9 @@ pub(crate) struct IgnoreBuilder {
impl IgnoreBuilder {
/// Create a new builder for an `Ignore` matcher.
///
/// All relative file paths are resolved with respect to the current
/// working directory.
/// It is likely a bug to use this without also calling `current_dir()`
/// outside of tests. This isn't made mandatory because this is an internal
/// abstraction and it's annoying to update tests.
pub(crate) fn new() -> IgnoreBuilder {
IgnoreBuilder {
dir: Path::new("").to_path_buf(),
@@ -591,6 +626,7 @@ impl IgnoreBuilder {
types: Arc::new(Types::empty()),
explicit_ignores: vec![],
custom_ignore_filenames: vec![],
global_gitignores_relative_to: None,
opts: IgnoreOptions {
hidden: true,
ignore: true,
@@ -609,10 +645,20 @@ impl IgnoreBuilder {
/// The matcher returned won't match anything until ignore rules from
/// directories are added to it.
pub(crate) fn build(&self) -> Ignore {
self.build_with_cwd(None)
}
/// Builds a new `Ignore` matcher using the given CWD directory.
///
/// The matcher returned won't match anything until ignore rules from
/// directories are added to it.
pub(crate) fn build_with_cwd(&self, cwd: Option<PathBuf>) -> Ignore {
let global_gitignores_relative_to =
cwd.or_else(|| self.global_gitignores_relative_to.clone());
let git_global_matcher = if !self.opts.git_global {
Gitignore::empty()
} else {
let mut builder = GitignoreBuilder::new("");
} else if let Some(ref cwd) = global_gitignores_relative_to {
let mut builder = GitignoreBuilder::new(cwd);
builder
.case_insensitive(self.opts.ignore_case_insensitive)
.unwrap();
@@ -621,6 +667,11 @@ impl IgnoreBuilder {
log::debug!("{}", err);
}
gi
} else {
log::debug!(
"ignoring global gitignore file because CWD is not known"
);
Gitignore::empty()
};
Ignore(Arc::new(IgnoreInner {
@@ -631,6 +682,7 @@ impl IgnoreBuilder {
parent: None,
is_absolute_parent: true,
absolute_base: None,
global_gitignores_relative_to,
explicit_ignores: Arc::new(self.explicit_ignores.clone()),
custom_ignore_filenames: Arc::new(
self.custom_ignore_filenames.clone(),
@@ -645,6 +697,15 @@ impl IgnoreBuilder {
}))
}
/// Set the current directory used for matching global gitignores.
pub(crate) fn current_dir(
&mut self,
cwd: impl Into<PathBuf>,
) -> &mut IgnoreBuilder {
self.global_gitignores_relative_to = Some(cwd.into());
self
}
/// Add an override matcher.
///
/// By default, no override matcher is used.
@@ -874,12 +935,21 @@ fn resolve_git_commondir(
Ok(commondir_abs)
}
/// Strips `prefix` from `path` if it's a prefix, otherwise returns `path`
/// unchanged.
fn strip_if_is_prefix<'a, P: AsRef<Path> + ?Sized>(
    prefix: &'a P,
    path: &'a Path,
) -> &'a Path {
    // `map_or(path, |p| p)` maps with the identity function, which is
    // exactly `unwrap_or`; use the direct form.
    strip_prefix(prefix, path).unwrap_or(path)
}
#[cfg(test)]
mod tests {
use std::{io::Write, path::Path};
use crate::{
dir::IgnoreBuilder, gitignore::Gitignore, tests::TempDir, Error,
Error, dir::IgnoreBuilder, gitignore::Gitignore, tests::TempDir,
};
fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
@@ -943,6 +1013,19 @@ mod tests {
assert!(ig.matched("baz", false).is_none());
}
#[test]
fn gitignore_with_jj() {
    // A `.jj` (Jujutsu) directory is treated like a `.git` directory when
    // deciding whether `.gitignore` rules apply: the matcher checks for
    // either directory before honoring gitignore files.
    let td = tmpdir();
    mkdirp(td.path().join(".jj"));
    wfile(td.path().join(".gitignore"), "foo\n!bar");
    let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
    assert!(err.is_none());
    // `foo` is ignored, `bar` is re-included via `!bar`, and `baz` is
    // untouched by any rule.
    assert!(ig.matched("foo", false).is_ignore());
    assert!(ig.matched("bar", false).is_whitelist());
    assert!(ig.matched("baz", false).is_none());
}
#[test]
fn gitignore_no_git() {
let td = tmpdir();

View File

@@ -20,8 +20,8 @@ use {
};
use crate::{
pathutil::{is_file_name, strip_prefix},
Error, Match, PartialErrorBuilder,
pathutil::{is_file_name, strip_prefix},
};
/// Glob represents a single glob in a gitignore file.
@@ -128,7 +128,10 @@ impl Gitignore {
/// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
/// set or is empty, then `$HOME/.config/git/ignore` is used instead.
pub fn global() -> (Gitignore, Option<Error>) {
GitignoreBuilder::new("").build_global()
match std::env::current_dir() {
Ok(cwd) => GitignoreBuilder::new(cwd).build_global(),
Err(err) => (Gitignore::empty(), Some(err.into())),
}
}
/// Creates a new empty gitignore matcher that never matches anything.
@@ -308,6 +311,7 @@ pub struct GitignoreBuilder {
root: PathBuf,
globs: Vec<Glob>,
case_insensitive: bool,
allow_unclosed_class: bool,
}
impl GitignoreBuilder {
@@ -324,6 +328,7 @@ impl GitignoreBuilder {
root: strip_prefix("./", root).unwrap_or(root).to_path_buf(),
globs: vec![],
case_insensitive: false,
allow_unclosed_class: true,
}
}
@@ -402,6 +407,12 @@ impl GitignoreBuilder {
break;
}
};
// Match Git's handling of .gitignore files that begin with the Unicode BOM
const UTF8_BOM: &str = "\u{feff}";
let line =
if i == 0 { line.trim_start_matches(UTF8_BOM) } else { &line };
if let Err(err) = self.add_line(Some(path.to_path_buf()), &line) {
errs.push(err.tagged(path, lineno));
}
@@ -505,6 +516,7 @@ impl GitignoreBuilder {
.literal_separator(true)
.case_insensitive(self.case_insensitive)
.backslash_escape(true)
.allow_unclosed_class(self.allow_unclosed_class)
.build()
.map_err(|err| Error::Glob {
glob: Some(glob.original.clone()),
@@ -530,6 +542,26 @@ impl GitignoreBuilder {
self.case_insensitive = yes;
Ok(self)
}
/// Toggle whether unclosed character classes are allowed.
///
/// When allowed, a `[` without a matching `]` is treated literally
/// instead of resulting in a parse error.
///
/// For example, if this is set then the glob `[abc` will be treated as the
/// literal string `[abc` instead of returning an error.
///
/// By default, this is true in order to match established `gitignore`
/// semantics. Generally speaking, enabling this leads to worse failure
/// modes since the glob parser becomes more permissive. You might want to
/// enable this when compatibility (e.g., with POSIX glob implementations)
/// is more important than good error messages.
pub fn allow_unclosed_class(
    &mut self,
    yes: bool,
) -> &mut GitignoreBuilder {
    self.allow_unclosed_class = yes;
    self
}
}
/// Return the file path of the current environment's global gitignore file.

View File

@@ -477,11 +477,7 @@ impl<T> Match<T> {
/// Return the match if it is not none. Otherwise, return other.
pub fn or(self, other: Self) -> Self {
if self.is_none() {
other
} else {
self
}
if self.is_none() { other } else { self }
}
}
@@ -527,7 +523,7 @@ mod tests {
let tmpdir = env::temp_dir();
for _ in 0..TRIES {
let count = COUNTER.fetch_add(1, Ordering::SeqCst);
let count = COUNTER.fetch_add(1, Ordering::Relaxed);
let path = tmpdir.join("rust-ignore").join(count.to_string());
if path.is_dir() {
continue;

View File

@@ -1,5 +1,6 @@
/*!
The overrides module provides a way to specify a set of override globs.
This provides functionality similar to `--include` or `--exclude` in command
line tools.
*/
@@ -7,8 +8,8 @@ line tools.
use std::path::Path;
use crate::{
gitignore::{self, Gitignore, GitignoreBuilder},
Error, Match,
gitignore::{self, Gitignore, GitignoreBuilder},
};
/// Glob represents a single glob in an override matcher.
@@ -120,7 +121,9 @@ impl OverrideBuilder {
///
/// Matching is done relative to the directory path provided.
pub fn new<P: AsRef<Path>>(path: P) -> OverrideBuilder {
OverrideBuilder { builder: GitignoreBuilder::new(path) }
let mut builder = GitignoreBuilder::new(path);
builder.allow_unclosed_class(false);
OverrideBuilder { builder }
}
/// Builds a new override matcher from the globs added so far.
@@ -143,7 +146,8 @@ impl OverrideBuilder {
/// Toggle whether the globs should be matched case insensitively or not.
///
/// When this option is changed, only globs added after the change will be affected.
/// When this option is changed, only globs added after the change will be
/// affected.
///
/// This is disabled by default.
pub fn case_insensitive(
@@ -155,6 +159,28 @@ impl OverrideBuilder {
self.builder.case_insensitive(yes)?;
Ok(self)
}
/// Toggle whether unclosed character classes are allowed.
///
/// When allowed, a `[` without a matching `]` is treated literally
/// instead of resulting in a parse error.
///
/// For example, if this is set then the glob `[abc` will be treated as the
/// literal string `[abc` instead of returning an error.
///
/// By default, this is false. Generally speaking, enabling this leads to
/// worse failure modes since the glob parser becomes more permissive. You
/// might want to enable this when compatibility (e.g., with POSIX glob
/// implementations) is more important than good error messages.
///
/// This default is different from the default for [`Gitignore`]. Namely,
/// [`Gitignore`] is intended to match git's behavior as-is. But this
/// abstraction for "override" globs does not necessarily conform to any
/// other known specification and instead prioritizes better error
/// messages.
pub fn allow_unclosed_class(&mut self, yes: bool) -> &mut OverrideBuilder {
    self.builder.allow_unclosed_class(yes);
    self
}
}
#[cfg(test)]

View File

@@ -91,7 +91,7 @@ use {
regex_automata::util::pool::Pool,
};
use crate::{default_types::DEFAULT_TYPES, pathutil::file_name, Error, Match};
use crate::{Error, Match, default_types::DEFAULT_TYPES, pathutil::file_name};
/// Glob represents a single glob in a set of file type definitions.
///

View File

@@ -5,7 +5,7 @@ use std::{
io,
path::{Path, PathBuf},
sync::atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering},
sync::Arc,
sync::{Arc, OnceLock},
};
use {
@@ -15,11 +15,11 @@ use {
};
use crate::{
Error, PartialErrorBuilder,
dir::{Ignore, IgnoreBuilder},
gitignore::GitignoreBuilder,
overrides::Override,
types::Types,
Error, PartialErrorBuilder,
};
/// A directory entry with a possible error attached.
@@ -484,6 +484,7 @@ pub struct WalkBuilder {
paths: Vec<PathBuf>,
ig_builder: IgnoreBuilder,
max_depth: Option<usize>,
min_depth: Option<usize>,
max_filesize: Option<u64>,
follow_links: bool,
same_file_system: bool,
@@ -491,6 +492,18 @@ pub struct WalkBuilder {
threads: usize,
skip: Option<Arc<Handle>>,
filter: Option<Filter>,
/// The directory that gitignores should be interpreted relative to.
///
/// Usually this is the directory containing the gitignore file. But in
/// some cases, like for global gitignores or for gitignores specified
/// explicitly, this should generally be set to the current working
/// directory. This is only used for global gitignores or "explicit"
/// gitignores.
///
/// When `None`, the CWD is fetched from `std::env::current_dir()`. If
/// that fails, then global gitignores are ignored (an error is logged).
global_gitignores_relative_to:
OnceLock<Result<PathBuf, Arc<std::io::Error>>>,
}
#[derive(Clone)]
@@ -508,10 +521,18 @@ impl std::fmt::Debug for WalkBuilder {
.field("paths", &self.paths)
.field("ig_builder", &self.ig_builder)
.field("max_depth", &self.max_depth)
.field("min_depth", &self.min_depth)
.field("max_filesize", &self.max_filesize)
.field("follow_links", &self.follow_links)
.field("same_file_system", &self.same_file_system)
.field("sorter", &"<...>")
.field("threads", &self.threads)
.field("skip", &self.skip)
.field("filter", &"<...>")
.field(
"global_gitignores_relative_to",
&self.global_gitignores_relative_to,
)
.finish()
}
}
@@ -528,6 +549,7 @@ impl WalkBuilder {
paths: vec![path.as_ref().to_path_buf()],
ig_builder: IgnoreBuilder::new(),
max_depth: None,
min_depth: None,
max_filesize: None,
follow_links: false,
same_file_system: false,
@@ -535,6 +557,7 @@ impl WalkBuilder {
threads: 0,
skip: None,
filter: None,
global_gitignores_relative_to: OnceLock::new(),
}
}
@@ -542,6 +565,7 @@ impl WalkBuilder {
pub fn build(&self) -> Walk {
let follow_links = self.follow_links;
let max_depth = self.max_depth;
let min_depth = self.min_depth;
let sorter = self.sorter.clone();
let its = self
.paths
@@ -556,6 +580,9 @@ impl WalkBuilder {
if let Some(max_depth) = max_depth {
wd = wd.max_depth(max_depth);
}
if let Some(min_depth) = min_depth {
wd = wd.min_depth(min_depth);
}
if let Some(ref sorter) = sorter {
match sorter.clone() {
Sorter::ByName(cmp) => {
@@ -575,7 +602,10 @@ impl WalkBuilder {
})
.collect::<Vec<_>>()
.into_iter();
let ig_root = self.ig_builder.build();
let ig_root = self
.get_or_set_current_dir()
.map(|cwd| self.ig_builder.build_with_cwd(Some(cwd.to_path_buf())))
.unwrap_or_else(|| self.ig_builder.build());
Walk {
its,
it: None,
@@ -593,10 +623,15 @@ impl WalkBuilder {
/// Instead, the returned value must be run with a closure. e.g.,
/// `builder.build_parallel().run(|| |path| { println!("{path:?}"); WalkState::Continue })`.
pub fn build_parallel(&self) -> WalkParallel {
let ig_root = self
.get_or_set_current_dir()
.map(|cwd| self.ig_builder.build_with_cwd(Some(cwd.to_path_buf())))
.unwrap_or_else(|| self.ig_builder.build());
WalkParallel {
paths: self.paths.clone().into_iter(),
ig_root: self.ig_builder.build(),
ig_root,
max_depth: self.max_depth,
min_depth: self.min_depth,
max_filesize: self.max_filesize,
follow_links: self.follow_links,
same_file_system: self.same_file_system,
@@ -621,6 +656,26 @@ impl WalkBuilder {
/// The default, `None`, imposes no depth restriction.
pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
    self.max_depth = depth;
    // Maintain the invariant `min_depth <= max_depth`: if both bounds
    // are set and the newly requested maximum falls below the already
    // configured minimum, raise the maximum up to the minimum so the
    // depth range never becomes empty. The most recent call "wins" by
    // adjusting the other bound rather than being rejected.
    if self.min_depth.is_some()
        && self.max_depth.is_some()
        && self.max_depth < self.min_depth
    {
        self.max_depth = self.min_depth;
    }
    self
}
/// The minimum depth to recurse.
///
/// The default, `None`, imposes no minimum depth restriction.
pub fn min_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
    self.min_depth = depth;
    // Keep the two bounds consistent: when both are set and the new
    // minimum exceeds the configured maximum, clamp the minimum down to
    // the maximum so the depth range never becomes empty by accident.
    if let (Some(min), Some(max)) = (self.min_depth, self.max_depth) {
        if min > max {
            self.min_depth = self.max_depth;
        }
    }
    self
}
@@ -651,12 +706,25 @@ impl WalkBuilder {
///
/// This has lower precedence than all other sources of ignore rules.
///
/// # Errors
///
/// If there was a problem adding the ignore file, then an error is
/// returned. Note that the error may indicate *partial* failure. For
/// example, if an ignore file contains an invalid glob, all other globs
/// are still applied.
///
/// An error will also occur if this walker could not get the current
/// working directory (and `WalkBuilder::current_dir` isn't set).
pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
let mut builder = GitignoreBuilder::new("");
let path = path.as_ref();
let Some(cwd) = self.get_or_set_current_dir() else {
let err = std::io::Error::other(format!(
"CWD is not known, ignoring global gitignore {}",
path.display()
));
return Some(err.into());
};
let mut builder = GitignoreBuilder::new(cwd);
let mut errs = PartialErrorBuilder::default();
errs.maybe_push(builder.add(path));
match builder.build() {
@@ -798,6 +866,10 @@ impl WalkBuilder {
///
/// When disabled, git-related ignore rules are applied even when searching
/// outside a git repository.
///
/// In particular, if this is `false` then `.gitignore` files will be read
/// from parent directories above the git root directory containing `.git`,
/// which is different from the git behavior.
pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder {
    // Thin delegation: the flag lives on the ignore matcher builder and
    // takes effect when `build`/`build_parallel` constructs the matcher.
    self.ig_builder.require_git(yes);
    self
}
@@ -894,6 +966,10 @@ impl WalkBuilder {
///
/// Note that the errors for reading entries that may not satisfy the
/// predicate will still be yielded.
///
/// Note also that only one filter predicate can be applied to a
/// `WalkBuilder`. Calling this subsequent times overrides previous filter
/// predicates.
pub fn filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder
where
P: Fn(&DirEntry) -> bool + Send + Sync + 'static,
@@ -901,6 +977,55 @@ impl WalkBuilder {
self.filter = Some(Filter(Arc::new(filter)));
self
}
/// Set the current working directory used for matching global gitignores.
///
/// If this is not set, then this walker will attempt to discover the
/// correct path from the environment's current working directory. If
/// that fails, then global gitignore files will be ignored.
///
/// Global gitignore files come from things like a user's git configuration
/// or from gitignore files added via [`WalkBuilder::add_ignore`].
pub fn current_dir(
    &mut self,
    cwd: impl Into<PathBuf>,
) -> &mut WalkBuilder {
    let cwd = cwd.into();
    self.ig_builder.current_dir(cwd.clone());
    // `OnceLock::set` fails if a value is already stored (from a prior
    // call here, or from lazy discovery in `get_or_set_current_dir`).
    // In that case, overwrite the stored value in place; `&mut self`
    // guarantees we have exclusive access for `get_mut`.
    if let Err(cwd) = self.global_gitignores_relative_to.set(Ok(cwd)) {
        // OK because `Err` from `set` implies a value exists.
        *self.global_gitignores_relative_to.get_mut().unwrap() = cwd;
    }
    self
}
/// Gets the currently configured CWD on this walk builder.
///
/// This is "lazy." That is, we only ask for the CWD from the environment
/// if `WalkBuilder::current_dir` hasn't been called yet. And we ensure
/// that we only do it once.
fn get_or_set_current_dir(&self) -> Option<&Path> {
    self.global_gitignores_relative_to
        .get_or_init(|| match std::env::current_dir() {
            Ok(path) => {
                log::trace!(
                    "automatically discovered CWD: {}",
                    path.display()
                );
                Ok(path)
            }
            Err(err) => {
                log::debug!(
                    "failed to find CWD \
                     (global gitignores will be ignored): \
                     {err}"
                );
                Err(Arc::new(err))
            }
        })
        .as_ref()
        .ok()
        .map(|path| path.as_path())
}
}
/// Walk is a recursive directory iterator over file paths in one or more
@@ -1191,6 +1316,7 @@ pub struct WalkParallel {
ig_root: Ignore,
max_filesize: Option<u64>,
max_depth: Option<usize>,
min_depth: Option<usize>,
follow_links: bool,
same_file_system: bool,
threads: usize,
@@ -1290,6 +1416,7 @@ impl WalkParallel {
quit_now: quit_now.clone(),
active_workers: active_workers.clone(),
max_depth: self.max_depth,
min_depth: self.min_depth,
max_filesize: self.max_filesize,
follow_links: self.follow_links,
skip: self.skip.clone(),
@@ -1305,7 +1432,7 @@ impl WalkParallel {
fn threads(&self) -> usize {
if self.threads == 0 {
2
std::thread::available_parallelism().map_or(1, |n| n.get()).min(12)
} else {
self.threads
}
@@ -1420,8 +1547,11 @@ impl Stack {
stealers: stealers.clone(),
})
.collect();
// Distribute the initial messages.
// Distribute the initial messages, reverse the order to cancel out
// the other reversal caused by the inherent LIFO processing of the
// per-thread stacks which are filled here.
init.into_iter()
.rev()
.zip(stacks.iter().cycle())
.for_each(|(m, s)| s.push(m));
stacks
@@ -1476,6 +1606,8 @@ struct Worker<'s> {
/// The maximum depth of directories to descend. A value of `0` means no
/// descension at all.
max_depth: Option<usize>,
/// The minimum depth of directories to descend.
min_depth: Option<usize>,
/// The maximum size a searched file can be (in bytes). If a file exceeds
/// this size it will be skipped.
max_filesize: Option<u64>,
@@ -1504,10 +1636,19 @@ impl<'s> Worker<'s> {
}
fn run_one(&mut self, mut work: Work) -> WalkState {
let should_visit = self
.min_depth
.map(|min_depth| work.dent.depth() >= min_depth)
.unwrap_or(true);
// If the work is not a directory, then we can just execute the
// caller's callback immediately and move on.
if work.is_symlink() || !work.is_dir() {
return self.visitor.visit(Ok(work.dent));
return if should_visit {
self.visitor.visit(Ok(work.dent))
} else {
WalkState::Continue
};
}
if let Some(err) = work.add_parents() {
let state = self.visitor.visit(Err(err));
@@ -1540,9 +1681,11 @@ impl<'s> Worker<'s> {
// entry before passing the error value.
let readdir = work.read_dir();
let depth = work.dent.depth();
let state = self.visitor.visit(Ok(work.dent));
if !state.is_continue() {
return state;
if should_visit {
let state = self.visitor.visit(Ok(work.dent));
if !state.is_continue() {
return state;
}
}
if !descend {
return WalkState::Skip;
@@ -1887,7 +2030,7 @@ fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
#[cfg(windows)]
fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
use winapi_util::{file, Handle};
use winapi_util::{Handle, file};
let h = Handle::from_path_any(path)?;
file::information(h).map(|info| info.volume_serial_number())
@@ -1933,11 +2076,7 @@ mod tests {
}
fn normal_path(unix: &str) -> String {
if cfg!(windows) {
unix.replace("\\", "/")
} else {
unix.to_string()
}
if cfg!(windows) { unix.replace("\\", "/") } else { unix.to_string() }
}
fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
@@ -2149,6 +2288,51 @@ mod tests {
);
}
#[test]
fn min_depth() {
    // Fixture layout (relative to the temp root):
    //   depth 1: a, foo
    //   depth 2: a/b, a/foo
    //   depth 3: a/b/c, a/b/foo
    //   depth 4: a/b/c/foo
    let td = tmpdir();
    mkdirp(td.path().join("a/b/c"));
    wfile(td.path().join("foo"), "");
    wfile(td.path().join("a/foo"), "");
    wfile(td.path().join("a/b/foo"), "");
    wfile(td.path().join("a/b/c/foo"), "");

    // No minimum depth configured: everything is yielded.
    let builder = WalkBuilder::new(td.path());
    assert_paths(
        td.path(),
        &builder,
        &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
    );

    // The root itself (depth 0) is never part of the expected paths, so
    // minimum depths of 0 and 1 yield the same set here.
    let mut builder = WalkBuilder::new(td.path());
    assert_paths(
        td.path(),
        &builder.min_depth(Some(0)),
        &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
    );
    assert_paths(
        td.path(),
        &builder.min_depth(Some(1)),
        &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
    );
    // min_depth(2) filters out the depth-1 entries `a` and `foo`.
    assert_paths(
        td.path(),
        builder.min_depth(Some(2)),
        &["a/b", "a/b/c", "a/b/c/foo", "a/b/foo", "a/foo"],
    );
    assert_paths(
        td.path(),
        builder.min_depth(Some(3)),
        &["a/b/c", "a/b/c/foo", "a/b/foo"],
    );
    // Deeper than anything in the fixture: nothing matches.
    assert_paths(td.path(), builder.min_depth(Some(10)), &[]);
    // Conflicting bounds: `max_depth(1)` below `min_depth(2)` is clamped
    // up to the minimum, so only entries at exactly depth 2 remain.
    assert_paths(
        td.path(),
        builder.min_depth(Some(2)).max_depth(Some(1)),
        &["a/b", "a/foo"],
    );
}
#[test]
fn max_filesize() {
let td = tmpdir();

View File

@@ -200,13 +200,17 @@ fn test_dirs_in_deep() {
assert!(m("ROOT/parent_dir/dir_deep_00", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_00/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_00/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_00/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_00/child_dir/file", false).is_ignore()
);
// 01
assert!(m("ROOT/parent_dir/dir_deep_01", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_01/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_01/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_01/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_01/child_dir/file", false).is_ignore()
);
// 02
assert!(m("ROOT/parent_dir/dir_deep_02", true).is_none());
@@ -248,51 +252,67 @@ fn test_dirs_in_deep() {
assert!(m("ROOT/parent_dir/dir_deep_20", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_20/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_20/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_20/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_20/child_dir/file", false).is_ignore()
);
// 21
assert!(m("ROOT/parent_dir/dir_deep_21", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_21/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_21/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_21/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_21/child_dir/file", false).is_ignore()
);
// 22
// dir itself doesn't match
assert!(m("ROOT/parent_dir/dir_deep_22", true).is_none());
assert!(m("ROOT/parent_dir/dir_deep_22/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_22/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_22/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_22/child_dir/file", false).is_ignore()
);
// 23
// dir itself doesn't match
assert!(m("ROOT/parent_dir/dir_deep_23", true).is_none());
assert!(m("ROOT/parent_dir/dir_deep_23/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_23/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_23/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_23/child_dir/file", false).is_ignore()
);
// 30
assert!(m("ROOT/parent_dir/dir_deep_30", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_30/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_30/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_30/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_30/child_dir/file", false).is_ignore()
);
// 31
assert!(m("ROOT/parent_dir/dir_deep_31", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_31/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_31/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_31/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_31/child_dir/file", false).is_ignore()
);
// 32
// dir itself doesn't match
assert!(m("ROOT/parent_dir/dir_deep_32", true).is_none());
assert!(m("ROOT/parent_dir/dir_deep_32/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_32/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_32/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_32/child_dir/file", false).is_ignore()
);
// 33
// dir itself doesn't match
assert!(m("ROOT/parent_dir/dir_deep_33", true).is_none());
assert!(m("ROOT/parent_dir/dir_deep_33/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_33/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_33/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_33/child_dir/file", false).is_ignore()
);
}

View File

@@ -0,0 +1,2 @@
ignore/this/path
# This file begins with a BOM (U+FEFF)

View File

@@ -0,0 +1,17 @@
use ignore::gitignore::GitignoreBuilder;

// Path to a gitignore fixture that starts with a UTF-8 BOM (U+FEFF)
// followed by the pattern `ignore/this/path`.
//
// NOTE: `'static` is redundant in a `const` item's reference type (the
// lifetime is always `'static` there), so it is omitted.
const IGNORE_FILE: &str = "tests/gitignore_skip_bom.gitignore";

/// Skip a Byte-Order Mark (BOM) at the beginning of the file, matching Git's
/// behavior.
///
/// Ref: <https://github.com/BurntSushi/ripgrep/issues/2177>
#[test]
fn gitignore_skip_bom() {
    let mut builder = GitignoreBuilder::new("ROOT");
    let error = builder.add(IGNORE_FILE);
    assert!(error.is_none(), "failed to open gitignore file");
    // If the BOM were treated as part of the first pattern, the pattern
    // would effectively be `\u{FEFF}ignore/this/path` and this match
    // would fail.
    let g = builder.build().unwrap();
    assert!(g.matched("ignore/this/path", false).is_ignore());
}

View File

@@ -1,6 +1,6 @@
[package]
name = "grep-matcher"
version = "0.1.7" #:version
version = "0.1.8" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
A trait for regular expressions, with a focus on line oriented search.
@@ -12,7 +12,7 @@ readme = "README.md"
keywords = ["regex", "pattern", "trait"]
license = "Unlicense OR MIT"
autotests = false
edition = "2021"
edition = "2024"
[dependencies]
memchr = "2.6.3"

View File

@@ -144,7 +144,7 @@ fn is_valid_cap_letter(b: &u8) -> bool {
#[cfg(test)]
mod tests {
use super::{find_cap_ref, interpolate, CaptureRef};
use super::{CaptureRef, find_cap_ref, interpolate};
macro_rules! find {
($name:ident, $text:expr) => {

View File

@@ -389,6 +389,15 @@ pub trait Captures {
/// for the overall match.
fn get(&self, i: usize) -> Option<Match>;
/// Return the overall match for the capture.
///
/// This returns the match for index `0`. That is, it is equivalent to
/// `get(0).unwrap()`.
#[inline]
fn as_match(&self) -> Match {
    self.get(0).unwrap()
}
/// Returns true if and only if these captures are empty. This occurs
/// when `len` is `0`.
///

View File

@@ -1,6 +1,6 @@
[package]
name = "grep-pcre2"
version = "0.1.7" #:version
version = "0.1.9" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Use PCRE2 with the 'grep' crate.
@@ -11,9 +11,9 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/pcre2"
readme = "README.md"
keywords = ["regex", "grep", "pcre", "backreference", "look"]
license = "Unlicense OR MIT"
edition = "2018"
edition = "2024"
[dependencies]
grep-matcher = { version = "0.1.7", path = "../matcher" }
grep-matcher = { version = "0.1.8", path = "../matcher" }
log = "0.4.20"
pcre2 = "0.2.6"

View File

@@ -55,7 +55,12 @@ impl RegexMatcherBuilder {
format!("(?:{})", p.as_ref())
});
}
let mut singlepat = pats.join("|");
let mut singlepat = if patterns.is_empty() {
// A way to spell a pattern that can never match anything.
r"[^\S\s]".to_string()
} else {
pats.join("|")
};
if self.case_smart && !has_uppercase_literal(&singlepat) {
builder.caseless(true);
}

View File

@@ -1,6 +1,6 @@
[package]
name = "grep-printer"
version = "0.2.1" #:version
version = "0.3.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
An implementation of the grep crate's Sink trait that provides standard
@@ -12,7 +12,7 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/printer"
readme = "README.md"
keywords = ["grep", "pattern", "print", "printer", "sink"]
license = "Unlicense OR MIT"
edition = "2021"
edition = "2024"
[features]
default = ["serde"]
@@ -20,22 +20,22 @@ serde = ["dep:serde", "dep:serde_json"]
[dependencies]
bstr = "1.6.2"
grep-matcher = { version = "0.1.7", path = "../matcher" }
grep-searcher = { version = "0.1.13", path = "../searcher" }
grep-matcher = { version = "0.1.8", path = "../matcher" }
grep-searcher = { version = "0.1.16", path = "../searcher" }
log = "0.4.5"
termcolor = "1.3.0"
serde = { version = "1.0.193", optional = true }
serde_json = { version = "1.0.107", optional = true }
[dev-dependencies]
grep-regex = { version = "0.1.12", path = "../regex" }
grep-regex = { version = "0.1.14", path = "../regex" }
[package.metadata.docs.rs]
# We want to document all features.
all-features = true
# This opts into a nightly unstable option to show the features that need to be
# enabled for public API items. To do that, we set 'docsrs', and when that's
# enabled, we enable the 'doc_auto_cfg' feature.
# enabled, we enable the 'doc_cfg' feature.
#
# To test this locally, run:
#

View File

@@ -51,13 +51,13 @@ impl std::fmt::Display for ColorError {
ColorError::UnrecognizedOutType(ref name) => write!(
f,
"unrecognized output type '{}'. Choose from: \
path, line, column, match.",
path, line, column, match, highlight.",
name,
),
ColorError::UnrecognizedSpecType(ref name) => write!(
f,
"unrecognized spec type '{}'. Choose from: \
fg, bg, style, none.",
fg, bg, style, none.",
name,
),
ColorError::UnrecognizedColor(_, ref msg) => write!(f, "{}", msg),
@@ -65,13 +65,13 @@ impl std::fmt::Display for ColorError {
f,
"unrecognized style attribute '{}'. Choose from: \
nobold, bold, nointense, intense, nounderline, \
underline.",
underline, noitalic, italic.",
name,
),
ColorError::InvalidFormat(ref original) => write!(
f,
"invalid color spec format: '{}'. Valid format \
is '(path|line|column|match):(fg|bg|style):(value)'.",
"invalid color spec format: '{}'. Valid format is \
'(path|line|column|match|highlight):(fg|bg|style):(value)'.",
original,
),
}
@@ -90,6 +90,7 @@ pub struct ColorSpecs {
line: ColorSpec,
column: ColorSpec,
matched: ColorSpec,
highlight: ColorSpec,
}
/// A single color specification provided by the user.
@@ -99,7 +100,7 @@ pub struct ColorSpecs {
/// The format of a `Spec` is a triple: `{type}:{attribute}:{value}`. Each
/// component is defined as follows:
///
/// * `{type}` can be one of `path`, `line`, `column` or `match`.
/// * `{type}` can be one of `path`, `line`, `column`, `match` or `highlight`.
/// * `{attribute}` can be one of `fg`, `bg` or `style`. `{attribute}` may also
/// be the special value `none`, in which case, `{value}` can be omitted.
/// * `{value}` is either a color name (for `fg`/`bg`) or a style instruction.
@@ -121,7 +122,7 @@ pub struct ColorSpecs {
/// `0x`.
///
/// Valid style instructions are `nobold`, `bold`, `intense`, `nointense`,
/// `underline`, `nounderline`.
/// `underline`, `nounderline`, `italic`, `noitalic`.
///
/// ## Example
///
@@ -181,6 +182,7 @@ enum OutType {
Line,
Column,
Match,
Highlight,
}
/// The specification type.
@@ -201,6 +203,8 @@ enum Style {
NoIntense,
Underline,
NoUnderline,
Italic,
NoItalic,
}
impl ColorSpecs {
@@ -214,6 +218,7 @@ impl ColorSpecs {
OutType::Line => spec.merge_into(&mut merged.line),
OutType::Column => spec.merge_into(&mut merged.column),
OutType::Match => spec.merge_into(&mut merged.matched),
OutType::Highlight => spec.merge_into(&mut merged.highlight),
}
}
merged
@@ -247,6 +252,12 @@ impl ColorSpecs {
pub fn matched(&self) -> &ColorSpec {
&self.matched
}
/// Return the color specification for coloring the entire line when it
/// contains matched text.
pub fn highlight(&self) -> &ColorSpec {
    &self.highlight
}
}
impl UserColorSpec {
@@ -286,6 +297,12 @@ impl SpecValue {
Style::NoUnderline => {
cspec.set_underline(false);
}
Style::Italic => {
cspec.set_italic(true);
}
Style::NoItalic => {
cspec.set_italic(false);
}
},
}
}
@@ -340,6 +357,7 @@ impl std::str::FromStr for OutType {
"line" => Ok(OutType::Line),
"column" => Ok(OutType::Column),
"match" => Ok(OutType::Match),
"highlight" => Ok(OutType::Highlight),
_ => Err(ColorError::UnrecognizedOutType(s.to_string())),
}
}
@@ -370,6 +388,8 @@ impl std::str::FromStr for Style {
"nointense" => Ok(Style::NoIntense),
"underline" => Ok(Style::Underline),
"nounderline" => Ok(Style::NoUnderline),
"italic" => Ok(Style::Italic),
"noitalic" => Ok(Style::NoItalic),
_ => Err(ColorError::UnrecognizedStyle(s.to_string())),
}
}

View File

@@ -0,0 +1,92 @@
use crate::hyperlink::HyperlinkAlias;
/// Aliases to well-known hyperlink schemes.
///
/// These need to be sorted by name: `HyperlinkAlias::find` performs a
/// binary search over this table (the ordering is enforced by the
/// `aliases_are_sorted` test).
pub(super) const HYPERLINK_PATTERN_ALIASES: &[HyperlinkAlias] = &[
    alias(
        "cursor",
        "Cursor scheme (cursor://)",
        "cursor://file{path}:{line}:{column}",
    ),
    // `default` carries a display priority so documentation listings can
    // surface it first.
    prioritized_alias(
        0,
        "default",
        "RFC 8089 scheme (file://) (platform-aware)",
        // On Windows the format omits the `{host}` variable.
        {
            #[cfg(not(windows))]
            {
                "file://{host}{path}"
            }
            #[cfg(windows)]
            {
                "file://{path}"
            }
        },
    ),
    alias(
        "file",
        "RFC 8089 scheme (file://) with host",
        "file://{host}{path}",
    ),
    // https://github.com/misaki-web/grepp
    alias("grep+", "grep+ scheme (grep+://)", "grep+://{path}:{line}"),
    alias(
        "kitty",
        "kitty-style RFC 8089 scheme (file://) with line number",
        "file://{host}{path}#{line}",
    ),
    // https://macvim.org/docs/gui_mac.txt.html#mvim%3A%2F%2F
    alias(
        "macvim",
        "MacVim scheme (mvim://)",
        "mvim://open?url=file://{path}&line={line}&column={column}",
    ),
    // An empty format disables hyperlinks; prioritized for listings.
    prioritized_alias(1, "none", "disable hyperlinks", ""),
    // https://macromates.com/blog/2007/the-textmate-url-scheme/
    alias(
        "textmate",
        "TextMate scheme (txmt://)",
        "txmt://open?url=file://{path}&line={line}&column={column}",
    ),
    // https://code.visualstudio.com/docs/editor/command-line#_opening-vs-code-with-urls
    alias(
        "vscode",
        "VS Code scheme (vscode://)",
        "vscode://file{path}:{line}:{column}",
    ),
    alias(
        "vscode-insiders",
        "VS Code Insiders scheme (vscode-insiders://)",
        "vscode-insiders://file{path}:{line}:{column}",
    ),
    alias(
        "vscodium",
        "VSCodium scheme (vscodium://)",
        "vscodium://file{path}:{line}:{column}",
    ),
];
/// Creates a [`HyperlinkAlias`] without any display priority.
const fn alias(
    name: &'static str,
    description: &'static str,
    format: &'static str,
) -> HyperlinkAlias {
    HyperlinkAlias {
        name,
        description,
        format,
        display_priority: None,
    }
}
/// Creates a [`HyperlinkAlias`] that carries a display priority.
const fn prioritized_alias(
    priority: i16,
    name: &'static str,
    description: &'static str,
    format: &'static str,
) -> HyperlinkAlias {
    let display_priority = Some(priority);
    HyperlinkAlias { name, description, format, display_priority }
}

View File

@@ -5,7 +5,11 @@ use {
termcolor::{HyperlinkSpec, WriteColor},
};
use crate::{hyperlink_aliases, util::DecimalFormatter};
use crate::util::DecimalFormatter;
use self::aliases::HYPERLINK_PATTERN_ALIASES;
mod aliases;
/// Hyperlink configuration.
///
@@ -107,8 +111,8 @@ impl std::str::FromStr for HyperlinkFormat {
}
let mut builder = FormatBuilder::new();
let input = match hyperlink_aliases::find(s) {
Some(format) => format,
let input = match HyperlinkAlias::find(s) {
Some(alias) => alias.format(),
None => s,
};
let mut name = String::new();
@@ -179,6 +183,63 @@ impl std::fmt::Display for HyperlinkFormat {
}
}
/// An alias for a hyperlink format.
///
/// Hyperlink aliases are built-in formats, therefore they hold static values.
/// Some of their features are usable in const blocks.
#[derive(Clone, Debug)]
pub struct HyperlinkAlias {
    /// The name used to look this alias up (e.g. `"vscode"`).
    name: &'static str,
    /// A very short human-readable description for documentation.
    description: &'static str,
    /// The hyperlink format string this alias expands to.
    format: &'static str,
    /// Optional sort hint; lower values are shown first in listings.
    display_priority: Option<i16>,
}
impl HyperlinkAlias {
    /// Returns the name of the alias.
    pub const fn name(&self) -> &str {
        self.name
    }

    /// Returns a very short description of this hyperlink alias.
    pub const fn description(&self) -> &str {
        self.description
    }

    /// Returns the display priority of this alias.
    ///
    /// If no priority is set, then `None` is returned.
    ///
    /// The display priority is meant to reflect some special status associated
    /// with an alias. For example, the `default` and `none` aliases have a
    /// display priority. This is meant to encourage listing them first in
    /// documentation.
    ///
    /// A lower display priority implies the alias should be shown before
    /// aliases with a higher (or absent) display priority.
    ///
    /// Callers cannot rely on any specific display priority value to remain
    /// stable across semver compatible releases of this crate.
    pub const fn display_priority(&self) -> Option<i16> {
        self.display_priority
    }

    /// Returns the format string of the alias.
    const fn format(&self) -> &'static str {
        self.format
    }

    /// Looks for the hyperlink alias defined by the given name.
    ///
    /// If one does not exist, `None` is returned.
    fn find(name: &str) -> Option<&HyperlinkAlias> {
        // Binary search is valid because `HYPERLINK_PATTERN_ALIASES` is
        // kept sorted by name (checked by the `aliases_are_sorted` test).
        HYPERLINK_PATTERN_ALIASES
            .binary_search_by_key(&name, |alias| alias.name())
            .map(|i| &HYPERLINK_PATTERN_ALIASES[i])
            .ok()
    }
}
/// A static environment for hyperlink interpolation.
///
/// This environment permits setting the values of variables used in hyperlink
@@ -255,15 +316,18 @@ impl std::fmt::Display for HyperlinkFormatError {
match self.kind {
NoVariables => {
let aliases = hyperlink_aliases::iter()
.map(|(name, _)| name)
.collect::<Vec<&str>>()
.join(", ");
let mut aliases = hyperlink_aliases();
aliases.sort_by_key(|alias| {
alias.display_priority().unwrap_or(i16::MAX)
});
let names: Vec<&str> =
aliases.iter().map(|alias| alias.name()).collect();
write!(
f,
"at least a {{path}} variable is required in a \
hyperlink format, or otherwise use a valid alias: {}",
aliases,
hyperlink format, or otherwise use a valid alias: \
{aliases}",
aliases = names.join(", "),
)
}
NoPathVariable => {
@@ -418,7 +482,7 @@ impl FormatBuilder {
let err_invalid_scheme = HyperlinkFormatError {
kind: HyperlinkFormatErrorKind::InvalidScheme,
};
let Some(Part::Text(ref part)) = self.parts.first() else {
let Some(Part::Text(part)) = self.parts.first() else {
return Err(err_invalid_scheme);
};
let Some(colon) = part.find_byte(b':') else {
@@ -474,7 +538,7 @@ impl Part {
values: &Values,
dest: &mut Vec<u8>,
) {
match self {
match *self {
Part::Text(ref text) => dest.extend_from_slice(text),
Part::Host => dest.extend_from_slice(
env.host.as_ref().map(|s| s.as_bytes()).unwrap_or(b""),
@@ -702,16 +766,20 @@ impl HyperlinkPath {
/// Returns a hyperlink path from an OS path.
#[cfg(windows)]
pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
// On Windows, Path::canonicalize returns the result of
// GetFinalPathNameByHandleW with VOLUME_NAME_DOS,
// which produces paths such as the following:
// On Windows, we use `std::path::absolute` instead of `Path::canonicalize`
// as it can be much faster since it does not touch the file system.
// It wraps the [`GetFullPathNameW`][1] API, except for verbatim paths
// (those which start with `\\?\`, see [the documentation][2] for details).
//
// Here, we strip any verbatim path prefixes since we cannot use them
// in hyperlinks anyway. This can only happen if the user explicitly
// supplies a verbatim path as input, which already needs to be absolute:
//
// \\?\C:\dir\file.txt (local path)
// \\?\UNC\server\dir\file.txt (network share)
//
// The \\?\ prefix comes from VOLUME_NAME_DOS and is constant.
// It is followed either by the drive letter, or by UNC\
// (universal naming convention), which denotes a network share.
// The `\\?\` prefix is constant for verbatim paths, and can be followed
// by `UNC\` (universal naming convention), which denotes a network share.
//
// Given that the default URL format on Windows is file://{path}
// we need to return the following from this function:
@@ -750,18 +818,19 @@ impl HyperlinkPath {
//
// It doesn't parse any other number of slashes in "file//server" as a
// network path.
//
// [1]: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew
// [2]: https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
const WIN32_NAMESPACE_PREFIX: &str = r"\\?\";
const UNC_PREFIX: &str = r"UNC\";
// As for Unix, we canonicalize the path to make sure we have an
// absolute path.
let path = match original_path.canonicalize() {
let path = match std::path::absolute(original_path) {
Ok(path) => path,
Err(err) => {
log::debug!(
"hyperlink creation for {:?} failed, error occurred \
during path canonicalization: {}",
during conversion to absolute path: {}",
original_path,
err,
);
@@ -784,24 +853,20 @@ impl HyperlinkPath {
return None;
}
};
// As the comment above says, we expect all canonicalized paths to
// begin with a \\?\. If it doesn't, then something weird is happening
// and we should just give up.
if !string.starts_with(WIN32_NAMESPACE_PREFIX) {
log::debug!(
"hyperlink creation for {:?} failed, canonicalization \
returned {:?}, which does not start with \\\\?\\",
original_path,
path,
);
return None;
}
string = &string[WIN32_NAMESPACE_PREFIX.len()..];
// And as above, drop the UNC prefix too, but keep the leading slash.
if string.starts_with(UNC_PREFIX) {
string = &string[(UNC_PREFIX.len() - 1)..];
// Strip verbatim path prefixes (see the comment above for details).
if string.starts_with(WIN32_NAMESPACE_PREFIX) {
string = &string[WIN32_NAMESPACE_PREFIX.len()..];
// Drop the UNC prefix if there is one, but keep the leading slash.
if string.starts_with(UNC_PREFIX) {
string = &string[(UNC_PREFIX.len() - 1)..];
}
} else if string.starts_with(r"\\") || string.starts_with(r"//") {
// Drop one of the two leading slashes of network paths, it will be added back.
string = &string[1..];
}
// Finally, add a leading slash. In the local file case, this turns
// C:\foo\bar into /C:\foo\bar (and then percent encoding turns it into
// /C:/foo/bar). In the network share case, this turns \share\foo\bar
@@ -862,6 +927,26 @@ impl HyperlinkPath {
}
}
/// Returns the set of hyperlink aliases supported by this crate.
///
/// Aliases are supported by the `FromStr` trait implementation of a
/// [`HyperlinkFormat`]. That is, if an alias is seen, then it is automatically
/// replaced with the corresponding format. For example, the `vscode` alias
/// maps to `vscode://file{path}:{line}:{column}`.
///
/// This is exposed to allow callers to include hyperlink aliases in
/// documentation in a way that is guaranteed to match what is actually
/// supported.
///
/// The list returned is guaranteed to be sorted lexicographically
/// by the alias name. Callers may want to re-sort the list using
/// [`HyperlinkAlias::display_priority`] via a stable sort when showing the
/// list to users. This will cause special aliases like `none` and `default` to
/// appear first.
pub fn hyperlink_aliases() -> Vec<HyperlinkAlias> {
    // `to_vec` clones the static table into an owned `Vec`; it is the
    // idiomatic (and clippy-preferred) form of `iter().cloned().collect()`.
    HYPERLINK_PATTERN_ALIASES.to_vec()
}
#[cfg(test)]
mod tests {
use std::str::FromStr;
@@ -1006,4 +1091,75 @@ mod tests {
err(InvalidVariable("bar{{".to_string())),
);
}
#[test]
#[cfg(windows)]
fn convert_to_hyperlink_path() {
    // Helper: run a Windows OS path through `HyperlinkPath::from_path`
    // and return the resulting bytes as a `String` for comparison.
    let convert = |path| {
        String::from_utf8(
            HyperlinkPath::from_path(Path::new(path)).unwrap().0,
        )
        .unwrap()
    };
    // Local drive paths gain a leading slash and forward slashes.
    assert_eq!(convert(r"C:\dir\file.txt"), "/C:/dir/file.txt");
    // `..` components are resolved during conversion to an absolute path.
    assert_eq!(
        convert(r"C:\foo\bar\..\other\baz.txt"),
        "/C:/foo/other/baz.txt"
    );
    // UNC network shares keep a double slash before the server name.
    assert_eq!(convert(r"\\server\dir\file.txt"), "//server/dir/file.txt");
    assert_eq!(
        convert(r"\\server\dir\foo\..\other\file.txt"),
        "//server/dir/other/file.txt"
    );
    // Verbatim prefixes (`\\?\` and `\\?\UNC\`) are stripped.
    assert_eq!(convert(r"\\?\C:\dir\file.txt"), "/C:/dir/file.txt");
    assert_eq!(
        convert(r"\\?\UNC\server\dir\file.txt"),
        "//server/dir/file.txt"
    );
}
#[test]
fn aliases_are_sorted() {
    let aliases = hyperlink_aliases();
    // The table must be non-empty and strictly ordered by name, since
    // alias lookup relies on a binary search over it.
    aliases.first().expect("aliases should be non-empty");
    for pair in aliases.windows(2) {
        let (prev, name) = (pair[0].name(), pair[1].name());
        assert!(
            name > prev,
            "'{prev}' should come before '{name}' in \
             HYPERLINK_PATTERN_ALIASES",
        );
    }
}
#[test]
fn alias_names_are_reasonable() {
    // There's no hard rule here, but if we want to define an alias
    // with a name that doesn't pass this assert, then we should
    // probably flag it as worthy of consideration. For example, we
    // really do not want to define an alias that contains `{` or `}`,
    // which might confuse it for a variable.
    for alias in hyperlink_aliases() {
        let reasonable = alias
            .name()
            .chars()
            .all(|c| c.is_alphanumeric() || matches!(c, '+' | '-' | '.'));
        assert!(reasonable);
    }
}
#[test]
fn aliases_are_valid_formats() {
    // Every alias must map to a format string that itself parses as a
    // valid `HyperlinkFormat`.
    hyperlink_aliases().iter().for_each(|alias| {
        let (name, format) = (alias.name(), alias.format());
        assert!(
            format.parse::<HyperlinkFormat>().is_ok(),
            "invalid hyperlink alias '{name}': {format}",
        );
    });
}
}

View File

@@ -1,85 +0,0 @@
/// Aliases to well-known hyperlink schemes.
///
/// These need to be sorted by name.
// NOTE: the sorted-by-name invariant is relied on by `find` below, which
// performs a binary search over this table; the `is_sorted` test enforces it.
const HYPERLINK_PATTERN_ALIASES: &[(&str, &str)] = &[
    // `{host}` is only included on non-Windows platforms here.
    #[cfg(not(windows))]
    ("default", "file://{host}{path}"),
    #[cfg(windows)]
    ("default", "file://{path}"),
    ("file", "file://{host}{path}"),
    // https://github.com/misaki-web/grepp
    ("grep+", "grep+://{path}:{line}"),
    ("kitty", "file://{host}{path}#{line}"),
    // https://macvim.org/docs/gui_mac.txt.html#mvim%3A%2F%2F
    ("macvim", "mvim://open?url=file://{path}&line={line}&column={column}"),
    // The empty format disables hyperlinks.
    ("none", ""),
    // https://macromates.com/blog/2007/the-textmate-url-scheme/
    ("textmate", "txmt://open?url=file://{path}&line={line}&column={column}"),
    // https://code.visualstudio.com/docs/editor/command-line#_opening-vs-code-with-urls
    ("vscode", "vscode://file{path}:{line}:{column}"),
    ("vscode-insiders", "vscode-insiders://file{path}:{line}:{column}"),
    ("vscodium", "vscodium://file{path}:{line}:{column}"),
];
/// Look for the hyperlink format defined by the given alias name.
///
/// If one does not exist, `None` is returned.
pub(crate) fn find(name: &str) -> Option<&str> {
    // The alias table is sorted by name, so a binary search suffices.
    match HYPERLINK_PATTERN_ALIASES
        .binary_search_by_key(&name, |&(alias, _)| alias)
    {
        Ok(i) => Some(HYPERLINK_PATTERN_ALIASES[i].1),
        Err(_) => None,
    }
}
/// Return an iterator over all available alias names and their definitions.
pub(crate) fn iter() -> impl Iterator<Item = (&'static str, &'static str)> {
    HYPERLINK_PATTERN_ALIASES.iter().map(|&pair| pair)
}
#[cfg(test)]
mod tests {
    use crate::HyperlinkFormat;

    use super::*;

    #[test]
    fn is_sorted() {
        // The table must stay sorted so `find` can use binary search.
        assert!(
            !HYPERLINK_PATTERN_ALIASES.is_empty(),
            "aliases should be non-empty"
        );
        for pair in HYPERLINK_PATTERN_ALIASES.windows(2) {
            let (prev, name) = (pair[0].0, pair[1].0);
            assert!(
                name > prev,
                "'{prev}' should come before '{name}' in \
                 HYPERLINK_PATTERN_ALIASES",
            );
        }
    }

    #[test]
    fn alias_names_are_reasonable() {
        // There's no hard rule here, but if we want to define an alias
        // with a name that doesn't pass this assert, then we should
        // probably flag it as worthy of consideration. For example, we
        // really do not want to define an alias that contains `{` or `}`,
        // which might confuse it for a variable.
        for &(name, _) in HYPERLINK_PATTERN_ALIASES {
            let reasonable = name
                .chars()
                .all(|c| c.is_alphanumeric() || matches!(c, '+' | '-' | '.'));
            assert!(reasonable);
        }
    }

    #[test]
    fn aliases_are_valid_formats() {
        // Each alias definition must parse as a valid hyperlink format.
        for &(name, definition) in HYPERLINK_PATTERN_ALIASES {
            assert!(
                definition.parse::<HyperlinkFormat>().is_ok(),
                "invalid hyperlink alias '{name}': {definition}",
            );
        }
    }
}

View File

@@ -1,19 +1,19 @@
use std::{
io::{self, Write},
path::Path,
sync::Arc,
time::Instant,
};
use {
grep_matcher::{Match, Matcher},
grep_searcher::{
Searcher, Sink, SinkContext, SinkContextKind, SinkFinish, SinkMatch,
},
grep_searcher::{Searcher, Sink, SinkContext, SinkFinish, SinkMatch},
serde_json as json,
};
use crate::{
counter::CounterWriter, jsont, stats::Stats, util::find_iter_at_in_context,
counter::CounterWriter, jsont, stats::Stats, util::Replacer,
util::find_iter_at_in_context,
};
/// The configuration for the JSON printer.
@@ -24,13 +24,17 @@ use crate::{
#[derive(Debug, Clone)]
struct Config {
pretty: bool,
max_matches: Option<u64>,
always_begin_end: bool,
replacement: Arc<Option<Vec<u8>>>,
}
impl Default for Config {
fn default() -> Config {
Config { pretty: false, max_matches: None, always_begin_end: false }
Config {
pretty: false,
always_begin_end: false,
replacement: Arc::new(None),
}
}
}
@@ -77,16 +81,6 @@ impl JSONBuilder {
self
}
/// Set the maximum amount of matches that are printed.
///
/// If multi line search is enabled and a match spans multiple lines, then
/// that match is counted exactly once for the purposes of enforcing this
/// limit, regardless of how many lines it spans.
pub fn max_matches(&mut self, limit: Option<u64>) -> &mut JSONBuilder {
self.config.max_matches = limit;
self
}
/// When enabled, the `begin` and `end` messages are always emitted, even
/// when no match is found.
///
@@ -98,6 +92,24 @@ impl JSONBuilder {
self.config.always_begin_end = yes;
self
}
/// Set the bytes that will be used to replace each occurrence of a match
/// found.
///
/// The replacement bytes given may include references to capturing groups,
/// which may either be in index form (e.g., `$2`) or can reference named
/// capturing groups if present in the original pattern (e.g., `$foo`).
///
/// For documentation on the full format, please see the `Capture` trait's
/// `interpolate` method in the
/// [grep-printer](https://docs.rs/grep-printer) crate.
pub fn replacement(
&mut self,
replacement: Option<Vec<u8>>,
) -> &mut JSONBuilder {
self.config.replacement = Arc::new(replacement);
self
}
}
/// The JSON printer, which emits results in a JSON lines format.
@@ -256,7 +268,8 @@ impl JSONBuilder {
/// encoded, then the byte offsets correspond to the data after base64
/// decoding.) The `submatch` objects are guaranteed to be sorted by their
/// starting offsets. Note that it is possible for this array to be empty,
/// for example, when searching reports inverted matches.
/// for example, when searching reports inverted matches. If the configuration
/// specifies a replacement, the resulting replacement text is also present.
///
/// #### Message: **context**
///
@@ -286,7 +299,9 @@ impl JSONBuilder {
/// decoding.) The `submatch` objects are guaranteed to be sorted by
/// their starting offsets. Note that it is possible for this array to be
/// non-empty, for example, when searching reports inverted matches such that
/// the original matcher could match things in the contextual lines.
/// the original matcher could match things in the contextual lines. If the
/// configuration specifies a replacement, the resulting replacement text
/// is also present.
///
/// #### Object: **submatch**
///
@@ -308,6 +323,10 @@ impl JSONBuilder {
/// the `lines` field in the
/// [`match`](#message-match) or [`context`](#message-context)
/// messages.
/// * **replacement** (optional) - An
/// [arbitrary data object](#object-arbitrary-data) corresponding to the
/// replacement text for this submatch, if the configuration specifies
/// a replacement.
///
/// #### Object: **stats**
///
@@ -447,6 +466,23 @@ impl JSONBuilder {
/// }
/// }
/// ```
/// and here's what a match type item would look like if a replacement text
/// of 'Moriarity' was given as a parameter:
/// ```json
/// {
/// "type": "match",
/// "data": {
/// "path": {"text": "/home/andrew/sherlock"},
/// "lines": {"text": "For the Doctor Watsons of this world, as opposed to the Sherlock\n"},
/// "line_number": 1,
/// "absolute_offset": 0,
/// "submatches": [
/// {"match": {"text": "Watson"}, "replacement": {"text": "Moriarity"}, "start": 15, "end": 21}
/// ]
/// }
/// }
/// ```
#[derive(Clone, Debug)]
pub struct JSON<W> {
config: Config,
@@ -471,11 +507,11 @@ impl<W: io::Write> JSON<W> {
) -> JSONSink<'static, 's, M, W> {
JSONSink {
matcher,
replacer: Replacer::new(),
json: self,
path: None,
start_time: Instant::now(),
match_count: 0,
after_context_remaining: 0,
binary_byte_offset: None,
begin_printed: false,
stats: Stats::new(),
@@ -497,11 +533,11 @@ impl<W: io::Write> JSON<W> {
{
JSONSink {
matcher,
replacer: Replacer::new(),
json: self,
path: Some(path.as_ref()),
start_time: Instant::now(),
match_count: 0,
after_context_remaining: 0,
binary_byte_offset: None,
begin_printed: false,
stats: Stats::new(),
@@ -519,7 +555,7 @@ impl<W: io::Write> JSON<W> {
} else {
json::to_writer(&mut self.wtr, message)?;
}
self.wtr.write(&[b'\n'])?;
let _ = self.wtr.write(b"\n")?; // This will always be Ok(1) when successful.
Ok(())
}
}
@@ -559,11 +595,11 @@ impl<W> JSON<W> {
#[derive(Debug)]
pub struct JSONSink<'p, 's, M: Matcher, W> {
matcher: M,
replacer: Replacer<M>,
json: &'s mut JSON<W>,
path: Option<&'p Path>,
start_time: Instant,
match_count: u64,
after_context_remaining: u64,
binary_byte_offset: Option<u64>,
begin_printed: bool,
stats: Stats,
@@ -643,30 +679,29 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
Ok(())
}
/// Returns true if this printer should quit.
/// If the configuration specifies a replacement, then this executes the
/// replacement, lazily allocating memory if necessary.
///
/// This implements the logic for handling quitting after seeing a certain
/// amount of matches. In most cases, the logic is simple, but we must
/// permit all "after" contextual lines to print after reaching the limit.
fn should_quit(&self) -> bool {
let limit = match self.json.config.max_matches {
None => return false,
Some(limit) => limit,
};
if self.match_count < limit {
return false;
/// To access the result of a replacement, use `replacer.replacement()`.
fn replace(
&mut self,
searcher: &Searcher,
bytes: &[u8],
range: std::ops::Range<usize>,
) -> io::Result<()> {
self.replacer.clear();
if self.json.config.replacement.is_some() {
let replacement =
(*self.json.config.replacement).as_ref().map(|r| &*r).unwrap();
self.replacer.replace_all(
searcher,
&self.matcher,
bytes,
range,
replacement,
)?;
}
self.after_context_remaining == 0
}
/// Returns whether the current match count exceeds the configured limit.
/// If there is no limit, then this always returns false.
fn match_more_than_limit(&self) -> bool {
let limit = match self.json.config.max_matches {
None => return false,
Some(limit) => limit,
};
self.match_count > limit
Ok(())
}
/// Write the "begin" message.
@@ -689,32 +724,23 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
searcher: &Searcher,
mat: &SinkMatch<'_>,
) -> Result<bool, io::Error> {
self.write_begin_message()?;
self.match_count += 1;
// When we've exceeded our match count, then the remaining context
// lines should not be reset, but instead, decremented. This avoids a
// bug where we display more matches than a configured limit. The main
// idea here is that 'matched' might be called again while printing
// an after-context line. In that case, we should treat this as a
// contextual line rather than a matching line for the purposes of
// termination.
if self.match_more_than_limit() {
self.after_context_remaining =
self.after_context_remaining.saturating_sub(1);
} else {
self.after_context_remaining = searcher.after_context() as u64;
}
self.write_begin_message()?;
self.record_matches(
searcher,
mat.buffer(),
mat.bytes_range_in_buffer(),
)?;
self.replace(searcher, mat.buffer(), mat.bytes_range_in_buffer())?;
self.stats.add_matches(self.json.matches.len() as u64);
self.stats.add_matched_lines(mat.lines().count() as u64);
let submatches = SubMatches::new(mat.bytes(), &self.json.matches);
let submatches = SubMatches::new(
mat.bytes(),
&self.json.matches,
self.replacer.replacement(),
);
let msg = jsont::Message::Match(jsont::Match {
path: self.path,
lines: mat.bytes(),
@@ -723,7 +749,7 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
submatches: submatches.as_slice(),
});
self.json.write_message(&msg)?;
Ok(!self.should_quit())
Ok(true)
}
fn context(
@@ -734,13 +760,14 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
self.write_begin_message()?;
self.json.matches.clear();
if ctx.kind() == &SinkContextKind::After {
self.after_context_remaining =
self.after_context_remaining.saturating_sub(1);
}
let submatches = if searcher.invert_match() {
self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
SubMatches::new(ctx.bytes(), &self.json.matches)
self.replace(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
SubMatches::new(
ctx.bytes(),
&self.json.matches,
self.replacer.replacement(),
)
} else {
SubMatches::empty()
};
@@ -752,7 +779,7 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
submatches: submatches.as_slice(),
});
self.json.write_message(&msg)?;
Ok(!self.should_quit())
Ok(true)
}
fn binary_data(
@@ -776,11 +803,7 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
self.json.wtr.reset_count();
self.start_time = Instant::now();
self.match_count = 0;
self.after_context_remaining = 0;
self.binary_byte_offset = None;
if self.json.config.max_matches == Some(0) {
return Ok(false);
}
if !self.json.config.always_begin_end {
return Ok(true);
@@ -794,10 +817,6 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
_searcher: &Searcher,
finish: &SinkFinish,
) -> Result<(), io::Error> {
if !self.begin_printed {
return Ok(());
}
self.binary_byte_offset = finish.binary_byte_offset();
self.stats.add_elapsed(self.start_time.elapsed());
self.stats.add_searches(1);
@@ -807,6 +826,9 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
self.stats.add_bytes_searched(finish.byte_count());
self.stats.add_bytes_printed(self.json.wtr.count());
if !self.begin_printed {
return Ok(());
}
let msg = jsont::Message::End(jsont::End {
path: self.path,
binary_offset: finish.binary_byte_offset(),
@@ -831,19 +853,27 @@ enum SubMatches<'a> {
impl<'a> SubMatches<'a> {
/// Create a new set of match ranges from a set of matches and the
/// corresponding bytes that those matches apply to.
fn new(bytes: &'a [u8], matches: &[Match]) -> SubMatches<'a> {
fn new(
bytes: &'a [u8],
matches: &[Match],
replacement: Option<(&'a [u8], &'a [Match])>,
) -> SubMatches<'a> {
if matches.len() == 1 {
let mat = matches[0];
SubMatches::Small([jsont::SubMatch {
m: &bytes[mat],
replacement: replacement
.map(|(rbuf, rmatches)| &rbuf[rmatches[0]]),
start: mat.start(),
end: mat.end(),
}])
} else {
let mut match_ranges = vec![];
for &mat in matches {
for (i, &mat) in matches.iter().enumerate() {
match_ranges.push(jsont::SubMatch {
m: &bytes[mat],
replacement: replacement
.map(|(rbuf, rmatches)| &rbuf[rmatches[i]]),
start: mat.start(),
end: mat.end(),
});
@@ -873,7 +903,7 @@ mod tests {
use grep_regex::{RegexMatcher, RegexMatcherBuilder};
use grep_searcher::SearcherBuilder;
use super::{JSONBuilder, JSON};
use super::{JSON, JSONBuilder};
const SHERLOCK: &'static [u8] = b"\
For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -919,9 +949,9 @@ and exhibited clearly, with a label attached.\
#[test]
fn max_matches() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let mut printer =
JSONBuilder::new().max_matches(Some(1)).build(vec![]);
let mut printer = JSONBuilder::new().build(vec![]);
SearcherBuilder::new()
.max_matches(Some(1))
.build()
.search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
.unwrap();
@@ -946,10 +976,10 @@ d
e
";
let matcher = RegexMatcher::new(r"d").unwrap();
let mut printer =
JSONBuilder::new().max_matches(Some(1)).build(vec![]);
let mut printer = JSONBuilder::new().build(vec![]);
SearcherBuilder::new()
.after_context(2)
.max_matches(Some(1))
.build()
.search_reader(
&matcher,

View File

@@ -135,6 +135,7 @@ impl<'a> serde::Serialize for Context<'a> {
pub(crate) struct SubMatch<'a> {
pub(crate) m: &'a [u8],
pub(crate) replacement: Option<&'a [u8]>,
pub(crate) start: usize,
pub(crate) end: usize,
}
@@ -148,6 +149,9 @@ impl<'a> serde::Serialize for SubMatch<'a> {
let mut state = s.serialize_struct("SubMatch", 3)?;
state.serialize_field("match", &Data::from_bytes(self.m))?;
if let Some(r) = self.replacement {
state.serialize_field("replacement", &Data::from_bytes(r))?;
}
state.serialize_field("start", &self.start)?;
state.serialize_field("end", &self.end)?;
state.end()
@@ -186,7 +190,7 @@ impl<'a> Data<'a> {
}
#[cfg(not(unix))]
fn from_path(path: &Path) -> Data {
fn from_path(path: &Path) -> Data<'_> {
// Using lossy conversion means some paths won't round trip precisely,
// but it's not clear what we should actually do. Serde rejects
// non-UTF-8 paths, and OsStr's are serialized as a sequence of UTF-16

View File

@@ -58,22 +58,22 @@ assert_eq!(output, expected);
*/
#![deny(missing_docs)]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![cfg_attr(docsrs, feature(doc_cfg))]
pub use crate::{
color::{default_color_specs, ColorError, ColorSpecs, UserColorSpec},
color::{ColorError, ColorSpecs, UserColorSpec, default_color_specs},
hyperlink::{
HyperlinkConfig, HyperlinkEnvironment, HyperlinkFormat,
HyperlinkFormatError,
HyperlinkAlias, HyperlinkConfig, HyperlinkEnvironment,
HyperlinkFormat, HyperlinkFormatError, hyperlink_aliases,
},
path::{PathPrinter, PathPrinterBuilder},
standard::{Standard, StandardBuilder, StandardSink},
standard::{SquashMode, Standard, StandardBuilder, StandardSink},
stats::Stats,
summary::{Summary, SummaryBuilder, SummaryKind, SummarySink},
};
#[cfg(feature = "serde")]
pub use crate::json::{JSONBuilder, JSONSink, JSON};
pub use crate::json::{JSON, JSONBuilder, JSONSink};
// The maximum number of bytes to execute a search to account for look-ahead.
//
@@ -92,7 +92,6 @@ mod macros;
mod color;
mod counter;
mod hyperlink;
mod hyperlink_aliases;
#[cfg(feature = "serde")]
mod json;
#[cfg(feature = "serde")]

File diff suppressed because it is too large Load Diff

View File

@@ -17,7 +17,7 @@ use crate::{
counter::CounterWriter,
hyperlink::{self, HyperlinkConfig},
stats::Stats,
util::{find_iter_at_in_context, PrinterPath},
util::{PrinterPath, find_iter_at_in_context},
};
/// The configuration for the summary printer.
@@ -32,7 +32,6 @@ struct Config {
hyperlink: HyperlinkConfig,
stats: bool,
path: bool,
max_matches: Option<u64>,
exclude_zero: bool,
separator_field: Arc<Vec<u8>>,
separator_path: Option<u8>,
@@ -47,7 +46,6 @@ impl Default for Config {
hyperlink: HyperlinkConfig::default(),
stats: false,
path: true,
max_matches: None,
exclude_zero: true,
separator_field: Arc::new(b":".to_vec()),
separator_path: None,
@@ -87,7 +85,13 @@ pub enum SummaryKind {
///
/// Note that if `stats` is enabled, then searching continues in order to
/// compute statistics.
Quiet,
QuietWithMatch,
/// Don't show any output and the stop the search once a non-matching file
/// is found.
///
/// Note that if `stats` is enabled, then searching continues in order to
/// compute statistics.
QuietWithoutMatch,
}
impl SummaryKind {
@@ -101,7 +105,7 @@ impl SummaryKind {
match *self {
PathWithMatch | PathWithoutMatch => true,
Count | CountMatches | Quiet => false,
Count | CountMatches | QuietWithMatch | QuietWithoutMatch => false,
}
}
@@ -112,7 +116,8 @@ impl SummaryKind {
match *self {
CountMatches => true,
Count | PathWithMatch | PathWithoutMatch | Quiet => false,
Count | PathWithMatch | PathWithoutMatch | QuietWithMatch
| QuietWithoutMatch => false,
}
}
@@ -122,8 +127,10 @@ impl SummaryKind {
use self::SummaryKind::*;
match *self {
PathWithMatch | Quiet => true,
Count | CountMatches | PathWithoutMatch => false,
PathWithMatch | QuietWithMatch => true,
Count | CountMatches | PathWithoutMatch | QuietWithoutMatch => {
false
}
}
}
}
@@ -246,9 +253,9 @@ impl SummaryBuilder {
///
/// When this is enabled, this printer may need to do extra work in order
/// to compute certain statistics, which could cause the search to take
/// longer. For example, in `Quiet` mode, a search can quit after finding
/// the first match, but if `stats` is enabled, then the search will
/// continue after the first match in order to compute statistics.
/// longer. For example, in `QuietWithMatch` mode, a search can quit after
/// finding the first match, but if `stats` is enabled, then the search
/// will continue after the first match in order to compute statistics.
///
/// For a complete description of available statistics, see [`Stats`].
///
@@ -273,18 +280,6 @@ impl SummaryBuilder {
self
}
/// Set the maximum amount of matches that are printed.
///
/// If multi line search is enabled and a match spans multiple lines, then
/// that match is counted exactly once for the purposes of enforcing this
/// limit, regardless of how many lines it spans.
///
/// This is disabled by default.
pub fn max_matches(&mut self, limit: Option<u64>) -> &mut SummaryBuilder {
self.config.max_matches = limit;
self
}
/// Exclude count-related summary results with no matches.
///
/// When enabled and the mode is either `Count` or `CountMatches`, then
@@ -505,7 +500,9 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
/// search.
pub fn has_match(&self) -> bool {
match self.summary.config.kind {
SummaryKind::PathWithoutMatch => self.match_count == 0,
SummaryKind::PathWithoutMatch | SummaryKind::QuietWithoutMatch => {
self.match_count == 0
}
_ => self.match_count > 0,
}
}
@@ -544,19 +541,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
searcher.multi_line_with_matcher(&self.matcher)
}
/// Returns true if this printer should quit.
///
/// This implements the logic for handling quitting after seeing a certain
/// amount of matches. In most cases, the logic is simple, but we must
/// permit all "after" contextual lines to print after reaching the limit.
fn should_quit(&self) -> bool {
let limit = match self.summary.config.max_matches {
None => return false,
Some(limit) => limit,
};
self.match_count >= limit
}
/// If this printer has a file path associated with it, then this will
/// write that path to the underlying writer followed by a line terminator.
/// (If a path terminator is set, then that is used instead of the line
@@ -672,7 +656,11 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
true
},
)?;
count
// Because of `find_iter_at_in_context` being a giant
// kludge internally, it's possible that it won't find
// *any* matches even though we clearly know that there is
// at least one. So make sure we record at least one here.
count.max(1)
};
if is_multi_line {
self.match_count += sink_match_count;
@@ -685,7 +673,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
} else if self.summary.config.kind.quit_early() {
return Ok(false);
}
Ok(!self.should_quit())
Ok(true)
}
fn binary_data(
@@ -716,10 +704,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
self.start_time = Instant::now();
self.match_count = 0;
self.binary_byte_offset = None;
if self.summary.config.max_matches == Some(0) {
return Ok(false);
}
Ok(true)
}
@@ -749,14 +733,14 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
// don't quit and therefore search the entire contents of the file.
//
// There is an unfortunate inconsistency here. Namely, when using
// Quiet or PathWithMatch, then the printer can quit after the first
// match seen, which could be long before seeing binary data. This
// means that using PathWithMatch can print a path where as using
// QuietWithMatch or PathWithMatch, then the printer can quit after the
// first match seen, which could be long before seeing binary data.
// This means that using PathWithMatch can print a path where as using
// Count might not print it at all because of binary data.
//
// It's not possible to fix this without also potentially significantly
// impacting the performance of Quiet or PathWithMatch, so we accept
// the bug.
// impacting the performance of QuietWithMatch or PathWithMatch, so we
// accept the bug.
if self.binary_byte_offset.is_some()
&& searcher.binary_detection().quit_byte().is_some()
{
@@ -798,7 +782,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
self.write_path_line(searcher)?;
}
}
SummaryKind::Quiet => {}
SummaryKind::QuietWithMatch | SummaryKind::QuietWithoutMatch => {}
}
Ok(())
}
@@ -1012,9 +996,9 @@ and exhibited clearly, with a label attached.
let matcher = RegexMatcher::new(r"Watson").unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.max_matches(Some(1))
.build_no_color(vec![]);
SearcherBuilder::new()
.max_matches(Some(1))
.build()
.search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
.unwrap();
@@ -1122,7 +1106,7 @@ and exhibited clearly, with a label attached.
fn quiet() {
let matcher = RegexMatcher::new(r"Watson|Sherlock").unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Quiet)
.kind(SummaryKind::QuietWithMatch)
.build_no_color(vec![]);
let match_count = {
let mut sink = printer.sink_with_path(&matcher, "sherlock");
@@ -1144,7 +1128,7 @@ and exhibited clearly, with a label attached.
fn quiet_with_stats() {
let matcher = RegexMatcher::new(r"Watson|Sherlock").unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Quiet)
.kind(SummaryKind::QuietWithMatch)
.stats(true)
.build_no_color(vec![]);
let match_count = {

View File

@@ -8,7 +8,7 @@ use {
},
};
use crate::{hyperlink::HyperlinkPath, MAX_LOOK_AHEAD};
use crate::{MAX_LOOK_AHEAD, hyperlink::HyperlinkPath};
/// A type for handling replacements while amortizing allocation.
pub(crate) struct Replacer<M: Matcher> {
@@ -59,19 +59,24 @@ impl<M: Matcher> Replacer<M> {
// See the giant comment in 'find_iter_at_in_context' below for why we
// do this dance.
let is_multi_line = searcher.multi_line_with_matcher(&matcher);
if is_multi_line {
// Get the line_terminator that was removed (if any) so we can add it
// back.
let line_terminator = if is_multi_line {
if haystack[range.end..].len() >= MAX_LOOK_AHEAD {
haystack = &haystack[..range.end + MAX_LOOK_AHEAD];
}
&[]
} else {
// When searching a single line, we should remove the line
// terminator. Otherwise, it's possible for the regex (via
// look-around) to observe the line terminator and not match
// because of it.
let mut m = Match::new(0, range.end);
trim_line_terminator(searcher, haystack, &mut m);
let line_terminator =
trim_line_terminator(searcher, haystack, &mut m);
haystack = &haystack[..m.end()];
}
line_terminator
};
{
let &mut Space { ref mut dst, ref mut caps, ref mut matches } =
self.allocate(matcher)?;
@@ -81,6 +86,7 @@ impl<M: Matcher> Replacer<M> {
replace_with_captures_in_context(
matcher,
haystack,
line_terminator,
range.clone(),
caps,
dst,
@@ -508,6 +514,8 @@ where
// Otherwise, it's possible for the regex (via look-around) to observe
// the line terminator and not match because of it.
let mut m = Match::new(0, range.end);
// No need to remember the line terminator as we aren't doing a replace
// here.
trim_line_terminator(searcher, bytes, &mut m);
bytes = &bytes[..m.end()];
}
@@ -523,19 +531,23 @@ where
/// Given a buf and some bounds, if there is a line terminator at the end of
/// the given bounds in buf, then the bounds are trimmed to remove the line
/// terminator.
pub(crate) fn trim_line_terminator(
/// terminator, returning the slice of the removed line terminator (if any).
pub(crate) fn trim_line_terminator<'b>(
searcher: &Searcher,
buf: &[u8],
buf: &'b [u8],
line: &mut Match,
) {
) -> &'b [u8] {
let lineterm = searcher.line_terminator();
if lineterm.is_suffix(&buf[*line]) {
let mut end = line.end() - 1;
if lineterm.is_crlf() && end > 0 && buf.get(end - 1) == Some(&b'\r') {
end -= 1;
}
let orig_end = line.end();
*line = line.with_end(end);
&buf[end..orig_end]
} else {
&[]
}
}
@@ -545,6 +557,7 @@ pub(crate) fn trim_line_terminator(
fn replace_with_captures_in_context<M, F>(
matcher: M,
bytes: &[u8],
line_terminator: &[u8],
range: std::ops::Range<usize>,
caps: &mut M::Captures,
dst: &mut Vec<u8>,
@@ -564,8 +577,14 @@ where
last_match = m.end();
append(caps, dst)
})?;
let end = std::cmp::min(bytes.len(), range.end);
let end = if last_match > range.end {
bytes.len()
} else {
std::cmp::min(bytes.len(), range.end)
};
dst.extend(&bytes[last_match..end]);
// Add back any line terminator.
dst.extend(line_terminator);
Ok(())
}

View File

@@ -1,6 +1,6 @@
[package]
name = "grep-regex"
version = "0.1.12" #:version
version = "0.1.14" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Use Rust's regex library with the 'grep' crate.
@@ -11,11 +11,11 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/regex"
readme = "README.md"
keywords = ["regex", "grep", "search", "pattern", "line"]
license = "Unlicense OR MIT"
edition = "2021"
edition = "2024"
[dependencies]
bstr = "1.6.2"
grep-matcher = { version = "0.1.7", path = "../matcher" }
grep-matcher = { version = "0.1.8", path = "../matcher" }
log = "0.4.20"
regex-automata = { version = "0.4.0" }
regex-syntax = "0.8.0"

View File

@@ -9,7 +9,7 @@ pub(crate) fn check(expr: &Hir, byte: u8) -> Result<(), Error> {
assert!(byte.is_ascii(), "ban byte must be ASCII");
let ch = char::from(byte);
let invalid = || Err(Error::new(ErrorKind::Banned(byte)));
match expr.kind() {
match *expr.kind() {
HirKind::Empty => {}
HirKind::Literal(hir::Literal(ref lit)) => {
if lit.iter().find(|&&b| b == byte).is_some() {

View File

@@ -341,11 +341,7 @@ impl ConfiguredHIR {
/// Returns the "end line" anchor for this configuration.
fn line_anchor_end(&self) -> hir::Look {
if self.config.crlf {
hir::Look::EndCRLF
} else {
hir::Look::EndLF
}
if self.config.crlf { hir::Look::EndCRLF } else { hir::Look::EndLF }
}
}

View File

@@ -1,9 +1,8 @@
use {
regex_automata::meta::Regex,
regex_syntax::hir::{
self,
self, Hir,
literal::{Literal, Seq},
Hir,
},
};
@@ -223,11 +222,7 @@ impl Extractor {
// extracting prefixes or suffixes.
seq = self.cross(seq, self.extract(hir));
}
if let Some(prev) = prev {
prev.choose(seq)
} else {
seq
}
if let Some(prev) = prev { prev.choose(seq) } else { seq }
}
/// Extract a sequence from the given alternation.

View File

@@ -4,8 +4,8 @@ use {
NoError,
},
regex_automata::{
meta::Regex, util::captures::Captures as AutomataCaptures, Input,
PatternID,
Input, PatternID, meta::Regex,
util::captures::Captures as AutomataCaptures,
},
};
@@ -587,10 +587,12 @@ mod tests {
// and the regex could not be modified to remove a line terminator.
#[test]
fn line_terminator_error() {
assert!(RegexMatcherBuilder::new()
.line_terminator(Some(b'\n'))
.build(r"a\nz")
.is_err())
assert!(
RegexMatcherBuilder::new()
.line_terminator(Some(b'\n'))
.build(r"a\nz")
.is_err()
)
}
// Test that enabling CRLF permits `$` to match at the end of a line.

View File

@@ -122,7 +122,7 @@ fn strip_from_match_ascii(expr: Hir, byte: u8) -> Result<Hir, Error> {
mod tests {
use regex_syntax::Parser;
use super::{strip_from_match, LineTerminator};
use super::{LineTerminator, strip_from_match};
use crate::error::Error;
fn roundtrip(pattern: &str, byte: u8) -> String {

View File

@@ -1,6 +1,6 @@
[package]
name = "grep-searcher"
version = "0.1.13" #:version
version = "0.1.16" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
@@ -11,19 +11,19 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/searcher"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense OR MIT"
edition = "2021"
edition = "2024"
[dependencies]
bstr = { version = "1.6.2", default-features = false, features = ["std"] }
encoding_rs = "0.8.33"
encoding_rs_io = "0.1.7"
grep-matcher = { version = "0.1.7", path = "../matcher" }
grep-matcher = { version = "0.1.8", path = "../matcher" }
log = "0.4.20"
memchr = "2.6.3"
memmap = { package = "memmap2", version = "0.9.0" }
[dev-dependencies]
grep-regex = { version = "0.1.12", path = "../regex" }
grep-regex = { version = "0.1.14", path = "../regex" }
regex = "1.9.5"
[features]

View File

@@ -4,8 +4,8 @@ use std::io;
use std::process;
use grep_regex::RegexMatcher;
use grep_searcher::sinks::UTF8;
use grep_searcher::Searcher;
use grep_searcher::sinks::UTF8;
fn main() {
if let Err(err) = example() {
@@ -18,7 +18,7 @@ fn example() -> Result<(), Box<dyn Error>> {
let pattern = match env::args().nth(1) {
Some(pattern) => pattern,
None => {
return Err(From::from(format!("Usage: search-stdin <pattern>")))
return Err(From::from(format!("Usage: search-stdin <pattern>")));
}
};
let matcher = RegexMatcher::new(&pattern)?;

View File

@@ -90,8 +90,8 @@ pub use crate::{
SearcherBuilder,
},
sink::{
sinks, Sink, SinkContext, SinkContextKind, SinkError, SinkFinish,
SinkMatch,
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
sinks,
},
};

View File

@@ -538,6 +538,11 @@ fn replace_bytes(
while let Some(i) = bytes.find_byte(src) {
bytes[i] = replacement;
bytes = &mut bytes[i + 1..];
// To search for adjacent `src` bytes we use a different strategy.
// Since binary data tends to have long runs of NUL terminators,
// it is faster to compare one-byte-at-a-time than to stop and start
// memchr (through `find_byte`) for every byte in a sequence.
while bytes.get(0) == Some(&src) {
bytes[0] = replacement;
bytes = &mut bytes[1..];
@@ -577,6 +582,9 @@ and exhibited clearly, with a label attached.\
#[test]
fn replace() {
assert_eq!(replace_str("", b'b', b'z'), (s(""), None));
assert_eq!(replace_str("a", b'a', b'a'), (s("a"), None));
assert_eq!(replace_str("a", b'b', b'z'), (s("a"), None));
assert_eq!(replace_str("abc", b'b', b'z'), (s("azc"), Some(1)));
assert_eq!(replace_str("abb", b'b', b'z'), (s("azz"), Some(1)));
assert_eq!(replace_str("aba", b'a', b'z'), (s("zbz"), Some(0)));

View File

@@ -33,6 +33,7 @@ pub(crate) struct Core<'s, M: 's, S> {
after_context_left: usize,
has_sunk: bool,
has_matched: bool,
count: u64,
}
impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
@@ -59,6 +60,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
after_context_left: 0,
has_sunk: false,
has_matched: false,
count: 0,
};
if !core.searcher.multi_line_with_matcher(&core.matcher) {
if core.is_line_by_line_fast() {
@@ -78,6 +80,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
self.pos = pos;
}
    /// Returns the number of matches reported so far in this search.
    fn count(&self) -> u64 {
        self.count
    }
    /// Records that one more match has been found, for enforcing the
    /// configured match limit.
    fn increment_count(&mut self) {
        self.count += 1;
    }
pub(crate) fn binary_byte_offset(&self) -> Option<u64> {
self.binary_byte_offset.map(|offset| offset as u64)
}
@@ -101,6 +111,47 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
self.sink.binary_data(&self.searcher, binary_byte_offset)
}
    /// Returns true if and only if the matcher matches the given line,
    /// using line-oriented semantics.
    fn is_match(&self, line: &[u8]) -> Result<bool, S::Error> {
        // We need to strip the line terminator here to match the
        // semantics of line-by-line searching. Namely, regexes
        // like `(?m)^$` can match at the final position beyond a
        // line terminator, which is nonsensical in line oriented
        // matching.
        let line = lines::without_terminator(line, self.config.line_term);
        self.matcher.is_match(line).map_err(S::Error::error_message)
    }
    /// Finds the next match in `slice`, honoring the configured match limit.
    ///
    /// Once the limit has been reached this returns `Ok(None)` without
    /// searching at all. Every match returned from here increments the
    /// match count.
    pub(crate) fn find(
        &mut self,
        slice: &[u8],
    ) -> Result<Option<Range>, S::Error> {
        if self.has_exceeded_match_limit() {
            return Ok(None);
        }
        match self.matcher().find(slice) {
            Err(err) => Err(S::Error::error_message(err)),
            Ok(None) => Ok(None),
            Ok(Some(m)) => {
                self.increment_count();
                Ok(Some(m))
            }
        }
    }
fn shortest_match(
&mut self,
slice: &[u8],
) -> Result<Option<usize>, S::Error> {
if self.has_exceeded_match_limit() {
return Ok(None);
}
match self.matcher.shortest_match(slice) {
Err(err) => return Err(S::Error::error_message(err)),
Ok(None) => return Ok(None),
Ok(Some(m)) => Ok(Some(m)),
}
}
pub(crate) fn begin(&mut self) -> Result<bool, S::Error> {
self.sink.begin(&self.searcher)
}
@@ -140,10 +191,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
// separator (when before_context==0 and after_context>0), we
// need to know something about the position of the previous
// line visited, even if we're at the beginning of the buffer.
//
// ... however, we only need to find the N preceding lines based
// on before context. We can skip this (potentially costly, for
// large values of N) step when before_context==0.
let context_start = lines::preceding(
buf,
self.config.line_term.as_byte(),
self.config.max_context(),
self.config.before_context,
);
let consumed =
std::cmp::max(context_start, self.last_line_visited);
@@ -157,6 +212,18 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
consumed
}
    /// Accounts for `consumed` bytes being dropped from the front of `buf`.
    ///
    /// Lines in the consumed region are counted, the absolute byte offset
    /// is advanced, and all buffer-relative positions are shifted back by
    /// `consumed` (saturating at zero). A no-op when `consumed == 0`.
    pub(crate) fn advance_buffer(&mut self, buf: &[u8], consumed: usize) {
        if consumed == 0 {
            return;
        }
        self.count_lines(buf, consumed);
        self.absolute_byte_offset += consumed as u64;
        self.last_line_counted = 0;
        self.last_line_visited =
            self.last_line_visited.saturating_sub(consumed);
        self.set_pos(self.pos().saturating_sub(consumed));
    }
pub(crate) fn detect_binary(
&mut self,
buf: &[u8],
@@ -226,6 +293,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if self.after_context_left == 0 {
return Ok(true);
}
let exceeded_match_limit = self.has_exceeded_match_limit();
let range = Range::new(self.last_line_visited, upto);
let mut stepper = LineStep::new(
self.config.line_term.as_byte(),
@@ -233,7 +301,16 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
range.end(),
);
while let Some(line) = stepper.next_match(buf) {
if !self.sink_after_context(buf, &line)? {
if exceeded_match_limit
&& self.is_match(&buf[line])? != self.config.invert_match
{
let after_context_left = self.after_context_left;
self.set_pos(line.end());
if !self.sink_matched(buf, &line)? {
return Ok(false);
}
self.after_context_left = after_context_left - 1;
} else if !self.sink_after_context(buf, &line)? {
return Ok(false);
}
if self.after_context_left == 0 {
@@ -272,6 +349,12 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
range.end(),
);
while let Some(line) = stepper.next_match(buf) {
if self.has_exceeded_match_limit()
&& !self.config.passthru
&& self.after_context_left == 0
{
return Ok(false);
}
let matched = {
// Stripping the line terminator is necessary to prevent some
// classes of regexes from matching the empty position *after*
@@ -281,15 +364,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
&buf[line],
self.config.line_term,
);
match self.matcher.shortest_match(slice) {
Err(err) => return Err(S::Error::error_message(err)),
Ok(result) => result.is_some(),
}
self.shortest_match(slice)?.is_some()
};
self.set_pos(line.end());
let success = matched != self.config.invert_match;
if success {
self.has_matched = true;
self.increment_count();
if !self.before_context_by_line(buf, line.start())? {
return Ok(false);
}
@@ -325,10 +407,11 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
if self.config.invert_match {
if !self.match_by_line_fast_invert(buf)? {
return Ok(Stop);
break;
}
} else if let Some(line) = self.find_by_line_fast(buf)? {
self.has_matched = true;
self.increment_count();
if self.config.max_context() > 0 {
if !self.after_context_by_line(buf, line.start())? {
return Ok(Stop);
@@ -348,6 +431,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if !self.after_context_by_line(buf, buf.len())? {
return Ok(Stop);
}
if self.has_exceeded_match_limit() && self.after_context_left == 0 {
return Ok(Stop);
}
self.set_pos(buf.len());
Ok(Continue)
}
@@ -387,16 +473,20 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
invert_match.end(),
);
while let Some(line) = stepper.next_match(buf) {
self.increment_count();
if !self.sink_matched(buf, &line)? {
return Ok(false);
}
if self.has_exceeded_match_limit() {
return Ok(false);
}
}
Ok(true)
}
#[inline(always)]
fn find_by_line_fast(
&self,
&mut self,
buf: &[u8],
) -> Result<Option<Range>, S::Error> {
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
@@ -404,6 +494,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
let mut pos = self.pos();
while !buf[pos..].is_empty() {
if self.has_exceeded_match_limit() {
return Ok(None);
}
match self.matcher.find_candidate_line(&buf[pos..]) {
Err(err) => return Err(S::Error::error_message(err)),
Ok(None) => return Ok(None),
@@ -427,23 +520,10 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
self.config.line_term.as_byte(),
Range::zero(i).offset(pos),
);
// We need to strip the line terminator here to match the
// semantics of line-by-line searching. Namely, regexes
// like `(?m)^$` can match at the final position beyond a
// line terminator, which is non-sensical in line oriented
// matching.
let slice = lines::without_terminator(
&buf[line],
self.config.line_term,
);
match self.matcher.is_match(slice) {
Err(err) => return Err(S::Error::error_message(err)),
Ok(true) => return Ok(Some(line)),
Ok(false) => {
pos = line.end();
continue;
}
if self.is_match(&buf[line])? {
return Ok(Some(line));
}
pos = line.end();
}
}
}
@@ -638,4 +718,8 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
false
}
fn has_exceeded_match_limit(&self) -> bool {
self.config.max_matches.map_or(false, |limit| self.count() >= limit)
}
}

View File

@@ -1,9 +1,11 @@
use grep_matcher::Matcher;
use std::collections::VecDeque;
use crate::{
line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY},
line_buffer::{DEFAULT_BUFFER_CAPACITY, LineBufferReader, alloc_error},
lines::{self, LineStep},
searcher::{core::Core, Config, Range, Searcher},
searcher::{Config, Range, Searcher, core::Core},
sink::{Sink, SinkError},
};
@@ -37,7 +39,11 @@ where
pub(crate) fn run(mut self) -> Result<(), S::Error> {
if self.core.begin()? {
while self.fill()? && self.core.match_by_line(self.rdr.buffer())? {
while self.fill()? {
if !self.core.match_by_line(self.rdr.buffer())? {
self.consume_remaining();
break;
}
}
}
self.core.finish(
@@ -46,6 +52,11 @@ where
)
}
    /// Marks everything up to the current search position as consumed in
    /// the underlying reader.
    ///
    /// NOTE(review): called when the search stops early — presumably so the
    /// reader's byte accounting stays consistent with what was actually
    /// searched; confirm against `finish`/byte-count reporting.
    fn consume_remaining(&mut self) {
        let consumed = self.core.pos();
        self.rdr.consume(consumed);
    }
fn fill(&mut self) -> Result<bool, S::Error> {
assert!(self.rdr.buffer()[self.core.pos()..].is_empty());
@@ -129,6 +140,348 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
}
}
/// A multi-line search strategy that bounds each multi-line match to a
/// sliding window of at most `window_lines` lines, so the entire haystack
/// never needs to be buffered on the heap at once.
#[derive(Debug)]
pub(crate) struct WindowedMultiLine<'s, M, S> {
    /// The configuration of the searcher that created this strategy.
    config: &'s Config,
    /// Common search state/bookkeeping shared with other strategies.
    core: Core<'s, M, S>,
    /// The maximum number of lines a single match may span.
    window_lines: usize,
    /// Buffered bytes for the lines currently retained.
    buf: Vec<u8>,
    /// Offset into `buf` where live (not-yet-discarded) data begins.
    buf_start: usize,
    /// The length in bytes of each retained line, as yielded by `LineStep`.
    line_lens: VecDeque<usize>,
    /// Absolute byte offset in the haystack of the first retained byte.
    abs_start: u64,
    /// Index into `line_lens` of the line currently being processed.
    current_index: usize,
    /// Whether the underlying input has been exhausted.
    eof: bool,
}
impl<'s, M: Matcher, S: Sink> WindowedMultiLine<'s, M, S> {
    /// Creates a new windowed multi-line searcher.
    ///
    /// Callers must ensure multi-line search is in effect for this matcher
    /// and that `window_lines` is non-zero.
    pub(crate) fn new(
        searcher: &'s Searcher,
        matcher: M,
        window_lines: usize,
        write_to: S,
    ) -> WindowedMultiLine<'s, M, S> {
        debug_assert!(searcher.multi_line_with_matcher(&matcher));
        debug_assert!(window_lines > 0);
        WindowedMultiLine {
            config: &searcher.config,
            core: Core::new(searcher, matcher, write_to, true),
            window_lines,
            buf: Vec::new(),
            buf_start: 0,
            line_lens: VecDeque::new(),
            abs_start: 0,
            current_index: 0,
            eof: false,
        }
    }

    /// Executes the search over an incremental reader, refilling the line
    /// window as lines are consumed, and reports the final byte count and
    /// binary offset to the sink.
    pub(crate) fn run_reader<R: std::io::Read>(
        mut self,
        mut rdr: LineBufferReader<'s, R>,
    ) -> Result<(), S::Error> {
        if self.core.begin()? {
            let mut already_binary = rdr.binary_byte_offset().is_some();
            // Keep going while either more input can be read or lines
            // remain buffered; each iteration processes one line.
            while self.fill_reader(&mut rdr, &mut already_binary)?
                || !self.line_lens.is_empty()
            {
                if !self.process_current_line()? {
                    break;
                }
            }
        }
        let byte_count = self.byte_count();
        let binary_byte_offset = self.core.binary_byte_offset();
        self.core.finish(byte_count, binary_byte_offset)
    }

    /// Executes the search over an in-memory slice. All lines are pushed
    /// into the window buffer up front, then processed one at a time.
    pub(crate) fn run_slice(mut self, slice: &'s [u8]) -> Result<(), S::Error> {
        if self.core.begin()? {
            // Binary detection is only applied to a bounded prefix of the
            // slice, mirroring how buffered reading detects binary data.
            let binary_upto =
                std::cmp::min(slice.len(), DEFAULT_BUFFER_CAPACITY);
            let binary_range = Range::new(0, binary_upto);
            if !self.core.detect_binary(slice, &binary_range)? {
                let mut stepper = LineStep::new(
                    self.config.line_term.as_byte(),
                    0,
                    slice.len(),
                );
                while let Some(line) = stepper.next_match(slice) {
                    self.push_line(&slice[line])?;
                }
                self.eof = true;
                while !self.line_lens.is_empty() {
                    if !self.process_current_line()? {
                        break;
                    }
                }
            }
        }
        let byte_count = self.byte_count();
        let binary_byte_offset = self.core.binary_byte_offset();
        self.core.finish(byte_count, binary_byte_offset)
    }

    /// Reads from `rdr` until the window ahead of `current_index` holds at
    /// least `window_lines` lines or EOF/binary data is reached.
    ///
    /// Returns `Ok(false)` once no further input will be read.
    fn fill_reader<R: std::io::Read>(
        &mut self,
        rdr: &mut LineBufferReader<'s, R>,
        already_binary: &mut bool,
    ) -> Result<bool, S::Error> {
        while !self.eof
            && self.line_lens.len() < self.current_index + self.window_lines
        {
            let didread = match rdr.fill() {
                Err(err) => return Err(S::Error::error_io(err)),
                Ok(didread) => didread,
            };
            // Report binary data to the sink exactly once; the sink may
            // ask us to stop, which we treat as EOF.
            if !*already_binary {
                if let Some(offset) = rdr.binary_byte_offset() {
                    *already_binary = true;
                    if !self.core.binary_data(offset)? {
                        self.eof = true;
                        return Ok(false);
                    }
                }
            }
            if !didread {
                self.eof = true;
                break;
            }
            // Copy every complete line out of the reader's buffer into our
            // own window buffer, then mark it consumed.
            let buf = rdr.buffer();
            let mut stepper = LineStep::new(
                self.config.line_term.as_byte(),
                0,
                buf.len(),
            );
            while let Some(line) = stepper.next_match(buf) {
                let bytes = &buf[line];
                self.push_line(bytes)?;
            }
            rdr.consume(buf.len());
        }
        Ok(!self.eof)
    }

    /// Appends one line to the window buffer, enforcing the configured
    /// heap limit on the amount of live buffered data.
    fn push_line(&mut self, line: &[u8]) -> Result<(), S::Error> {
        self.buf.extend_from_slice(line);
        self.line_lens.push_back(line.len());
        if let Some(limit) = self.config.heap_limit {
            let used = self.buf.len() - self.buf_start;
            if used > limit {
                return Err(S::Error::error_io(alloc_error(limit)));
            }
        }
        Ok(())
    }

    /// Processes the line at `current_index`: searches the window starting
    /// at that line, sinks matches (or inverted matches), emits context,
    /// then slides the window forward.
    ///
    /// Returns `Ok(false)` when processing should stop (sink said stop, or
    /// all lines have been processed at EOF).
    fn process_current_line(&mut self) -> Result<bool, S::Error> {
        if self.current_index >= self.line_lens.len() {
            return Ok(false);
        }
        // The window spans from the current line to at most `window_lines`
        // lines ahead. Offsets below are relative to `buf[buf_start..]`.
        let window_end =
            std::cmp::min(self.line_lens.len(), self.current_index + self.window_lines);
        let window_start_off = self.line_offset(self.current_index);
        let window_end_off = self.line_offset(window_end);
        let line0_len = self.line_lens[self.current_index];
        {
            let buffer = &self.buf[self.buf_start..];
            let window_bytes =
                &self.buf[self.buf_start + window_start_off
                    ..self.buf_start + window_end_off];
            if self.config.invert_match {
                if !sink_inverted_line(
                    &mut self.core,
                    self.config,
                    buffer,
                    window_bytes,
                    window_start_off,
                    line0_len,
                )? {
                    return Ok(false);
                }
            } else if !sink_matched_line(
                &mut self.core,
                self.config,
                buffer,
                window_bytes,
                window_start_off,
                line0_len,
            )? {
                return Ok(false);
            }
            // Flush context for the line we just finished with before it
            // can be dropped from the front of the window.
            let drop_upto = window_start_off + line0_len;
            if self.config.passthru {
                if !self.core.other_context_by_line(buffer, drop_upto)? {
                    return Ok(false);
                }
            } else if !self.core.after_context_by_line(buffer, drop_upto)? {
                return Ok(false);
            }
        }
        self.current_index += 1;
        // Keep up to `before_context` lines buffered behind the current
        // line; anything older is dropped from the front.
        if self.current_index > self.config.before_context {
            let drop_len = self.line_lens.pop_front().unwrap();
            self.shift_buffer(drop_len);
            self.current_index -= 1;
        }
        // At EOF with no lines left, flush any trailing context and stop.
        if self.eof && self.current_index >= self.line_lens.len() {
            let buffer = &self.buf[self.buf_start..];
            if self.config.passthru {
                if !self.core.other_context_by_line(buffer, buffer.len())? {
                    return Ok(false);
                }
            } else if !self.core.after_context_by_line(buffer, buffer.len())? {
                return Ok(false);
            }
            return Ok(false);
        }
        Ok(true)
    }

    /// Returns the byte offset (relative to the live buffer) of the line
    /// at index `idx`.
    ///
    /// NOTE(review): this sums the deque each call, making window setup
    /// O(lines-retained) per processed line — fine for small windows and
    /// context sizes, worth revisiting if either can be large.
    fn line_offset(&self, idx: usize) -> usize {
        self.line_lens.iter().take(idx).sum()
    }

    /// Advances all bookkeeping past `consumed` bytes dropped from the
    /// front of the window, and periodically compacts the backing buffer.
    fn shift_buffer(&mut self, consumed: usize) {
        let buffer = &self.buf[self.buf_start..];
        self.core.advance_buffer(buffer, consumed);
        self.buf_start += consumed;
        self.abs_start += consumed as u64;
        // Compact once more than half of `buf` is dead space, so the
        // buffer doesn't grow without bound.
        if self.buf_start > 0 && self.buf_start > self.buf.len() / 2 {
            self.buf.copy_within(self.buf_start.., 0);
            let new_len = self.buf.len() - self.buf_start;
            self.buf.truncate(new_len);
            self.buf_start = 0;
        }
    }

    /// Computes the total byte count to report to the sink.
    ///
    /// NOTE(review): `binary_byte_offset()` is an absolute offset while
    /// `core.pos()` is buffer-relative — confirm this comparison is
    /// intended before relying on the binary-truncated branch.
    fn byte_count(&mut self) -> u64 {
        match self.core.binary_byte_offset() {
            Some(offset) if offset < self.core.pos() as u64 => offset,
            _ => self.abs_start + (self.buf.len() - self.buf_start) as u64,
        }
    }
}
/// Finds and sinks every match that begins within the first line of the
/// window, coalescing overlapping or adjacent match ranges into a single
/// reported match.
///
/// `window_start_off` and `line0_len` locate the first line within
/// `buffer`; `window_bytes` is the window itself. Returns `Ok(false)` when
/// the sink asks to stop searching.
fn sink_matched_line<M: Matcher, S: Sink>(
    core: &mut Core<'_, M, S>,
    config: &Config,
    buffer: &[u8],
    window_bytes: &[u8],
    window_start_off: usize,
    line0_len: usize,
) -> Result<bool, S::Error> {
    let mut pos = 0;
    let mut last_match: Option<Range> = None;
    while let Some(mat) = find_in_window(core, window_bytes, pos)? {
        // Only matches that *start* in the first line belong to this
        // iteration; later lines get their turn as the window slides.
        if mat.start() >= line0_len {
            break;
        }
        // Expand the raw match to full line boundaries, in buffer-relative
        // coordinates.
        let line = lines::locate(
            window_bytes,
            config.line_term.as_byte(),
            mat,
        )
        .offset(window_start_off);
        match last_match.take() {
            None => {
                last_match = Some(line);
            }
            Some(last) => {
                if last.end() >= line.start() {
                    // Overlapping/adjacent line ranges merge into one.
                    last_match = Some(last.with_end(line.end()));
                } else {
                    // Disjoint: flush the previous range (context first,
                    // then the match itself), then start a new one.
                    if !sink_context(core, config, buffer, &last)? {
                        return Ok(false);
                    }
                    if !core.matched(buffer, &last)? {
                        return Ok(false);
                    }
                    last_match = Some(line);
                }
            }
        }
        pos = mat.end();
        // Step past empty matches to guarantee forward progress.
        if mat.is_empty() && pos < window_bytes.len() {
            pos += 1;
        }
    }
    // Flush the final pending range, if any.
    if let Some(last) = last_match.take() {
        if !sink_context(core, config, buffer, &last)? {
            return Ok(false);
        }
        if !core.matched(buffer, &last)? {
            return Ok(false);
        }
    }
    Ok(true)
}
/// Sinks the first line of the window as an inverted match, unless some
/// match begins within that first line.
///
/// `window_start_off` and `line0_len` locate the first line within
/// `buffer`; `window_bytes` is the window itself. Returns `Ok(false)` when
/// the sink asks to stop searching.
fn sink_inverted_line<M: Matcher, S: Sink>(
    core: &mut Core<'_, M, S>,
    config: &Config,
    buffer: &[u8],
    window_bytes: &[u8],
    window_start_off: usize,
    line0_len: usize,
) -> Result<bool, S::Error> {
    // The original loop here could never run more than one iteration: its
    // body unconditionally either broke out (match starts at or beyond the
    // first line) or returned (match starts inside the first line), so the
    // `pos` advancement at the bottom was unreachable dead code. A single
    // probe of the window is equivalent.
    if let Some(mat) = find_in_window(core, window_bytes, 0)? {
        if mat.start() < line0_len {
            // The first line participates in a match, so under inverted
            // semantics it must not be reported.
            return Ok(true);
        }
    }
    // No match touches the first line: report it as an inverted match,
    // emitting any surrounding context first.
    let line = Range::new(window_start_off, window_start_off + line0_len);
    if !sink_context(core, config, buffer, &line)? {
        return Ok(false);
    }
    if !core.matched(buffer, &line)? {
        return Ok(false);
    }
    Ok(true)
}
/// Finds the next match in `window_bytes` at or after `pos`, translating
/// the result back into window-relative coordinates.
fn find_in_window<M: Matcher, S: Sink>(
    core: &mut Core<'_, M, S>,
    window_bytes: &[u8],
    pos: usize,
) -> Result<Option<Range>, S::Error> {
    match core.find(&window_bytes[pos..])? {
        None => Ok(None),
        Some(m) => Ok(Some(m.offset(pos))),
    }
}
/// Emits the context lines owed before reporting the match at `range`.
///
/// In passthru mode every intervening line is emitted as "other" context;
/// otherwise any remaining after-context is flushed first, followed by the
/// before-context for `range`. Returns `Ok(false)` when the sink asks to
/// stop searching.
fn sink_context<M: Matcher, S: Sink>(
    core: &mut Core<'_, M, S>,
    config: &Config,
    buffer: &[u8],
    range: &Range,
) -> Result<bool, S::Error> {
    if config.passthru {
        if !core.other_context_by_line(buffer, range.start())? {
            return Ok(false);
        }
    } else {
        if !core.after_context_by_line(buffer, range.start())? {
            return Ok(false);
        }
        if !core.before_context_by_line(buffer, range.start())? {
            return Ok(false);
        }
    }
    Ok(true)
}
#[derive(Debug)]
pub(crate) struct MultiLine<'s, M, S> {
config: &'s Config,
@@ -316,11 +669,9 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
}
fn find(&mut self) -> Result<Option<Range>, S::Error> {
match self.core.matcher().find(&self.slice[self.core.pos()..]) {
Err(err) => Err(S::Error::error_message(err)),
Ok(None) => Ok(None),
Ok(Some(m)) => Ok(Some(m.offset(self.core.pos()))),
}
self.core
.find(&self.slice[self.core.pos()..])
.map(|m| m.map(|m| m.offset(self.core.pos())))
}
/// Advance the search position based on the previous match.
@@ -511,6 +862,37 @@ byte count:366
.test();
}
#[test]
fn multi_line_window_limits_match() {
let haystack = "a\nb\nc\nd\n";
let matcher = RegexMatcher::new("a\nb\nc");
let mut builder = SearcherBuilder::new();
builder.multi_line(true).multiline_window(Some(2)).line_number(false);
let mut sink = KitchenSink::new();
let mut searcher = builder.build();
searcher
.search_slice(&matcher, haystack.as_bytes(), &mut sink)
.unwrap();
let got = String::from_utf8(sink.as_bytes().to_vec()).unwrap();
let exp = format!("\nbyte count:{}\n", haystack.len());
assert_eq!(exp, got);
let mut builder = SearcherBuilder::new();
builder.multi_line(true).multiline_window(Some(3)).line_number(false);
let mut sink = KitchenSink::new();
let mut searcher = builder.build();
searcher
.search_slice(&matcher, haystack.as_bytes(), &mut sink)
.unwrap();
let exp = format!(
"0:a\n2:b\n4:c\n\nbyte count:{}\n",
haystack.len()
);
let got = String::from_utf8(sink.as_bytes().to_vec()).unwrap();
assert_eq!(exp, got);
}
#[test]
fn multi_line_overlap2() {
let haystack = "xxx\nabc\ndefabc\ndefxxx\nxxx";

View File

@@ -13,10 +13,10 @@ use {
use crate::{
line_buffer::{
self, alloc_error, BufferAllocation, LineBuffer, LineBufferBuilder,
LineBufferReader, DEFAULT_BUFFER_CAPACITY,
self, BufferAllocation, DEFAULT_BUFFER_CAPACITY, LineBuffer,
LineBufferBuilder, LineBufferReader, alloc_error,
},
searcher::glue::{MultiLine, ReadByLine, SliceByLine},
searcher::glue::{MultiLine, ReadByLine, SliceByLine, WindowedMultiLine},
sink::{Sink, SinkError},
};
@@ -172,6 +172,8 @@ pub struct Config {
binary: BinaryDetection,
/// Whether to enable matching across multiple lines.
multi_line: bool,
/// The maximum number of lines a multi-line match may span.
multiline_window: Option<usize>,
/// An encoding that, when present, causes the searcher to transcode all
/// input from the encoding to UTF-8.
encoding: Option<Encoding>,
@@ -180,6 +182,8 @@ pub struct Config {
/// Whether to stop searching when a non-matching line is found after a
/// matching line.
stop_on_nonmatch: bool,
/// The maximum number of matches this searcher should emit.
max_matches: Option<u64>,
}
impl Default for Config {
@@ -195,9 +199,11 @@ impl Default for Config {
mmap: MmapChoice::default(),
binary: BinaryDetection::default(),
multi_line: false,
multiline_window: None,
encoding: None,
bom_sniffing: true,
stop_on_nonmatch: false,
max_matches: None,
}
}
}
@@ -387,6 +393,15 @@ impl SearcherBuilder {
self
}
/// Limit multi-line matches to a window of at most `line_count` lines.
pub fn multiline_window(
&mut self,
line_count: Option<usize>,
) -> &mut SearcherBuilder {
self.config.multiline_window = line_count;
self
}
/// Whether to include a fixed number of lines after every match.
///
/// When this is set to a non-zero number, then the searcher will report
@@ -564,6 +579,23 @@ impl SearcherBuilder {
self.config.stop_on_nonmatch = stop_on_nonmatch;
self
}
/// Sets the maximum number of matches that should be emitted by this
/// searcher.
///
/// If multi line search is enabled and a match spans multiple lines, then
/// that match is counted exactly once for the purposes of enforcing this
/// limit, regardless of how many lines it spans.
///
/// Note that `0` is a legal value. This will cause the searcher to
/// immediately quit without searching anything.
///
/// By default, no limit is set.
#[inline]
pub fn max_matches(&mut self, limit: Option<u64>) -> &mut SearcherBuilder {
self.config.max_matches = limit;
self
}
}
/// A searcher executes searches over a haystack and writes results to a caller
@@ -674,6 +706,13 @@ impl Searcher {
// enabled. This pre-allocates a buffer roughly the size of the file,
// which isn't possible when searching an arbitrary std::io::Read.
if self.multi_line_with_matcher(&matcher) {
if self.config.multiline_window.is_some() {
log::trace!(
"{:?}: searching via windowed multiline strategy",
path
);
return self.search_reader(matcher, file, write_to);
}
log::trace!(
"{:?}: reading entire file on to heap for mulitline",
path
@@ -724,6 +763,18 @@ impl Searcher {
.map_err(S::Error::error_io)?;
if self.multi_line_with_matcher(&matcher) {
if let Some(window_lines) = self.config.multiline_window {
let mut line_buffer = self.line_buffer.borrow_mut();
let rdr = LineBufferReader::new(decoder, &mut *line_buffer);
log::trace!("generic reader: searching via windowed multiline");
return WindowedMultiLine::new(
self,
matcher,
window_lines,
write_to,
)
.run_reader(rdr);
}
log::trace!(
"generic reader: reading everything to heap for multiline"
);
@@ -766,6 +817,16 @@ impl Searcher {
return self.search_reader(matcher, slice, write_to);
}
if self.multi_line_with_matcher(&matcher) {
if let Some(window_lines) = self.config.multiline_window {
log::trace!("slice reader: searching via windowed multiline");
return WindowedMultiLine::new(
self,
matcher,
window_lines,
write_to,
)
.run_slice(slice);
}
log::trace!("slice reader: searching via multiline strategy");
MultiLine::new(self, matcher, slice, write_to).run()
} else {
@@ -845,13 +906,33 @@ impl Searcher {
self.config.multi_line
}
/// Returns true if and only if this searcher is configured to stop when in
/// Returns the maximum number of lines a multi-line match may span.
#[inline]
pub fn multiline_window(&self) -> Option<usize> {
self.config.multiline_window
}
/// Returns true if and only if this searcher is configured to stop when it
/// finds a non-matching line after a matching one.
#[inline]
pub fn stop_on_nonmatch(&self) -> bool {
self.config.stop_on_nonmatch
}
/// Returns the maximum number of matches emitted by this searcher, if
/// such a limit was set.
///
/// If multi line search is enabled and a match spans multiple lines, then
/// that match is counted exactly once for the purposes of enforcing this
/// limit, regardless of how many lines it spans.
///
/// Note that `0` is a legal value. This will cause the searcher to
/// immediately quit without searching anything.
#[inline]
pub fn max_matches(&self) -> Option<u64> {
self.config.max_matches
}
/// Returns true if and only if this searcher will choose a multi-line
/// strategy given the provided matcher.
///
@@ -1004,6 +1085,7 @@ fn slice_has_bom(slice: &[u8]) -> bool {
None => return false,
Some((enc, _)) => enc,
};
log::trace!("found byte-order mark (BOM) for encoding {enc:?}");
[encoding_rs::UTF_16LE, encoding_rs::UTF_16BE, encoding_rs::UTF_8]
.contains(&enc)
}

4
fuzz/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
target
corpus
artifacts
coverage

188
fuzz/Cargo.lock generated Normal file
View File

@@ -0,0 +1,188 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
dependencies = [
"memchr",
]
[[package]]
name = "arbitrary"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110"
dependencies = [
"derive_arbitrary",
]
[[package]]
name = "bstr"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c48f0051a4b4c5e0b6d365cd04af53aeaa209e3cc15ec2cdb69e73cc87fbd0dc"
dependencies = [
"memchr",
"serde",
]
[[package]]
name = "cc"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"jobserver",
"libc",
]
[[package]]
name = "derive_arbitrary"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "fuzz"
version = "0.0.1"
dependencies = [
"globset",
"libfuzzer-sys",
]
[[package]]
name = "globset"
version = "0.4.16"
dependencies = [
"aho-corasick",
"arbitrary",
"bstr",
"log",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "jobserver"
version = "0.1.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d"
dependencies = [
"libc",
]
[[package]]
name = "libc"
version = "0.2.152"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7"
[[package]]
name = "libfuzzer-sys"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a96cfd5557eb82f2b83fed4955246c988d331975a002961b07c81584d107e7f7"
dependencies = [
"arbitrary",
"cc",
"once_cell",
]
[[package]]
name = "log"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "memchr"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "proc-macro2"
version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex-automata"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b7fa1134405e2ec9353fd416b17f8dacd46c473d7d3fd1cf202706a14eb792a"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
[[package]]
name = "serde"
version = "1.0.195"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.195"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "syn"
version = "2.0.48"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

25
fuzz/Cargo.toml Normal file
View File

@@ -0,0 +1,25 @@
[package]
name = "fuzz"
version = "0.0.1"
publish = false
edition = "2024"
[package.metadata]
cargo-fuzz = true
[dependencies]
libfuzzer-sys = "0.4"
globset = { path = "../crates/globset", features = ["arbitrary"] }
# Prevent this from interfering with workspaces
[workspace]
members = ["."]
[profile.release]
debug = 1
[[bin]]
name = "fuzz_glob"
path = "fuzz_targets/fuzz_glob.rs"
test = false
doc = false

52
fuzz/README.md Normal file
View File

@@ -0,0 +1,52 @@
# Fuzz Testing
## Introduction
Fuzz testing produces pseudo-random / arbitrary data that is used to find
stability issues within a code base. While Rust provides a strong type system,
this does not guarantee that an object will convert properly from one struct
to another. It is the responsibility of the developer to ensure that a struct
is converted properly. Fuzz testing will generate input within the domain of
each property. This arbitrary data can then be used to convert from ObjectA
to ObjectB and then back. This type of testing will help catch bugs that the
type system is not able to see.
## Installation
This crate relies on the `cargo-fuzz` component. To install this component,
run the following from the `fuzz` directory:
```bash
cargo install cargo-fuzz
```
## Listing Targets
Once installed, fuzz targets can be listed by running the following command:
```bash
cargo fuzz list
```
This command will print out a list of all targets that can be tested.
## Running Fuzz Tests
To run a fuzz test, the target must be specified:
```bash
cargo fuzz run <target>
```
Note that the above will run the fuzz test indefinitely. Use the
`-max_total_time=<num seconds>` flag to specify how many seconds the test
should run for:
```bash
cargo fuzz run <target> -- -max_total_time=5
```
The above command will run the fuzz test for five seconds. If the test
completes without error it will show how many tests were run successfully.
The test will abort and return a non-zero error code if it is able to produce
an error. The arbitrary input will be displayed in the event of a failure.

View File

@@ -0,0 +1,22 @@
#![no_main]
use std::str::FromStr;
use globset::Glob;
libfuzzer_sys::fuzz_target!(|glob_str: &str| {
let Ok(glob) = Glob::new(glob_str) else {
return;
};
let Ok(glob2) = Glob::from_str(glob_str) else {
return;
};
// Verify that a `Glob` constructed with `new` is the same as a `Glob` constructed
// with `from_str`.
assert_eq!(glob, glob2);
// Verify that `Glob::glob` produces the same string as the original.
assert_eq!(glob.glob(), glob_str);
});

View File

@@ -1,23 +1,23 @@
class RipgrepBin < Formula
version '14.1.0'
version '0.1.0'
desc "Recursively search directories for a regex pattern."
homepage "https://github.com/BurntSushi/ripgrep"
if OS.mac?
url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz"
sha256 "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
url "https://git.peisongxiao.com/peisongxiao/rgs/releases/download/#{version}/rgs-#{version}-x86_64-apple-darwin.tar.gz"
sha256 "64811cb24e77cac3057d6c40b63ac9becf9082eedd54ca411b475b755d334882"
elsif OS.linux?
url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-unknown-linux-musl.tar.gz"
sha256 "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
url "https://git.peisongxiao.com/peisongxiao/rgs/releases/download/#{version}/rgs-#{version}-x86_64-unknown-linux-musl.tar.gz"
sha256 "1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599"
end
conflicts_with "ripgrep"
def install
bin.install "rg"
man1.install "doc/rg.1"
bin.install "rgs"
man1.install "doc/rgs.1"
bash_completion.install "complete/rg.bash"
zsh_completion.install "complete/_rg"
bash_completion.install "complete/rgs.bash"
zsh_completion.install "complete/_rgs"
end
end

View File

@@ -1,2 +1,3 @@
max_width = 79
use_small_heuristics = "max"
edition = "2024"

View File

@@ -4,8 +4,8 @@ use crate::util::{Dir, TestCommand};
// handling of binary files. There's quite a bit of discussion on this in this
// bug report: https://github.com/BurntSushi/ripgrep/issues/306
// Our haystack is the first 500 lines of Gutenberg's copy of "A Study in
// Scarlet," with a NUL byte at line 1898: `abcdef\x00`.
// Our haystack is the first 2,133 lines of Gutenberg's copy of "A Study in
// Scarlet," with a NUL byte at line 1870: `abcdef\x00`.
//
// The position and size of the haystack is, unfortunately, significant. In
// particular, the NUL byte is specifically inserted at some point *after* the
@@ -21,10 +21,135 @@ use crate::util::{Dir, TestCommand};
// detection with memory maps is a bit different. Namely, NUL bytes are only
// searched for in the first few KB of the file and in a match. Normally, NUL
// bytes are searched for everywhere.
//
// TODO: Add tests for binary file detection when using memory maps.
const HAY: &'static [u8] = include_bytes!("./data/sherlock-nul.txt");
// Tests for binary file detection when using memory maps.
// As noted in the original comments, with memory maps binary detection
// works differently - NUL bytes are only searched for in the first few KB
// of the file and in matches.
//
// Note that we don't run these on macOS, which has memory maps forcefully
// disabled because they suck so much.
// Test that matches in a binary file with memory maps work as expected
// with implicit file search (via glob pattern).
#[cfg(not(target_os = "macos"))]
rgtest!(mmap_match_implicit, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    cmd.arg("--mmap")
        .arg("-n")
        .arg("Project Gutenberg EBook")
        .arg("-g")
        .arg("hay");
    // With mmap, we get a match and a warning about binary content.
    let want = "\
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
";
    eqnice!(want, cmd.stdout());
});
// Test with an explicit file argument when using memory maps.
#[cfg(not(target_os = "macos"))]
rgtest!(mmap_match_explicit, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    cmd.arg("--mmap").arg("-n").arg("Project Gutenberg EBook").arg("hay");
    // With an explicit path, the file name is omitted from the output.
    let want = "\
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
";
    eqnice!(want, cmd.stdout());
});
// Test specifically with a pattern that matches near the NUL byte which should
// trigger binary detection with memory maps.
#[cfg(not(target_os = "macos"))]
rgtest!(mmap_match_near_nul, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // Pattern that matches around line 1870 where the NUL byte is.
    // Note: Using direct file path instead of glob.
    cmd.args(&["--mmap", "-n", "abcdef", "hay"]);
    let expected = "\
binary file matches (found \"\\0\" byte around offset 77041)
";
    eqnice!(expected, cmd.stdout());
});
// Test with --count option to ensure full file scanning works with mmap.
#[cfg(not(target_os = "macos"))]
rgtest!(mmap_match_count, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    cmd.arg("--mmap")
        .arg("-c")
        .arg("Project Gutenberg EBook|Heaven")
        .arg("hay");
    // Even if the NUL byte is not encountered during the initial binary
    // detection, the total match count should still be reported.
    eqnice!("2\n", cmd.stdout());
});
// Test binary detection with mmap when pattern would match before and after NUL
// byte.
#[cfg(not(target_os = "macos"))]
rgtest!(mmap_match_multiple, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // Use explicit file path.
    cmd.args(&["--mmap", "-n", "Project Gutenberg EBook|Heaven", "hay"]);
    // With explicit file and memory maps, matches before and after NUL byte
    // are shown. Line 1871 sits just past the NUL byte at line 1870.
    let expected = "\
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
1871:\"No. Heaven knows what the objects of his studies are. But here we
";
    eqnice!(expected, cmd.stdout());
});
// Test that --binary flag can have odd results when searching with a memory
// map.
#[cfg(not(target_os = "macos"))]
rgtest!(mmap_binary_flag, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // Search through an implicit path matched by a glob.
    cmd.arg("--mmap").arg("-n").arg("--binary").arg("Heaven");
    cmd.arg("-g").arg("hay");
    let want = "\
hay:1871:\"No. Heaven knows what the objects of his studies are. But here we
";
    eqnice!(want, cmd.stdout());
});
// Test that using -a/--text flag works as expected with mmap.
#[cfg(not(target_os = "macos"))]
rgtest!(mmap_text_flag, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    cmd.args(&["--mmap", "-n", "--text", "Heaven", "-g", "hay"]);
    // With --text flag, binary detection should be disabled.
    // Line 1871 lies just past the NUL byte at line 1870.
    let expected = "\
hay:1871:\"No. Heaven knows what the objects of his studies are. But here we
";
    eqnice!(expected, cmd.stdout());
});
// Test pattern that matches before and after the NUL byte with memory maps.
//
// NOTE(review): every line number asserted below is <= 1867, i.e. *before*
// the NUL byte at line 1870 — confirm whether a match after the NUL byte is
// actually exercised here, and whether the test name still fits.
#[cfg(not(target_os = "macos"))]
rgtest!(mmap_after_nul_match, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // Use explicit file path.
    cmd.args(&["--mmap", "-n", "medical student", "hay"]);
    // With explicit file and memory maps, all matches are shown
    let expected = "\
176:\"A medical student, I suppose?\" said I.
409:\"A medical student, I suppose?\" said I.
642:\"A medical student, I suppose?\" said I.
875:\"A medical student, I suppose?\" said I.
1108:\"A medical student, I suppose?\" said I.
1341:\"A medical student, I suppose?\" said I.
1574:\"A medical student, I suppose?\" said I.
1807:\"A medical student, I suppose?\" said I.
1867:\"And yet you say he is not a medical student?\"
";
    eqnice!(expected, cmd.stdout());
});
// This tests that ripgrep prints a warning message if it finds and prints a
// match in a binary file before detecting that it is a binary file. The point
// here is to notify the user that the search of the file is only partially
@@ -304,3 +429,40 @@ hay:1867:\"And yet you say he is not a medical student?\"
";
eqnice!(expected, cmd.stdout());
});
// See: https://github.com/BurntSushi/ripgrep/issues/3131
//
// `-l/--files-with-matches` reports `file1.txt` while plain `-c` skips it as
// binary — presumably because `-l` can stop before reaching the trailing NUL
// byte (see the inline comment below).
rgtest!(
    matching_files_inconsistent_with_count,
    |dir: Dir, _cmd: TestCommand| {
        // Build a large file whose first line matches but whose final byte is
        // a NUL, so a full scan classifies it as binary.
        let mut file1 = String::new();
        file1.push_str("cat here\n");
        for _ in 0..150_000 {
            file1.push_str("padding line\n");
        }
        file1.push_str("\x00");
        dir.create("file1.txt", &file1);
        dir.create("file2.txt", "cat here");
        let got = dir.command().args(&["--sort=path", "-l", "cat"]).stdout();
        eqnice!("file1.txt\nfile2.txt\n", got);
        // This is the inconsistent result that can't really be avoided without
        // either making `-l/--files-with-matches` much slower or changing
        // what "binary filtering" means.
        let got = dir.command().args(&["--sort=path", "-c", "cat"]).stdout();
        eqnice!("file2.txt:1\n", got);
        // Opting into binary searching (or forcing text) restores consistency.
        let got = dir
            .command()
            .args(&["--sort=path", "-c", "cat", "--binary"])
            .stdout();
        eqnice!("file1.txt:1\nfile2.txt:1\n", got);
        let got = dir
            .command()
            .args(&["--sort=path", "-c", "cat", "--text"])
            .stdout();
        eqnice!("file1.txt:1\nfile2.txt:1\n", got);
    }
);

View File

@@ -1,5 +1,5 @@
use crate::hay::{SHERLOCK, SHERLOCK_CRLF};
use crate::util::{sort_lines, Dir, TestCommand};
use crate::util::{Dir, TestCommand, sort_lines};
// See: https://github.com/BurntSushi/ripgrep/issues/1
rgtest!(f1_sjis, |dir: Dir, mut cmd: TestCommand| {
@@ -154,7 +154,7 @@ test
// parent ignore files and manually specified ignore files.
let mut cmd = dir.command();
cmd.args(&["--ignore-file", "../.not-an-ignore", "-l", "test"]);
cmd.current_dir(dir.path().join("baz"));
cmd.current_dir("baz");
let expected = "
baz/bar/test
test
@@ -943,18 +943,18 @@ rgtest!(f2361_sort_nested_files, |dir: Dir, mut cmd: TestCommand| {
return;
}
dir.create("foo", "1");
sleep(Duration::from_millis(100));
sleep(Duration::from_millis(200));
dir.create_dir("dir");
sleep(Duration::from_millis(100));
sleep(Duration::from_millis(200));
dir.create(dir.path().join("dir").join("bar"), "1");
cmd.arg("--sort").arg("accessed").arg("--files");
eqnice!("foo\ndir/bar\n", cmd.stdout());
dir.create("foo", "2");
sleep(Duration::from_millis(100));
sleep(Duration::from_millis(200));
dir.create(dir.path().join("dir").join("bar"), "2");
sleep(Duration::from_millis(100));
sleep(Duration::from_millis(200));
cmd.arg("--sort").arg("accessed").arg("--files");
eqnice!("foo\ndir/bar\n", cmd.stdout());

View File

@@ -21,45 +21,47 @@ impl Message {
fn unwrap_begin(&self) -> Begin {
match *self {
Message::Begin(ref x) => x.clone(),
ref x => panic!("expected Message::Begin but got {:?}", x),
ref x => panic!("expected Message::Begin but got {x:?}"),
}
}
fn unwrap_end(&self) -> End {
match *self {
Message::End(ref x) => x.clone(),
ref x => panic!("expected Message::End but got {:?}", x),
ref x => panic!("expected Message::End but got {x:?}"),
}
}
fn unwrap_match(&self) -> Match {
match *self {
Message::Match(ref x) => x.clone(),
ref x => panic!("expected Message::Match but got {:?}", x),
ref x => panic!("expected Message::Match but got {x:?}"),
}
}
fn unwrap_context(&self) -> Context {
match *self {
Message::Context(ref x) => x.clone(),
ref x => panic!("expected Message::Context but got {:?}", x),
ref x => panic!("expected Message::Context but got {x:?}"),
}
}
fn unwrap_summary(&self) -> Summary {
match *self {
Message::Summary(ref x) => x.clone(),
ref x => panic!("expected Message::Summary but got {:?}", x),
ref x => panic!("expected Message::Summary but got {x:?}"),
}
}
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
struct Begin {
path: Option<Data>,
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
struct End {
path: Option<Data>,
binary_offset: Option<u64>,
@@ -67,12 +69,14 @@ struct End {
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
struct Summary {
elapsed_total: Duration,
stats: Stats,
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
struct Match {
path: Option<Data>,
lines: Data,
@@ -82,6 +86,7 @@ struct Match {
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
struct Context {
path: Option<Data>,
lines: Data,
@@ -91,9 +96,11 @@ struct Context {
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
struct SubMatch {
#[serde(rename = "match")]
m: Data,
replacement: Option<Data>,
start: usize,
end: usize,
}
@@ -117,6 +124,7 @@ impl Data {
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
struct Stats {
elapsed: Duration,
searches: u64,
@@ -128,6 +136,7 @@ struct Stats {
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
struct Duration {
#[serde(flatten)]
duration: time::Duration,
@@ -178,6 +187,7 @@ rgtest!(basic, |dir: Dir, mut cmd: TestCommand| {
absolute_offset: 129,
submatches: vec![SubMatch {
m: Data::text("Sherlock Holmes"),
replacement: None,
start: 48,
end: 63,
},],
@@ -189,6 +199,57 @@ rgtest!(basic, |dir: Dir, mut cmd: TestCommand| {
assert_eq!(msgs[4].unwrap_summary().stats.bytes_printed, 494);
});
// Tests the JSON output when `-r/--replace` is used: each submatch carries a
// `replacement` field, while context lines have no submatches at all.
rgtest!(replacement, |dir: Dir, mut cmd: TestCommand| {
    dir.create("sherlock", SHERLOCK);
    cmd.arg("--json")
        .arg("-B1")
        .arg("Sherlock Holmes")
        .args(["-r", "John Watson"])
        .arg("sherlock");
    let msgs = json_decode(&cmd.stdout());
    assert_eq!(
        msgs[0].unwrap_begin(),
        Begin { path: Some(Data::text("sherlock")) }
    );
    // The `-B1` context line preceding the match: no submatches, hence no
    // replacement data appears anywhere in it.
    assert_eq!(
        msgs[1].unwrap_context(),
        Context {
            path: Some(Data::text("sherlock")),
            lines: Data::text(
                "Holmeses, success in the province of \
                 detective work must always\n",
            ),
            line_number: Some(2),
            absolute_offset: 65,
            submatches: vec![],
        }
    );
    assert_eq!(
        msgs[2].unwrap_match(),
        Match {
            path: Some(Data::text("sherlock")),
            lines: Data::text(
                "be, to a very large extent, the result of luck. \
                 Sherlock Holmes\n",
            ),
            line_number: Some(3),
            absolute_offset: 129,
            submatches: vec![SubMatch {
                m: Data::text("Sherlock Holmes"),
                // The rendered replacement for this submatch.
                replacement: Some(Data::text("John Watson")),
                start: 48,
                end: 63,
            },],
        }
    );
    assert_eq!(msgs[3].unwrap_end().path, Some(Data::text("sherlock")));
    assert_eq!(msgs[3].unwrap_end().binary_offset, None);
    assert_eq!(msgs[4].unwrap_summary().stats.searches_with_match, 1);
    assert_eq!(msgs[4].unwrap_summary().stats.bytes_printed, 531);
});
rgtest!(quiet_stats, |dir: Dir, mut cmd: TestCommand| {
dir.create("sherlock", SHERLOCK);
cmd.arg("--json")
@@ -244,6 +305,7 @@ rgtest!(notutf8, |dir: Dir, mut cmd: TestCommand| {
absolute_offset: 0,
submatches: vec![SubMatch {
m: Data::bytes("/w=="),
replacement: None,
start: 4,
end: 5,
},],
@@ -285,6 +347,7 @@ rgtest!(notutf8_file, |dir: Dir, mut cmd: TestCommand| {
absolute_offset: 0,
submatches: vec![SubMatch {
m: Data::bytes("/w=="),
replacement: None,
start: 4,
end: 5,
},],
@@ -305,7 +368,12 @@ rgtest!(crlf, |dir: Dir, mut cmd: TestCommand| {
assert_eq!(
msgs[1].unwrap_match().submatches[0].clone(),
SubMatch { m: Data::text("Sherlock"), start: 56, end: 64 },
SubMatch {
m: Data::text("Sherlock"),
replacement: None,
start: 56,
end: 64
},
);
});

View File

@@ -1,5 +1,5 @@
use crate::hay::SHERLOCK;
use crate::util::{cmd_exists, sort_lines, Dir, TestCommand};
use crate::util::{Dir, TestCommand, cmd_exists, sort_lines};
// This file contains "miscellaneous" tests that were either written before
// features were tracked more explicitly, or were simply written without
@@ -627,7 +627,7 @@ rgtest!(ignore_git_parent, |dir: Dir, mut cmd: TestCommand| {
// Even though we search in foo/, which has no .gitignore, ripgrep will
// traverse parent directories and respect the gitignore files found.
cmd.current_dir(dir.path().join("foo"));
cmd.current_dir("foo");
cmd.assert_err();
});
@@ -651,7 +651,7 @@ rgtest!(ignore_git_parent_stop, |dir: Dir, mut cmd: TestCommand| {
dir.create_dir("foo/bar");
dir.create("foo/bar/sherlock", SHERLOCK);
cmd.arg("Sherlock");
cmd.current_dir(dir.path().join("foo").join("bar"));
cmd.current_dir("foo/bar");
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -682,7 +682,7 @@ rgtest!(ignore_git_parent_stop_file, |dir: Dir, mut cmd: TestCommand| {
dir.create_dir("foo/bar");
dir.create("foo/bar/sherlock", SHERLOCK);
cmd.arg("Sherlock");
cmd.current_dir(dir.path().join("foo").join("bar"));
cmd.current_dir("foo/bar");
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -700,7 +700,7 @@ rgtest!(ignore_ripgrep_parent_no_stop, |dir: Dir, mut cmd: TestCommand| {
dir.create_dir("foo/bar");
dir.create("foo/bar/sherlock", SHERLOCK);
cmd.arg("Sherlock");
cmd.current_dir(dir.path().join("foo").join("bar"));
cmd.current_dir("foo/bar");
// The top-level .rgignore applies.
cmd.assert_err();
@@ -733,7 +733,7 @@ rgtest!(no_parent_ignore_git, |dir: Dir, mut cmd: TestCommand| {
dir.create("foo/sherlock", SHERLOCK);
dir.create("foo/watson", SHERLOCK);
cmd.arg("--no-ignore-parent").arg("Sherlock");
cmd.current_dir(dir.path().join("foo"));
cmd.current_dir("foo");
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -749,7 +749,7 @@ rgtest!(symlink_nofollow, |dir: Dir, mut cmd: TestCommand| {
dir.create_dir("foo/baz");
dir.create("foo/baz/sherlock", SHERLOCK);
cmd.arg("Sherlock");
cmd.current_dir(dir.path().join("foo/bar"));
cmd.current_dir("foo/bar");
cmd.assert_err();
});
@@ -762,7 +762,7 @@ rgtest!(symlink_follow, |dir: Dir, mut cmd: TestCommand| {
dir.create("foo/baz/sherlock", SHERLOCK);
dir.link_dir("foo/baz", "foo/bar/baz");
cmd.arg("-L").arg("Sherlock");
cmd.current_dir(dir.path().join("foo/bar"));
cmd.current_dir("foo/bar");
let expected = "\
baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -922,6 +922,8 @@ be, to a very large extent, the result of luck. Sherlock Holmes
eqnice!(expected, cmd.stdout());
});
// lz4 decompression tool doesn't work under RISC-V QEMU emulation in CI
#[cfg(not(target_arch = "riscv64"))]
rgtest!(compressed_lz4, |dir: Dir, mut cmd: TestCommand| {
if !cmd_exists("lz4") {
return;
@@ -952,6 +954,8 @@ be, to a very large extent, the result of luck. Sherlock Holmes
eqnice!(expected, cmd.stdout());
});
// brotli decompression tool doesn't work under RISC-V QEMU emulation in CI
#[cfg(not(target_arch = "riscv64"))]
rgtest!(compressed_brotli, |dir: Dir, mut cmd: TestCommand| {
if !cmd_exists("brotli") {
return;
@@ -967,6 +971,8 @@ be, to a very large extent, the result of luck. Sherlock Holmes
eqnice!(expected, cmd.stdout());
});
// zstd decompression tool doesn't work under RISC-V QEMU emulation in CI
#[cfg(not(target_arch = "riscv64"))]
rgtest!(compressed_zstd, |dir: Dir, mut cmd: TestCommand| {
if !cmd_exists("zstd") {
return;
@@ -1092,16 +1098,22 @@ rgtest!(type_list, |_: Dir, mut cmd: TestCommand| {
// This order is important when sorting them by system time-stamps.
fn sort_setup(dir: Dir) {
use std::{thread::sleep, time::Duration};
// As reported in https://github.com/BurntSushi/ripgrep/issues/3071
// this test fails if sufficient delay is not given on Windows/Aarch64.
let delay = if cfg!(all(windows, target_arch = "aarch64")) {
Duration::from_millis(1000)
} else {
Duration::from_millis(100)
};
let sub_dir = dir.path().join("dir");
dir.create("a", "test");
sleep(Duration::from_millis(100));
sleep(delay);
dir.create_dir(&sub_dir);
sleep(Duration::from_millis(100));
sleep(delay);
dir.create(sub_dir.join("c"), "test");
sleep(Duration::from_millis(100));
sleep(delay);
dir.create("b", "test");
sleep(Duration::from_millis(100));
sleep(delay);
dir.create(sub_dir.join("d"), "test");
}

View File

@@ -1,5 +1,5 @@
use crate::hay::SHERLOCK;
use crate::util::{sort_lines, Dir, TestCommand};
use crate::util::{Dir, TestCommand, sort_lines};
// See: https://github.com/BurntSushi/ripgrep/issues/16
rgtest!(r16, |dir: Dir, mut cmd: TestCommand| {
@@ -23,7 +23,7 @@ rgtest!(r25, |dir: Dir, mut cmd: TestCommand| {
cmd.arg("test");
eqnice!("src/llvm/foo:test\n", cmd.stdout());
cmd.current_dir(dir.path().join("src"));
cmd.current_dir("src");
eqnice!("llvm/foo:test\n", cmd.stdout());
});
@@ -244,7 +244,7 @@ rgtest!(r184, |dir: Dir, mut cmd: TestCommand| {
cmd.arg("test");
eqnice!("foo/bar/baz:test\n", cmd.stdout());
cmd.current_dir(dir.path().join("./foo/bar"));
cmd.current_dir("./foo/bar");
eqnice!("baz:test\n", cmd.stdout());
});
@@ -404,7 +404,7 @@ rgtest!(r428_unrecognized_style, |dir: Dir, mut cmd: TestCommand| {
let expected = "\
rg: error parsing flag --colors: \
unrecognized style attribute ''. Choose from: nobold, bold, nointense, \
intense, nounderline, underline.
intense, nounderline, underline, noitalic, italic.
";
eqnice!(expected, stderr);
});
@@ -569,6 +569,197 @@ rgtest!(r807, |dir: Dir, mut cmd: TestCommand| {
eqnice!(".a/c/file:test\n", cmd.arg("--hidden").arg("test").stdout());
});
// See: https://github.com/BurntSushi/ripgrep/pull/2711
//
// Note that this isn't a regression test. In particular, this didn't fail
// with ripgrep 14.1.1. I couldn't figure out how to turn what the OP gave me
// into a failing test.
rgtest!(r2711, |dir: Dir, _cmd: TestCommand| {
dir.create_dir("a/b");
dir.create("a/.ignore", ".foo");
dir.create("a/b/.foo", "");
{
let mut cmd = dir.command();
eqnice!("a/.ignore\n", cmd.arg("--hidden").arg("--files").stdout());
}
{
let mut cmd = dir.command();
eqnice!(
"./a/.ignore\n",
cmd.arg("--hidden").arg("--files").arg("./").stdout()
);
}
{
let mut cmd = dir.command();
eqnice!(
"a/.ignore\n",
cmd.arg("--hidden").arg("--files").arg("a").stdout()
);
}
{
let mut cmd = dir.command();
cmd.arg("--hidden").arg("--files").arg("a/b").assert_err();
}
{
let mut cmd = dir.command();
eqnice!(
"./a/.ignore\n",
cmd.arg("--hidden").arg("--files").arg("./a").stdout()
);
}
{
let mut cmd = dir.command();
cmd.current_dir("a");
eqnice!(".ignore\n", cmd.arg("--hidden").arg("--files").stdout());
}
{
let mut cmd = dir.command();
cmd.current_dir("a/b");
cmd.arg("--hidden").arg("--files").assert_err();
}
{
let mut cmd = dir.command();
cmd.current_dir("./a");
eqnice!(".ignore\n", cmd.arg("--hidden").arg("--files").stdout());
}
});
// See: https://github.com/BurntSushi/ripgrep/issues/829
rgtest!(r829_original, |dir: Dir, _cmd: TestCommand| {
dir.create_dir("a/b");
dir.create(".ignore", "/a/b");
dir.create("a/b/test.txt", "Sample text");
{
let mut cmd = dir.command();
cmd.args(&["Sample"]).assert_err();
}
{
let mut cmd = dir.command();
cmd.args(&["Sample", "a"]).assert_err();
}
{
let mut cmd = dir.command();
cmd.current_dir("a");
cmd.args(&["Sample"]).assert_err();
}
});
// See: https://github.com/BurntSushi/ripgrep/issues/2731
rgtest!(r829_2731, |dir: Dir, _cmd: TestCommand| {
dir.create_dir("some_dir/build");
dir.create("some_dir/build/foo", "string");
dir.create(".ignore", "build/\n!/some_dir/build/");
{
let mut cmd = dir.command();
eqnice!("some_dir/build/foo\n", cmd.arg("-l").arg("string").stdout());
}
{
let mut cmd = dir.command();
eqnice!(
"some_dir/build/foo\n",
cmd.arg("-l").arg("string").arg("some_dir").stdout()
);
}
{
let mut cmd = dir.command();
eqnice!(
"./some_dir/build/foo\n",
cmd.arg("-l").arg("string").arg("./some_dir").stdout()
);
}
{
let mut cmd = dir.command();
eqnice!(
"some_dir/build/foo\n",
cmd.arg("-l").arg("string").arg("some_dir/build").stdout()
);
}
{
let mut cmd = dir.command();
eqnice!(
"./some_dir/build/foo\n",
cmd.arg("-l").arg("string").arg("./some_dir/build").stdout()
);
}
});
// See: https://github.com/BurntSushi/ripgrep/issues/2747
rgtest!(r829_2747, |dir: Dir, _cmd: TestCommand| {
dir.create_dir("a/c/b");
dir.create_dir("a/src/f/b");
dir.create("a/c/b/foo", "");
dir.create("a/src/f/b/foo", "");
dir.create(".ignore", "/a/*/b");
{
let mut cmd = dir.command();
eqnice!("a/src/f/b/foo\n", cmd.arg("--files").stdout());
}
{
let mut cmd = dir.command();
eqnice!("a/src/f/b/foo\n", cmd.arg("--files").arg("a/src").stdout());
}
{
let mut cmd = dir.command();
cmd.current_dir("a/src");
eqnice!("f/b/foo\n", cmd.arg("--files").stdout());
}
});
// See: https://github.com/BurntSushi/ripgrep/issues/2778
rgtest!(r829_2778, |dir: Dir, _cmd: TestCommand| {
dir.create_dir("parent/subdir");
dir.create(".ignore", "/parent/*.txt");
dir.create("parent/ignore-me.txt", "");
dir.create("parent/subdir/dont-ignore-me.txt", "");
{
let mut cmd = dir.command();
eqnice!(
"parent/subdir/dont-ignore-me.txt\n",
cmd.arg("--files").stdout()
);
}
{
let mut cmd = dir.command();
cmd.current_dir("parent");
eqnice!("subdir/dont-ignore-me.txt\n", cmd.arg("--files").stdout());
}
});
// See: https://github.com/BurntSushi/ripgrep/issues/2836
rgtest!(r829_2836, |dir: Dir, _cmd: TestCommand| {
dir.create_dir("testdir/sub/sub2");
dir.create(".ignore", "/testdir/sub/sub2/\n");
dir.create("testdir/sub/sub2/foo", "");
{
let mut cmd = dir.command();
cmd.arg("--files").assert_err();
}
{
let mut cmd = dir.command();
cmd.current_dir("testdir");
cmd.arg("--files").assert_err();
}
});
// See: https://github.com/BurntSushi/ripgrep/pull/2933
rgtest!(r829_2933, |dir: Dir, mut cmd: TestCommand| {
dir.create_dir("testdir/sub/sub2");
dir.create(".ignore", "/testdir/sub/sub2/");
dir.create("testdir/sub/sub2/testfile", "needle");
let args = &["--files-with-matches", "needle"];
cmd.current_dir("testdir");
cmd.args(args).assert_err();
});
// See: https://github.com/BurntSushi/ripgrep/issues/900
rgtest!(r900, |dir: Dir, mut cmd: TestCommand| {
dir.create("sherlock", SHERLOCK);
@@ -764,6 +955,43 @@ rgtest!(r1319, |dir: Dir, mut cmd: TestCommand| {
);
});
// See: https://github.com/BurntSushi/ripgrep/issues/1332
rgtest!(r1334_invert_empty_patterns, |dir: Dir, _cmd: TestCommand| {
dir.create("zero-patterns", "");
dir.create("one-pattern", "\n");
dir.create("haystack", "one\ntwo\nthree\n");
// zero patterns matches nothing
{
let mut cmd = dir.command();
cmd.arg("-f").arg("zero-patterns").arg("haystack").assert_err();
}
// one pattern that matches empty string matches everything
{
let mut cmd = dir.command();
eqnice!(
"one\ntwo\nthree\n",
cmd.arg("-f").arg("one-pattern").arg("haystack").stdout()
);
}
// inverting zero patterns matches everything
// (This is the regression. ripgrep used to match nothing because of an
// incorrect optimization.)
{
let mut cmd = dir.command();
eqnice!(
"one\ntwo\nthree\n",
cmd.arg("-vf").arg("zero-patterns").arg("haystack").stdout()
);
}
// inverting one pattern that matches empty string matches nothing
{
let mut cmd = dir.command();
cmd.arg("-vf").arg("one-pattern").arg("haystack").assert_err();
}
});
// See: https://github.com/BurntSushi/ripgrep/issues/1334
rgtest!(r1334_crazy_literals, |dir: Dir, mut cmd: TestCommand| {
dir.create("patterns", &"1.208.0.0/12\n".repeat(40));
@@ -965,6 +1193,15 @@ rgtest!(f1757, |dir: Dir, _: TestCommand| {
eqnice!("rust/source.rs\n", dir.command().args(args).stdout());
let args = &["--files-with-matches", "needle", "./rust"];
eqnice!("./rust/source.rs\n", dir.command().args(args).stdout());
dir.create_dir("rust1/target/onemore");
dir.create(".ignore", "rust1/target/onemore");
dir.create("rust1/source.rs", "needle");
dir.create("rust1/target/onemore/rustdoc-output.html", "needle");
let args = &["--files-with-matches", "needle", "rust1"];
eqnice!("rust1/source.rs\n", dir.command().args(args).stdout());
let args = &["--files-with-matches", "needle", "./rust1"];
eqnice!("./rust1/source.rs\n", dir.command().args(args).stdout());
});
// See: https://github.com/BurntSushi/ripgrep/issues/1765
@@ -1067,6 +1304,30 @@ rgtest!(r1891, |dir: Dir, mut cmd: TestCommand| {
eqnice!("1:\n2:\n2:\n2:\n", cmd.args(&["-won", "", "test"]).stdout());
});
// See: https://github.com/BurntSushi/ripgrep/issues/2094
rgtest!(r2094, |dir: Dir, mut cmd: TestCommand| {
dir.create("haystack", "a\nb\nc\na\nb\nc");
cmd.args(&[
"--no-line-number",
"--no-filename",
"--multiline",
"--max-count=1",
"--passthru",
"--replace=B",
"b",
"haystack",
]);
let expected = "\
a
B
c
a
b
c
";
eqnice!(expected, cmd.stdout());
});
// See: https://github.com/BurntSushi/ripgrep/issues/2095
rgtest!(r2095, |dir: Dir, mut cmd: TestCommand| {
dir.create(
@@ -1217,3 +1478,242 @@ rgtest!(r2658_null_data_line_regexp, |dir: Dir, mut cmd: TestCommand| {
let got = cmd.args(&["--null-data", "--line-regexp", r"bar"]).stdout();
eqnice!("haystack:bar\0", got);
});
// See: https://github.com/BurntSushi/ripgrep/issues/2770
rgtest!(r2770_gitignore_error, |dir: Dir, _cmd: TestCommand| {
    dir.create(".git", "");
    dir.create(".gitignore", "**/bar/*");
    dir.create_dir("foo/bar");
    dir.create("foo/bar/baz", "quux");
    // The gitignore rule hides the only match, both from the repo root...
    let mut from_root = dir.command();
    from_root.arg("-l").arg("quux");
    from_root.assert_err();
    // ... and when searching from inside `foo`.
    let mut from_foo = dir.command();
    from_foo.current_dir("foo");
    from_foo.arg("-l").arg("quux");
    from_foo.assert_err();
});
// See: https://github.com/BurntSushi/ripgrep/pull/2944
rgtest!(r2944_incorrect_bytes_searched, |dir: Dir, mut cmd: TestCommand| {
    dir.create("haystack", "foo1\nfoo2\nfoo3\nfoo4\nfoo5\n");
    cmd.arg("--stats").arg("-m2").arg("foo").arg(".");
    let output = cmd.stdout();
    // `-m2` stops after the second matching line, so only 10 bytes of the
    // haystack should be counted as searched.
    assert!(output.contains("10 bytes searched\n"));
});
// See: https://github.com/BurntSushi/ripgrep/issues/2990
//
// Regression test: a `!asdf./` glob used to be ignored when a sibling
// directory `asdf` shared the same prefix up to the trailing dot.
#[cfg(unix)]
rgtest!(r2990_trip_over_trailing_dot, |dir: Dir, _cmd: TestCommand| {
    dir.create_dir("asdf");
    dir.create_dir("asdf.");
    dir.create("asdf/foo", "");
    dir.create("asdf./foo", "");
    // Excluding `asdf/` must leave only the `asdf.` tree.
    let got = dir.command().args(&["--files", "-g", "!asdf/"]).stdout();
    eqnice!("asdf./foo\n", got);
    // This used to ignore the glob given and included `asdf./foo` in output.
    let got = dir.command().args(&["--files", "-g", "!asdf./"]).stdout();
    eqnice!("asdf/foo\n", got);
});
// See: https://github.com/BurntSushi/ripgrep/issues/3067
//
// The gitignore rule `foobar/debug` contains a slash, so it is anchored to the
// .gitignore's directory: it must ignore only `foobar/debug`, not the nested
// `foobar/some/debug`.
rgtest!(r3067_gitignore_error, |dir: Dir, mut cmd: TestCommand| {
    dir.create(".git", "");
    dir.create(".gitignore", "foobar/debug");
    dir.create_dir("foobar/some/debug");
    dir.create_dir("foobar/debug");
    dir.create("foobar/some/debug/flag", "baz");
    dir.create("foobar/debug/flag2", "baz");
    let got = cmd.arg("baz").stdout();
    eqnice!("foobar/some/debug/flag:baz\n", got);
});
// See: https://github.com/BurntSushi/ripgrep/issues/3108
rgtest!(r3108_files_without_match_quiet_exit, |dir: Dir, _: TestCommand| {
dir.create("yes-match", "abc");
dir.create("non-match", "xyz");
dir.command().args(&["-q", "abc", "non-match"]).assert_exit_code(1);
dir.command().args(&["-q", "abc", "yes-match"]).assert_exit_code(0);
dir.command()
.args(&["--files-with-matches", "-q", "abc", "non-match"])
.assert_exit_code(1);
dir.command()
.args(&["--files-with-matches", "-q", "abc", "yes-match"])
.assert_exit_code(0);
dir.command()
.args(&["--files-without-match", "abc", "non-match"])
.assert_exit_code(0);
dir.command()
.args(&["--files-without-match", "abc", "yes-match"])
.assert_exit_code(1);
let got = dir
.command()
.args(&["--files-without-match", "abc", "non-match"])
.stdout();
eqnice!("non-match\n", got);
dir.command()
.args(&["--files-without-match", "-q", "abc", "non-match"])
.assert_exit_code(0);
dir.command()
.args(&["--files-without-match", "-q", "abc", "yes-match"])
.assert_exit_code(1);
let got = dir
.command()
.args(&["--files-without-match", "-q", "abc", "non-match"])
.stdout();
eqnice!("", got);
});
// See: https://github.com/BurntSushi/ripgrep/issues/3127
rgtest!(
r3127_gitignore_allow_unclosed_class,
|dir: Dir, mut cmd: TestCommand| {
dir.create_dir(".git");
dir.create(".gitignore", "[abc");
dir.create("[abc", "");
dir.create("test", "");
let got = cmd.args(&["--files"]).stdout();
eqnice!("test\n", got);
}
);
// See: https://github.com/BurntSushi/ripgrep/issues/3127
//
// Unlike a .gitignore pattern (where an unclosed character class is tolerated,
// see `r3127_gitignore_allow_unclosed_class`), an invalid `-g` glob given on
// the command line is a hard error.
rgtest!(
    r3127_glob_flag_not_allow_unclosed_class,
    |dir: Dir, mut cmd: TestCommand| {
        dir.create("[abc", "");
        dir.create("test", "");
        cmd.args(&["--files", "-g", "[abc"]).assert_err();
    }
);
// See: https://github.com/BurntSushi/ripgrep/issues/3139
rgtest!(
r3139_multiline_lookahead_files_with_matches,
|dir: Dir, _cmd: TestCommand| {
// Only PCRE2 supports look-around.
if !dir.is_pcre2() {
return;
}
dir.create(
"test",
"\
Start \n \n\n \
XXXXXXXXXXXXXXXXXXXXXXXXXX\n \
YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY\n \
\n thing2 \n\n",
);
let got = dir
.command()
.args(&[
"--multiline",
"--pcre2",
r"(?s)Start(?=.*thing2)",
"test",
])
.stdout();
eqnice!("Start \n", got);
let got = dir
.command()
.args(&[
"--multiline",
"--pcre2",
"--files-with-matches",
r"(?s)Start(?=.*thing2)",
"test",
])
.stdout();
eqnice!("test\n", got);
}
);
// See: https://github.com/BurntSushi/ripgrep/issues/3173
rgtest!(r3173_hidden_whitelist_only_dot, |dir: Dir, _: TestCommand| {
dir.create_dir("subdir");
dir.create("subdir/.foo.txt", "text");
dir.create(".ignore", "!.foo.txt");
let cmd = || dir.command();
eqnice!(cmd().args(&["--files"]).stdout(), "subdir/.foo.txt\n");
eqnice!(cmd().args(&["--files", "."]).stdout(), "./subdir/.foo.txt\n");
eqnice!(cmd().args(&["--files", "./"]).stdout(), "./subdir/.foo.txt\n");
let cmd = || {
let mut cmd = dir.command();
cmd.current_dir("subdir");
cmd
};
eqnice!(cmd().args(&["--files"]).stdout(), ".foo.txt\n");
eqnice!(cmd().args(&["--files", "."]).stdout(), "./.foo.txt\n");
eqnice!(cmd().args(&["--files", "./"]).stdout(), "./.foo.txt\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/3179
rgtest!(r3179_global_gitignore_cwd, |dir: Dir, mut cmd: TestCommand| {
    dir.create_dir("a/b/c");
    dir.create("a/b/c/haystack", "");
    dir.create(".test.gitignore", "/haystack");
    // I'm not sure in which cases this can fail. If it
    // does and it's unavoidable, feel free to submit a
    // patch that skips this test when this canonicalization
    // fails.
    //
    // The reason we canonicalize here is strange, and it is
    // perhaps papering over a bug in ripgrep. But on macOS,
    // `TMPDIR` is set to `/var/blah/blah`. However, `/var`
    // is symlinked to `/private/var`. So the CWD detected by
    // the process is `/private/var`. So it turns out that the
    // CWD is not a proper prefix of `dir.path()` here. So we
    // cheat around this by forcing our path to be canonicalized
    // so it's `/private/var` everywhere.
    //
    // Arguably, ripgrep should still work here without
    // canonicalization. But it's not actually quite clear
    // to me how to do it. I *believe* the solution here is
    // that gitignore matching should be relative to the directory
    // path given to `WalkBuilder::{add,new}`, and *not* to the
    // CWD. But this is a very big change to how `ignore` works
    // I think. At least conceptually. So that will need to be
    // something we do when we rewrite `ignore`. Sigh.
    //
    // ... but, on Windows, path canonicalization seems to
    // totally fuck things up, so skip it there. HEAVY sigh.
    let dir_path = if cfg!(windows) {
        dir.path().to_path_buf()
    } else {
        dir.path().canonicalize().unwrap()
    };
    let ignore_file_path = dir_path.join(".test.gitignore");
    cmd.current_dir("a/b/c")
        .arg("--files")
        .arg("--ignore-file")
        .arg(ignore_file_path.display().to_string())
        // This is a key part of the reproduction. When just providing `.`
        // to ignore's walker (as ripgrep does when a path to search isn't
        // provided), then everything works as one expects. Because there's
        // nothing to strip off of the paths being searched. But when one
        // provides an absolute path, the stripping didn't work.
        .arg(&dir_path)
        .assert_err();
});
// See: https://github.com/BurntSushi/ripgrep/issues/3180
//
// Regression test for a panic (per the linked issue) when combining this
// look-around pattern with multiline search and `--replace`.
rgtest!(r3180_look_around_panic, |dir: Dir, mut cmd: TestCommand| {
    dir.create("haystack", " b b b b b b b b\nc\n");
    let pattern = r#"(^|[^a-z])((([a-z]+)?)\s)?b(\s([a-z]+)?)($|[^a-z])"#;
    // `-U` enables multiline mode, `-rx` replaces each match with `x`.
    let got = cmd.args(&[pattern, "haystack", "-U", "-rx"]).stdout();
    eqnice!("xbxbx\n", got);
});

View File

@@ -68,14 +68,14 @@ impl Dir {
/// does not need to be distinct for each invocation, but should correspond
/// to a logical grouping of tests.
pub fn new(name: &str) -> Dir {
let id = NEXT_ID.fetch_add(1, Ordering::SeqCst);
let id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
let root = env::current_exe()
.unwrap()
.parent()
.expect("executable's directory")
.to_path_buf();
let dir =
env::temp_dir().join(TEST_DIR).join(name).join(&format!("{}", id));
env::temp_dir().join(TEST_DIR).join(name).join(&format!("{id}"));
if dir.exists() {
nice_err(&dir, fs::remove_dir_all(&dir));
}
@@ -273,11 +273,14 @@ impl TestCommand {
/// Set the working directory for this command.
///
/// The path given is interpreted relative to the directory that this
/// command was created for.
///
/// Note that this does not need to be called normally, since the creation
/// of this TestCommand causes its working directory to be set to the
/// test's directory automatically.
pub fn current_dir<P: AsRef<Path>>(&mut self, dir: P) -> &mut TestCommand {
self.cmd.current_dir(dir);
self.cmd.current_dir(self.dir.path().join(dir));
self
}