Compare commits

...

251 Commits

Author SHA1 Message Date
9d164f4b2c modified metadata for rgs
2026-01-13 20:45:15 -05:00
b60b31706a Merge pull request 'rgs-dev done' (#1) from rgs-dev into master
Reviewed-on: #1
2026-01-14 01:36:52 +00:00
0994661424 added docs and migrated name to rgs, migrated repo, added squash-lines feature
2026-01-13 20:35:39 -05:00
ad6ec1b4c5 rgs: added multiline window limit and in-file result indexing (work in progress) 2025-12-23 04:01:55 -05:00
xtqqczze
cd1f981bea fix: derive Default when possible
Ref https://rust-lang.github.io/rust-clippy/master/index.html#/derivable_impls
2025-11-29 14:11:38 -05:00
Andrew Gallant
57c190d56e ignore-0.4.25 2025-10-30 13:30:14 -04:00
Ian McKellar
85edf4c796 ignore: only stat .jj if we actually care
I was comparing the work being done by fd and find and noticed (with
`strace -f -c -S` calls) that fd was doing a ton of failed `statx`
calls. Upon closer inspection it was stating `.jj` even though I
was passing `--no-ignore`. Eventually I turned up this check in
`Ignore::add_child_path` that was doing stat on `.jj` regardless of
whether the options request it.

With this patch it'll only stat `.jj` if that's relevant to the query.

PR #3212
2025-10-30 13:29:58 -04:00
Andrew Gallant
36b7597693 changelog: start next section 2025-10-22 09:02:40 -04:00
Andrew Gallant
a132e56b8c pkg/brew: update tap 2025-10-22 09:01:12 -04:00
Andrew Gallant
af60c2de9d 15.1.0
2025-10-22 08:30:04 -04:00
Andrew Gallant
a63671efb0 deps: bump to grep 0.4.1 2025-10-22 08:29:19 -04:00
Andrew Gallant
2ea06d69aa grep-0.4.1 2025-10-22 08:28:53 -04:00
Andrew Gallant
85006b08d6 deps: bump to grep-printer 0.3.1 2025-10-22 08:28:32 -04:00
Andrew Gallant
423afb8513 grep-printer-0.3.1 2025-10-22 08:28:06 -04:00
Andrew Gallant
4694800be5 deps: bump to grep-searcher 0.1.16 2025-10-22 08:26:22 -04:00
Andrew Gallant
86e0ab12ef grep-searcher-0.1.16 2025-10-22 08:25:01 -04:00
Andrew Gallant
7189950799 deps: bump to globset 0.4.18 2025-10-22 08:24:51 -04:00
Andrew Gallant
0b0e013f5a globset-0.4.18 2025-10-22 08:23:57 -04:00
Andrew Gallant
cac9870a02 doc: update date in man page template 2025-10-22 08:23:05 -04:00
Andrew Gallant
bee13375ed deps: update everything 2025-10-22 08:21:56 -04:00
Andrew Gallant
f5be160839 changelog: 15.1.0 2025-10-22 08:21:34 -04:00
Jorge Gomez
24e88dc15b ignore/types: add ssa type
This PR adds support for [.ssa](https://en.wikipedia.org/wiki/Static_single-assignment_form) files as read by [qbe](https://c9x.me/compile/):

See: https://c9x.me/compile/doc/il.html#Input-Files
2025-10-22 08:18:30 -04:00
Andrew Gallant
5748f81bb1 printer: use doc_cfg instead of doc_auto_cfg
Fixes #3202
2025-10-22 07:47:07 -04:00
Andrew Gallant
d47663b1b4 searcher: fix regression with --line-buffered flag
In my fix for #3184, I actually had two fixes. One was a tweak to how we
read data and the other was a tweak to how we determined how much of the
buffer we needed to keep around. It turns out that fixing #3184 only
required the latter fix, found in commit
d4b77a8d89. The former fix also helped the
specific case of #3184, but it ended up regressing `--line-buffered`.

Specifically, previous to 8c6595c215 (the
first fix), we would do one `read` syscall. This call might not fill our
caller provided buffer. And in particular, `stdin` seemed to fill fewer
bytes than reading from a file. So the "fix" was to put `read` in a loop
and keep calling it until the caller provided buffer was full or until
the stream was exhausted. This helped alleviate #3184 by amortizing
`read` syscalls better.

But of course, in retrospect, this change is clearly contrary to how
`--line-buffered` works. We specifically do _not_ want to wait around
until the buffer is full. We want to read what we can, search it and
move on.

So this reverts the first fix but leaves the second, which still
keeps #3184 fixed and also fixes #3194 (the regression).

This reverts commit 8c6595c215.

Fixes #3194
2025-10-19 11:06:39 -04:00
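The trade-off this commit describes can be sketched with plain `std::io::Read` (illustrative only, not ripgrep's actual buffer code): a single `read` hands back whatever bytes are currently available, which is what `--line-buffered` needs, while looping until the caller's buffer is full amortizes syscalls at the cost of waiting for more input.

```
use std::io::{self, Cursor, Read};

// Return whatever is available right now (at most one `read` call).
fn read_once<R: Read>(rdr: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    rdr.read(buf)
}

// Keep reading until the buffer is full or the stream is exhausted. This
// amortizes `read` calls, but it is exactly the wrong thing for
// `--line-buffered`, which must not wait for a full buffer before searching.
fn read_until_full<R: Read>(rdr: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut nread = 0;
    while nread < buf.len() {
        let n = rdr.read(&mut buf[nread..])?;
        if n == 0 {
            break;
        }
        nread += n;
    }
    Ok(nread)
}

fn main() -> io::Result<()> {
    let data = b"hello\nworld\n";
    let mut buf = [0u8; 4096];
    let once = read_once(&mut Cursor::new(&data[..]), &mut buf)?;
    let full = read_until_full(&mut Cursor::new(&data[..]), &mut buf)?;
    println!("read_once: {once} bytes, read_until_full: {full} bytes");
    Ok(())
}
```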
Enoch
38d630261a printer: add Cursor hyperlink alias
This is similar to the other aliases used by
VS Code forks.

PR #3192
2025-10-17 14:59:17 -04:00
Andrew Gallant
b3dc4b0998 globset: improve debug log
This shows the regex that the glob was compiled to.
2025-10-17 10:27:19 -04:00
Andrew Gallant
f09b55b8e7 changelog: start next section 2025-10-15 23:32:00 -04:00
Andrew Gallant
0551c6b931 pkg/brew: update tap 2025-10-15 23:31:35 -04:00
Andrew Gallant
3a612f88b8 15.0.0
2025-10-15 23:07:50 -04:00
Andrew Gallant
ca2e34f37c grep-0.4.0 2025-10-15 23:06:34 -04:00
Andrew Gallant
a6092beee4 deps: bump to grep-printer 0.3.0 2025-10-15 23:05:10 -04:00
Andrew Gallant
a0d61a063f grep-printer-0.3.0 2025-10-15 23:04:24 -04:00
Andrew Gallant
c22fc0f13c deps: bump to grep-searcher 0.1.15 2025-10-15 23:02:59 -04:00
Andrew Gallant
087f82273d grep-searcher-0.1.15 2025-10-15 23:02:33 -04:00
Andrew Gallant
a3a30896be deps: bump to grep-pcre2 0.1.9 2025-10-15 23:01:31 -04:00
Andrew Gallant
7397ab7d97 grep-pcre2-0.1.9 2025-10-15 23:01:07 -04:00
Andrew Gallant
cf1dab0d5a deps: bump to grep-regex 0.1.14 2025-10-15 23:00:58 -04:00
Andrew Gallant
e523c6bf32 grep-regex-0.1.14 2025-10-15 23:00:22 -04:00
Andrew Gallant
720376ead6 deps: bump to grep-matcher 0.1.8 2025-10-15 23:00:12 -04:00
Andrew Gallant
a5ba50ceaf grep-matcher-0.1.8 2025-10-15 22:59:35 -04:00
Andrew Gallant
a766f79710 deps: bump to grep-cli 0.1.12 2025-10-15 22:59:17 -04:00
Andrew Gallant
4aafe45760 grep-cli-0.1.12 2025-10-15 22:58:42 -04:00
Andrew Gallant
c03e49b8c5 deps: bump to ignore 0.4.24 2025-10-15 22:58:35 -04:00
Andrew Gallant
70ae7354e1 ignore-0.4.24 2025-10-15 22:57:50 -04:00
Andrew Gallant
19c2a6e0d9 deps: bump to globset 0.4.17 2025-10-15 22:57:28 -04:00
Andrew Gallant
064b36b115 globset-0.4.17 2025-10-15 22:55:55 -04:00
Andrew Gallant
365384a5c1 doc: move CHANGELOG update before dependency updates
It seems better to write this first. Especially so it gets included into
crate publishes.
2025-10-15 22:54:51 -04:00
Andrew Gallant
72a5291b4e doc: update date in man page template 2025-10-15 22:54:11 -04:00
Andrew Gallant
62e676843a deps: update everything 2025-10-15 22:53:30 -04:00
Andrew Gallant
3780168c13 changelog: 15.0.0 2025-10-15 22:53:30 -04:00
Andrew Gallant
4c953731c4 release: finally switch to LTO for release binaries
There seems to be a modest improvement on some workloads:

```
$ time rg -co '\w+' sixteenth.txt
158520346

real    8.457
user    8.426
sys     0.020
maxmem  779 MB
faults  0

$ time rg-lto -co '\w+' sixteenth.txt
158520346

real    8.200
user    8.178
sys     0.012
maxmem  778 MB
faults  0
```

I've somewhat reversed course on my previous thoughts here. The
improvement isn't much, but the hit to compile times in CI isn't
terrible. Mostly I'm doing this out of "good sense," and I think it's
generally unlikely to make it more difficult for me to diagnose
performance problems. (I still use the default `release` profile
locally, since it's about an order of magnitude quicker to compile.)

Ref #325, Ref #413, Ref #1187, Ref #1255
2025-10-15 22:51:41 -04:00
Andrew Gallant
79d393a302 release: remove riscv64 and powerpc64 artifacts
Their CI workflows broke for different reasons.

I perceive these as niche platforms that aren't worth blocking
a release on. And not worth my time investigating CI problems.
2025-10-15 22:42:51 -04:00
Andrew Gallant
85eaf95833 ci: testing release 2025-10-15 22:41:46 -04:00
Andrew Gallant
63209ae0b9 printer: fix --stats for --json
Somehow, the JSON printer seems to have never emitted correct summary
statistics. And I believe #3178 is the first time anyone has ever
reported it. I believe this bug has persisted for years. That's
surprising.

Anyway, the problem here was that we were bailing out of `finish()` on
the sink if we weren't supposed to print anything. But we bailed out
before we tallied our summary statistics. Obviously we shouldn't do
that.

Fixes #3178
2025-10-15 21:21:20 -04:00
Andrew Gallant
b610d1cb15 ignore: fix global gitignore bug that arises with absolute paths
The `ignore` crate currently handles two different kinds of "global"
gitignore files: gitignores from `~/.gitconfig`'s `core.excludesFile`
and gitignores passed in via `WalkBuilder::add_ignore` (corresponding to
ripgrep's `--ignore-file` flag).

In contrast to any other kind of gitignore file, these gitignore files
should have their patterns interpreted relative to the current working
directory. (Arguably there are other choices we could make here, e.g.,
based on the paths given. But the `ignore` infrastructure can't handle
that, and it's not clearly correct to me.) Normally, a gitignore file
has its patterns interpreted relative to where the gitignore file is.
This relative interpretation matters for patterns like `/foo`, which are
anchored to _some_ directory.

Previously, we would generally get the global gitignores correct because
it's most common to use ripgrep without providing a path. Thus, it
searches the current working directory. In this case, no stripping of
the paths is needed in order for the gitignore patterns to be applied
directly.

But if one provides an absolute path (or something else) to ripgrep to
search, the paths aren't stripped correctly. Indeed, in the core, I had
just given up and not provided a "root" path to these global gitignores.
So it had no hope of getting this correct.

We fix this by assigning the CWD to the `Gitignore` values created from
global gitignore files. This was a painful thing to do because we'd
ideally:

1. Call `std::env::current_dir()` at most once for each traversal.
2. Provide a way to avoid the library calling `std::env::current_dir()`
   at all. (Since this is global process state and folks might want to
   set it to different values for $reasons.)

The `ignore` crate's internals are a total mess. But I think I've
addressed the above 2 points in a semver compatible manner.

Fixes #3179
2025-10-15 19:44:23 -04:00
Luke Hannan
9ec08522be ignore/types: add lowercase R extensions
PR #3186
2025-10-14 15:15:07 -04:00
Andrew Gallant
d4b77a8d89 searcher: fix a performance bug with -A/--after-context
Previously (with the previous commit):

```
$ cat bigger.txt | (time rg ZQZQZQZQZQ -A999) | wc -l

real    2.321
user    0.674
sys     0.735
maxmem  30 MB
faults  0
1000

$ cat bigger.txt | (time rg ZQZQZQZQZQ -A9999) | wc -l

real    2.513
user    0.823
sys     0.686
maxmem  30 MB
faults  0
10000

$ cat bigger.txt | (time rg ZQZQZQZQZQ -A99999) | wc -l

real    5.067
user    3.254
sys     0.676
maxmem  30 MB
faults  0
100000

$ cat bigger.txt | (time rg ZQZQZQZQZQ -A999999) | wc -l

real    6.658
user    4.841
sys     0.778
maxmem  51 MB
faults  0
1000000
```

Now with this commit:

```
$ cat bigger.txt | (time rg ZQZQZQZQZQ -A999) | wc -l

real    1.845
user    0.328
sys     0.757
maxmem  30 MB
faults  0
1000

$ cat bigger.txt | (time rg ZQZQZQZQZQ -A9999) | wc -l

real    1.917
user    0.334
sys     0.771
maxmem  30 MB
faults  0
10000

$ cat bigger.txt | (time rg ZQZQZQZQZQ -A99999) | wc -l

real    1.972
user    0.319
sys     0.812
maxmem  30 MB
faults  0
100000

$ cat bigger.txt | (time rg ZQZQZQZQZQ -A999999) | wc -l

real    2.005
user    0.333
sys     0.855
maxmem  30 MB
faults  0
1000000
```

And compare to GNU grep:

```
$ cat bigger.txt | (time grep ZQZQZQZQZQ -A999) | wc -l

real    1.488
user    0.143
sys     0.866
maxmem  30 MB
faults  0
1000

$ cat bigger.txt | (time grep ZQZQZQZQZQ -A9999) | wc -l

real    1.697
user    0.170
sys     0.986
maxmem  30 MB
faults  1
10000

$ cat bigger.txt | (time grep ZQZQZQZQZQ -A99999) | wc -l

real    1.515
user    0.166
sys     0.856
maxmem  29 MB
faults  0
100000

$ cat bigger.txt | (time grep ZQZQZQZQZQ -A999999) | wc -l

real    1.490
user    0.174
sys     0.851
maxmem  30 MB
faults  0
1000000
```

Interestingly, GNU grep is still a bit faster. But both commands remain
roughly invariant in search time as `-A` is increased.

There is definitely something "odd" about searching `stdin`, where it
seems substantially slower. We can also observe with GNU grep:

```
$ (time grep ZQZQZQZQZQ -A999999 bigger.txt) | wc -l

real    0.692
user    0.184
sys     0.506
maxmem  30 MB
faults  0
1000000

$ cat bigger.txt | (time grep ZQZQZQZQZQ -A999999) | wc -l

real    1.700
user    0.201
sys     0.954
maxmem  30 MB
faults  0
1000000

$ (time rg ZQZQZQZQZQ -A999999 bigger.txt) | wc -l

real    0.640
user    0.428
sys     0.209
maxmem  7734 MB
faults  0
1000000

$ (time rg ZQZQZQZQZQ --no-mmap -A999999 bigger.txt) | wc -l

real    0.866
user    0.282
sys     0.581
maxmem  30 MB
faults  0
1000000

$ cat bigger.txt | (time rg ZQZQZQZQZQ -A999999) | wc -l

real    1.991
user    0.338
sys     0.819
maxmem  30 MB
faults  0
1000000
```

I wonder if this is related to my discovery in the previous commit where
`read` calls on `stdin` seem to never return anything more than ~64K. Oh
well, I'm satisfied at this point, especially given that GNU grep seems
to do a lot worse than ripgrep with bigger values of
`-B/--before-context`:

```
$ cat bigger.txt | (time grep ZQZQZQZQZQ -B9) | wc -l

real    1.568
user    0.170
sys     0.885
maxmem  30 MB
faults  0
1

$ cat bigger.txt | (time grep ZQZQZQZQZQ -B99) | wc -l

real    1.734
user    0.338
sys     0.879
maxmem  30 MB
faults  0
1

$ cat bigger.txt | (time grep ZQZQZQZQZQ -B999) | wc -l

real    2.349
user    1.723
sys     0.620
maxmem  30 MB
faults  0
1

$ cat bigger.txt | (time grep ZQZQZQZQZQ -B9999) | wc -l

real    16.459
user    15.848
sys     0.586
maxmem  30 MB
faults  0
1

$ time grep ZQZQZQZQZQ -B99999 bigger.txt
ZQZQZQZQZQ

real    1:45.06
user    1:44.12
sys     0.772
maxmem  30 MB
faults  0
```

The above pattern occurs regardless of whether you put `bigger.txt` on
stdin or whether you search it directly.

And now ripgrep:

```
$ cat bigger.txt | (time rg ZQZQZQZQZQ -B9) | wc -l

real    1.965
user    0.326
sys     0.814
maxmem  29 MB
faults  0
1

$ cat bigger.txt | (time rg ZQZQZQZQZQ -B99) | wc -l

real    1.941
user    0.423
sys     0.813
maxmem  29 MB
faults  0
1

$ cat bigger.txt | (time rg ZQZQZQZQZQ -B999) | wc -l

real    2.372
user    0.759
sys     0.703
maxmem  30 MB
faults  0
1

$ cat bigger.txt | (time rg ZQZQZQZQZQ -B9999) | wc -l

real    2.638
user    0.895
sys     0.665
maxmem  29 MB
faults  0
1

$ cat bigger.txt | (time rg ZQZQZQZQZQ -B99999) | wc -l

real    5.172
user    3.282
sys     0.748
maxmem  29 MB
faults  0
1
```

NOTE: To get `bigger.txt`:

```
$ curl -LO 'https://burntsushi.net/stuff/opensubtitles/2018/en/sixteenth.txt.gz'
$ gzip -d sixteenth.txt.gz
$ (echo ZQZQZQZQZQ && for ((i=0;i<10;i++)); do cat sixteenth.txt; done) > bigger.txt
```
2025-10-14 14:27:43 -04:00
Andrew Gallant
8c6595c215 searcher: fix performance bug with -A/--after-context when searching stdin
This was a crazy subtle bug where ripgrep could slow down exponentially
as increasingly larger values of `-A/--after-context` were used. But,
interestingly, this would only occur when searching `stdin` and _not_
when searching the same data as a regular file.

This confounded me because ripgrep, pretty early on, erases the
difference between searching a single file and `stdin`. So it wasn't
like there were different code paths. And I mistakenly assumed that they
would otherwise behave the same as they are just treated as streams.

But... it turns out that running `read` on a `stdin` versus a regular
file seems to behave differently. At least on my Linux system, with
`stdin`, `read` never seems to fill the buffer with more than 64K. But
with a regular file, `read` pretty reliably fills the caller's buffer
with as much space as declared.

Of course, it is expected that `read` doesn't *have* to fill up the
caller's buffer, and ripgrep is generally fine with that. But when
`-A/--after-context` is used with a very large value---big enough that
the default buffer capacity is too small---then more heap memory needs
to be allocated to correctly handle all cases. This can result in
passing buffers bigger than 64K to `read`.

While we *correctly* handle `read` calls that don't fill the buffer,
it turns out that if we don't fill the buffer, then we get into a
pathological case where we aren't processing as many bytes as we could.
That is, because of the `-A/--after-context` causing us to keep a lot of
bytes around while we roll the buffer and because reading from `stdin`
gives us fewer bytes than normal, we weren't amortizing our `read` calls
as well as we should have been. Indeed, our buffer capacity increases
specifically take this amortization into account, but we weren't taking
advantage of it.

We fix this by putting `read` into an inner loop that ensures our
buffer gets filled up. This fixes the performance bug:

```
$ (time rg ZQZQZQZQZQ bigger.txt --no-mmap -A9999) | wc -l

real    1.330
user    0.767
sys     0.559
maxmem  29 MB
faults  0
10000

$ cat bigger.txt | (time rg ZQZQZQZQZQ --no-mmap -A9999) | wc -l

real    2.355
user    0.860
sys     0.613
maxmem  29 MB
faults  0
10000

$ (time rg ZQZQZQZQZQ bigger.txt --no-mmap -A99999) | wc -l

real    3.636
user    3.091
sys     0.537
maxmem  29 MB
faults  0
100000

$ cat bigger.txt | (time rg ZQZQZQZQZQ --no-mmap -A99999) | wc -l

real    4.918
user    3.236
sys     0.710
maxmem  29 MB
faults  0
100000

$ (time rg ZQZQZQZQZQ bigger.txt --no-mmap -A999999) | wc -l

real    5.430
user    4.666
sys     0.750
maxmem  51 MB
faults  0
1000000

$ cat bigger.txt | (time rg ZQZQZQZQZQ --no-mmap -A999999) | wc -l

real    6.894
user    4.907
sys     0.850
maxmem  51 MB
faults  0
1000000
```

For comparison, here is GNU grep:

```
$ cat bigger.txt | (time grep ZQZQZQZQZQ -A9999) | wc -l

real    1.466
user    0.159
sys     0.839
maxmem  29 MB
faults  0
10000

$ cat bigger.txt | (time grep ZQZQZQZQZQ -A99999) | wc -l

real    1.663
user    0.166
sys     0.941
maxmem  29 MB
faults  0
100000

$ cat bigger.txt | (time grep ZQZQZQZQZQ -A999999) | wc -l

real    1.631
user    0.204
sys     0.910
maxmem  29 MB
faults  0
1000000
```

GNU grep is still notably faster. We'll fix that in the next commit.

Fixes #3184
2025-10-14 14:27:43 -04:00
Andrew Gallant
de2567a4c7 printer: fix panic in replacements in look-around corner case
The abstraction boundary fuck up is the gift that keeps on giving. It
turns out that the invariant that the match would never exceed the range
given is not always true. So we kludge around it.

Also, update the CHANGELOG to include the fix for #2111.

Fixes #3180
2025-10-12 17:25:19 -04:00
Andrew Gallant
916415857f core: don't build decompression reader unless we intend to use it
Building it can consume resources. In particular, on Windows, the
various binaries are eagerly resolved.

I think this originally wasn't done. The eager resolution was added
later for security purposes. But the "eager" part isn't actually
necessary.

It would probably be better to change the decompression reader to do
lazy resolution only when the binary is needed. But this will at least
avoid doing anything when the `-z/--search-zip` flag isn't used. But
when it is, ripgrep will still eagerly resolve all possible binaries.

Fixes #2111
2025-10-12 16:31:20 -04:00
Andrew Gallant
5c42c8c48f test: add regression test for fixed bug
It turns out that #2094 was fixed in my `--max-count` refactor a few
commits back. This commit adds a regression test for it.

Closes #2094
2025-10-12 12:45:34 -04:00
Andrew Gallant
f0faa91c68 doc: clarify --ignore-file precedence
Fixes #2777
2025-10-10 22:06:59 -04:00
Andrew Gallant
a5d9e03c68 test: attempt to fix flaky time-reliant test
Fixes #2794
2025-10-10 22:06:59 -04:00
Andrew Gallant
924ba101ee test: fix Command::current_dir API
Every single call site wants to pass a path relative to the directory
the command was created for. So just make it do that automatically,
similar to `Dir::create` and friends.
2025-10-10 22:06:59 -04:00
Andrew Gallant
293ef80eaf test: add another regression test for gitignore matching bug
I believe this was also fixed by #2933.

Closes #2770
2025-10-10 22:06:59 -04:00
Andrew Gallant
fa80aab6b0 test: add regression test for fixed gitignore bug
I believe this was actually fixed by #2933.

Closes #3067
2025-10-10 22:06:59 -04:00
mariano-m13
7c2161d687 release: add binaries for riscv64gc-unknown-linux-gnu target
Note that we skip lz4/brotli/zstd tests on RISC-V.

The CI runs RISC-V tests using cross/QEMU emulation. The decompression
tools (lz4, brotli, zstd) are x86_64 binaries on the host that cannot
execute in the RISC-V QEMU environment.

Skip these three tests at compile-time on RISC-V to avoid test failures.
The -z/--search-zip functionality itself works correctly on real RISC-V
hardware where native decompression tools are available.

PR #3165
2025-10-10 20:50:28 -04:00
Andrew Gallant
096f79ab98 deps: update everything
This includes an update to `regex 1.12.1`, which fixes a couple of
outstanding bugs in ripgrep.

Fixes #2750, Fixes #3135
2025-10-10 20:13:29 -04:00
Andrew Gallant
0407e104f6 ignore: fix problem with searching whitelisted hidden files
... specifically, when the whitelist comes from a _parent_ gitignore
file.

Our handling of parent gitignores is pretty ham-fisted and has been a
source of some unfortunate bugs. The problem is that we need to strip
the parent path from the path we're searching in order to correctly
apply the globs. But getting this stripping correct seems to be a subtle
affair.

Fixes #3173
2025-10-08 21:16:59 -04:00
Andrew Gallant
bb88a1ac45 deps: semver compatible updates to dependencies 2025-10-05 10:52:46 -04:00
Alvaro Parker
2924d0c4c0 ignore: add min_depth option
This mimics the eponymous option in `walkdir`.

Closes #3158, PR #3162
2025-10-05 10:05:26 -04:00
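A minimal sketch of how the new option might be used. The `min_depth(Some(..))` signature is an assumption, inferred from the existing `max_depth` builder method and the statement that it mimics `walkdir`; it is not taken from the crate docs.

```
use ignore::WalkBuilder;

fn main() {
    let walker = WalkBuilder::new("./src")
        // Assumed signature, mirroring `max_depth(Option<usize>)`: skip the
        // root entry itself by requiring depth >= 1.
        .min_depth(Some(1))
        .max_depth(Some(3))
        .build();
    for result in walker {
        match result {
            Ok(entry) => println!("{}", entry.path().display()),
            Err(err) => eprintln!("error: {err}"),
        }
    }
}
```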
Andrew Gallant
9d8016d10c printer: finish removal of max_matches
This finishes what I started in commit
a6e0be3c90.
Specifically, the `max_matches` configuration has been moved to the
`grep-searcher` crate and *removed* from the `grep-printer` crate. The
commit message has the details for why we're doing this, but the short
story is to fix #3076.

Note that this is a breaking change for `grep-printer`, so this will
require a semver incompatible release.
2025-10-04 09:19:53 -04:00
Andrew Gallant
9802945e63 doc: update the CentOS, RHEL and Rocky Linux installation instructions
I've split the previously singular "CentOS/RHEL/Rocky" section into 3
sections. They each benefit from having their own steps.

I've also copied steps from [EPEL Getting Started] documentation,
including steps that don't seem to be strictly required, since following them
appears to be best practice (although I do not understand why). Notably, this is not
required for CentOS Stream:

```
dnf config-manager --set-enabled crb
```

And this is not required for Red Hat:

```
subscription-manager repos --enable codeready-builder-for-rhel-10-$(arch)-rpms
```

And neither are available on Rocky Linux 10. Hence, all 3 have slightly
different instructions.

It has been suggested (see [here][suggest1] and [here][suggest2]) that
the installation instructions should just link to the [EPEL Getting
Started] documentation and just contain this step:

```
sudo dnf install ripgrep
```

However, this is not sufficient to actually install ripgrep from a
base installation of these Linux distributions. I tested this via the
`dokken/centos-stream-10:sha-d1e294f`, `rockylinux/rockylinux:10` and
`redhat/ubi10` Docker images on DockerHub.

While this does mean ripgrep's installation instructions can become out
of sync from upstream, this is *always* a risk regardless of platform.
The instructions are provided on a best effort basis and generally
should work on the latest release of said platform. If the instructions
result in unhelpful errors (like `dnf install ripgrep` does if you
don't enable EPEL), then that isn't being maximally helpful to users.
I'd rather attempt to give the entire set of instructions and risk
being out of sync.

Also, since the installation instructions include URLs with version
numbers in them, I made the section names include version numbers as
well.

Note: I found using the `dokken/centos-stream-10:sha-d1e294f` Docker
image to be somewhat odd, as I could not find any official CentOS
Docker images. [This][DockerHub-CentOS] is still the first hit on
Google, but all of its tags have been deleted and the image is
deprecated. I was profoundly confused by this given that the [EPEL
Getting Started] documentation *specifically* cites CentOS 10. In fact,
it is citing CentOS *Stream* 10, which is something wholly distinct
from CentOS. What an absolute **clusterfuck**. If I had just read this
paragraph on Wikipedia from the beginning, I would have saved myself a
lot of confusion:

> In December 2020, Red Hat unilaterally terminated CentOS development in favor
> of CentOS Stream 9, a distribution positioned upstream of RHEL. In March
> 2021, CloudLinux (makers of CloudLinux OS) released a RHEL derivative called
> AlmaLinux. Later in May 2021, one of the CentOS founders (Gregory Kurtzer)
> created the competing Rocky Linux project as a successor to the original
> mission of CentOS.

Ref #2981, Ref #2924

[EPEL Getting Started]: https://docs.fedoraproject.org/en-US/epel/getting-started/
[suggest1]: https://github.com/BurntSushi/ripgrep/pull/2981#issuecomment-3204114293
[suggest2]: https://github.com/BurntSushi/ripgrep/issues/2924#issuecomment-3326357254
[DockerHub-CentOS]: https://hub.docker.com/_/centos
2025-09-24 10:02:46 -04:00
Andrew Gallant
fdea9723ca doc: clarify a case where -m/--max-count is not strictly respected
In #2843, it's requested that these trailing contextual lines should be
displayed as non-matching because they exceed the limit. While
reasonable, I think that:

1. This would be a weird complication to the implementation.
2. This would overall be less intuitive and more complex. Today, there
   is never a case where ripgrep emits a matching line in a way where
   the match isn't highlighted.

Closes #2843
2025-09-22 22:12:15 -04:00
Andrew Gallant
c45ec16360 doc: clarify --multiline --count
Specifically, it is only equivalent to `--count-matches` when the
pattern(s) given can match over multiple lines.

We could have instead made `--multiline --count` always equivalent to
`--multiline --count-matches`, but this seems plausibly less useful.
Indeed, I think it's generally a good thing that users can enable
`-U/--multiline` but still use patterns that only match a single line.
Changing how that behaves would I think be more surprising.

Either way we slice this, it's unfortunately pretty subtle.

Fixes #2852
2025-09-22 22:00:15 -04:00
Andrew Gallant
e42432cc5d ignore: clarify WalkBuilder::filter_entry
Fixes #2913
2025-09-22 21:49:29 -04:00
Andrew Gallant
6e77339f30 cli: tweak docs for resolve_binary
Fixes #2928
2025-09-22 21:38:08 -04:00
Andrew Gallant
1b07c6616a cli: document that -c/--count can be inconsistent with -l/--files-with-matches
This is unfortunate, but is a known bug that I don't think can be fixed
without either making `-l/--files-with-matches` much slower or changing
what "binary filtering" means by default.

In this PR, we document this inconsistency since users may find it quite
surprising. The actual work-around is to disable binary filtering with
the `--binary` flag.

We add a test confirming this behavior.

Closes #3131
2025-09-22 20:24:53 -04:00
Andrew Gallant
c1fc6a5eb8 release: build aarch64 artifacts for macos on GitHub Actions
GitHub now supports this natively, so there's no need for me to do it
any more.

Fixes #3155
2025-09-22 11:56:33 -04:00
Andrew Gallant
8b5d3d1c1e printer: hack in a fix for -l/--files-with-matches when using --pcre2 --multiline with look-around
The underlying issue here is #2528, which was introduced by commit
efd9cfb2fc which fixed another bug.

For the specific case of "did a file match," we can always assume the
match count is at least 1 here. But this doesn't fix the underlying
problem.

Fixes #3139
2025-09-22 09:12:16 -04:00
Andrew Gallant
491bf3f6d5 deps: update everything else 2025-09-21 11:39:04 -04:00
Andrew Gallant
81bed78654 deps: update to PCRE2 10.46
This is for completely static builds of ripgrep.
2025-09-21 11:39:04 -04:00
Andrew Gallant
1b6177bc5c cargo: set MSRV to 1.85
I believe the current stable version of Debian packages 1.85 rustc. So
if the next release of ripgrep uses a higher MSRV, then I think Debian
won't be able to package it.

It also turned out that I wasn't using anything from beyond Rust 1.85
anyway.

It's likely that I could make use of let-chains in various places, but I
don't think it's worth combing through the code to switch to them at
this point.
2025-09-21 09:51:15 -04:00
Lucas Trzesniewski
a7b7d81d66 lint: fix a few Clippy errors
PR #3151
2025-09-21 09:15:48 -04:00
Andrew Gallant
bb8172fe9b style: apply rustfmt
Maybe 2024 changes?

Note that we now set `edition = "2024"` explicitly in `rustfmt.toml`.
Without this, it seems like it's possible in some cases for rustfmt to
run under an older edition's style. Not sure how though.
2025-09-19 21:08:19 -04:00
Isaac
64174b8e68 printer: preserve line terminator when using --crlf and --replace
Ref #3097, Closes #3100
2025-09-19 21:08:19 -04:00
mostafa
f596a5d875 globset: add allow_unclosed_class toggle
When enabled, patterns like `[abc`, `[]`, `[!]` are treated as if the
opening `[` is just a literal. This is in contrast to the default behavior
of returning a parse error, which prioritizes better error messages.

Fixes #3127, Closes #3145
2025-09-19 21:08:19 -04:00
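A sketch of the behavior described above. The assumption here is that `allow_unclosed_class` lives on `GlobBuilder` as a boolean toggle; only the name and the described behavior come from the commit, not the exact signature.

```
use globset::GlobBuilder;

fn main() -> Result<(), globset::Error> {
    // Default behavior described above: an unclosed class is a parse error.
    assert!(GlobBuilder::new("foo[abc").build().is_err());

    // Assumed placement of the new toggle on `GlobBuilder`.
    let m = GlobBuilder::new("foo[abc")
        .allow_unclosed_class(true)
        .build()?
        .compile_matcher();
    // With the toggle enabled, the `[` is treated as a literal character.
    assert!(m.is_match("foo[abc"));
    Ok(())
}
```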
Thomas ten Cate
556623684e ignore/types: add GDScript files (*.gd) for the Godot Engine
Closes #3142
2025-09-19 21:08:19 -04:00
Pavel Safronov
a6e0be3c90 searcher: move "max matches" from printer to searcher
This is a bit of a brutal change, but I believe it is necessary in order to
fix a bug in how we handle the "max matches" limit in multi-line mode
while simultaneously handling context lines correctly.

The main problem here is that "max matches" refers to the shorter of
"one match per line" or "a single match." In typical grep, matches
*can't* span multiple lines, so there's never a difference. But in
multi-line mode, they can. So match counts necessarily must be handled
differently for multi-line mode.

The printer was previously responsible for this. But for $reasons, the
printer is fundamentally not in charge of how matches are found and
reported.

See my comments in #3094 for even more context.

This is a breaking change for `grep-printer`.

Fixes #3076, Closes #3094
2025-09-19 21:08:19 -04:00
Andrew Gallant
a60e62d9ac rust: move to Rust 2024
I'd like to use let chains.

Probably this isn't necessary to do for every crate, but I don't feel
like maintaining a mishmash.
2025-09-19 21:08:19 -04:00
Cristián Maureira-Fredes
3f565b58cc ignore/types: add Qt types for resource files and ui declaration
qrc[1] are the resource files for data related to user interfaces, and
ui[2] is the extension that the Qt Designer generates, for Widget based
projects.

Note that the initial PR used `ui` as a name for `*.ui`, but this seems
overly general. Instead, we use `qui` here.

Closes #3141

[1]: https://doc.qt.io/qt-6/resources.html
[2]: https://doc.qt.io/qt-6/uic.html
2025-09-19 21:08:19 -04:00
Andrew Gallant
74959a14cb man: escape all hyphens in flag names
Apparently, if we don't do this, some roff renderers will use a special
Unicode hyphen. That in turn makes searching a man page not work as one
would expect.

Fixes #3140
2025-09-19 21:08:19 -04:00
dana
78383de9b2 complete/zsh: improve --hyperlink-format completion
Also don't re-define helper functions if they exist.

Closes #3102
2025-09-19 21:08:19 -04:00
Ilya Grigoriev
519c1bd5cf complete: improvements for the --hyperlink-format flag
The goal is to make the completion for `rg --hyperlink-format v<TAB>`
work in the fish shell.

These are not exhaustive (the user can also specify custom formats).
This is somewhat unfortunate, but is probably better than not doing
anything at all.

The `grep+` value necessitated a change to a test.

Closes #3096
2025-09-19 21:08:19 -04:00
Lucas Trzesniewski
66aa4a63bb printer: deduplicate hyperlink alias names
This exports a new `HyperlinkAlias` type in the `grep-printer` crate.
This includes a "display priority" with each alias and a function for
getting all supported aliases from the crate.

This should hopefully make it possible for downstream users of this
crate to include a list of supported aliases in the documentation.

Closes #3103
2025-09-19 21:08:19 -04:00
Andrew Gallant
fdfda9ae73 doc: actually fix deb download link
Amazingly, there were about a dozen PRs fixing this same thing, and I
happened to choose the one that didn't actually fix the URL completely.

Apparently some users found this "interesting":
https://github.com/BurntSushi/ripgrep/pull/3065#issuecomment-3204275122
2025-09-19 21:08:19 -04:00
wackget
c037310050 doc: update installation instructions for RHEL/CentOS/Rocky Linux 9
Closes #2924, Closes #2981, Closes #3124
2025-09-19 21:08:19 -04:00
emrebengue
99fe884536 colors: add highlight type support for matching lines
This lets users highlight non-matching text in matching lines.

Closes #3024, Closes #3107
2025-09-19 21:08:19 -04:00
Andrew Gallant
126bbeab8c printer: fix handling of has_match for summary printer
Previously, `Quiet` mode in the summary printer always acted like
"print matching paths," except without the printing. This happened even
if we wanted to "print non-matching paths." Since this only afflicted
quiet mode, this had the effect of flipping the exit status when
`--files-without-match --quiet` was used.

Fixes #3108, Ref #3118
2025-09-19 21:08:19 -04:00
Ben Heidemann
859d54270e globset: make GlobSet::new public
For users of globset who already have a `Vec<Glob>` (or similar),
the current API requires them to iterate over their `Vec<Glob>`,
calling `GlobSetBuilder::add` for each `Glob`, thus constructing a new
`Vec<Glob>` internal to the GlobSetBuilder. This makes the consuming
code unnecessarily verbose. (There is unlikely to be any meaningful
performance impact of this, however, since the cost of allocating a new
`Vec` is likely marginal compared to the cost of glob compilation.)

Instead of taking a `&[Glob]`, we accept an iterator of anything that
can be borrowed as a `&Glob`. This required some light refactoring of
the constructor, but nothing onerous.

Closes #3066
2025-09-19 21:08:19 -04:00
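A sketch contrasting the long-standing builder path with the constructor this commit describes. The exact signature and return type of `GlobSet::new` shown here (an iterator of items borrowable as `&Glob`, returning a `Result`) are assumptions drawn from the commit message rather than from published docs.

```
use globset::{Glob, GlobSet, GlobSetBuilder};

fn main() -> Result<(), globset::Error> {
    let globs = vec![Glob::new("*.rs")?, Glob::new("Cargo.*")?];

    // Long-standing builder path: add each glob one by one.
    let mut builder = GlobSetBuilder::new();
    for glob in &globs {
        builder.add(glob.clone());
    }
    let via_builder = builder.build()?;

    // Newly public constructor, per the commit: takes an iterator of
    // `&Glob`-borrowable items (assumed signature).
    let via_new: GlobSet = GlobSet::new(&globs)?;

    assert_eq!(via_builder.is_match("main.rs"), via_new.is_match("main.rs"));
    Ok(())
}
```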
David Tolnay
33b44812c0 globset: make GlobSet::empty const
Closes #3098
2025-09-19 21:08:19 -04:00
Lucas Garron
c007d89145 doc: clarify that .git is covered by --hidden and not --ignore-vcs
Fixes #3121, Closes #3122
2025-09-19 21:08:19 -04:00
Vishva Natarajan
60aa9f1727 tests: increase sleep duration for sort file metadata tests on Windows AArch64
Use `cfg!` to assign a 1000ms delay only on Windows Aarch64 targets.

This was done because it has been observed to be necessary on this
platform. The conditional logic is used because 1s is quite long to
wait on every other more sensible platform.

Closes #3071, Closes #3072
2025-09-19 21:08:19 -04:00
Porkepix
56d03a1e2f ignore/types: include missing files for the tf type
Existing matches were too restrictive, so we simplify those to every
type of tfvars file we can encounter.

Closes #3117
2025-09-19 21:08:19 -04:00
Tomek
e166f271df ignore/types: add gleam
[Gleam] is a general-purpose, concurrent, functional high-level
programming language that compiles to Erlang or JavaScript source code.

Closes #3105

[Gleam]: https://gleam.run/
2025-09-19 21:08:19 -04:00
Andrew McNulty
83d94672ae ignore/types: add LLVM to default types
This PR adds llvm to the list of default types, matching files with
extension ll which is used widely for the textual form of LLVM's
Intermediate Representation.

Ref: https://llvm.org/docs/LangRef.html

Closes #3079
2025-09-19 21:08:19 -04:00
James Moberg
6887122e5b ignore/types: add ColdFusion and BoxLang
Closes #3090
2025-09-19 21:08:19 -04:00
Lilian A. Moraru
06210b382a ignore/types: add .env to sh file type
`.env` or "dotenv" is used quite often in cross-compilation/embedded
development environments to load environment variables, define shell
functions or even to execute shell commands. Just like `.zshenv` in
this list, I think `.env` should also be added here.

Closes #3063
2025-09-19 21:08:19 -04:00
kevichi7
00e501b529 build: emit warning if git is missing during build
Closes #3057
2025-09-19 21:08:19 -04:00
Andrew Gallant
2ebd768d40 doc: remove CentOS/RHEL installation instructions
These distros, or their Docker images, appear FUBAR. The UX is so poor
that I cannot verify the correct installation instructions. So I'm
removing them.

Ref https://github.com/BurntSushi/ripgrep/pull/2981#issuecomment-3202063173

Closes #2981, Closes #3124
2025-09-19 21:08:19 -04:00
Andrew Gallant
4df1298127 globset: fix bug where trailing . in file name was incorrectly handled
I'm not sure why I did this, but I think I was trying to imitate the
contract of [`std::path::Path::file_name`]:

> Returns None if the path terminates in `..`.

But the status quo clearly did not implement this. And as a result, if
you have a glob that ends in a `.`, it was instead treated as the empty
string (which only matches the empty string).

We fix this by implementing the semantic from the standard library
correctly.

Fixes #2990

[`std::path::Path::file_name`]: https://doc.rust-lang.org/std/path/struct.Path.html#method.file_name
2025-09-19 21:08:19 -04:00
bbb651
ba23ced817 ignore/types: add scdoc
Ref https://sr.ht/~sircmpwn/scdoc/

Closes #3007
2025-09-19 21:08:19 -04:00
Nadir Ishiguro
28cce895ff doc: fix nixpkgs link
Closes #3006
2025-09-19 21:08:19 -04:00
Andrew Gallant
7339bdf4b5 test: check binary file detection when using memory maps
This resolves a TODO comment I wrote a while back.

Memory maps behave a little differently in terms of detecting binary
data, so the tests have somewhat different results than the tests that
disable memory maps.

Closes #3002
2025-09-19 21:08:19 -04:00
Alexander Weiss
79f5a5a66e globset: add Candidate::from_bytes constructor
This is already technically possible to do on Unix by going through
`OsStr` and `&[u8]` conversions. This just makes it easier to do in all
circumstances and is reasonable to intentionally support.

Closes #2954, Closes #2955
2025-09-19 21:08:19 -04:00
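A sketch of the new constructor next to the existing `Candidate::new`; the `from_bytes(&[u8])` signature is an assumption based on the commit title and description.

```
use globset::{Candidate, Glob};

fn main() -> Result<(), globset::Error> {
    let matcher = Glob::new("*.log")?.compile_matcher();

    // Existing constructor: build a candidate from a path.
    let from_path = Candidate::new("server.log");
    assert!(matcher.is_match_candidate(&from_path));

    // New constructor from the commit: build a candidate directly from raw
    // bytes, with no `OsStr` detour (assumed `&[u8]` parameter).
    let from_bytes = Candidate::from_bytes(b"server.log");
    assert!(matcher.is_match_candidate(&from_bytes));
    Ok(())
}
```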
Andrew Gallant
4ab1862dc0 stats: fix case where "bytes searched" could be wrong
Specifically, if the search was instructed to quit early, we might not
have correctly marked the number of bytes consumed.

I don't think this bug occurs when memory maps are used to read the
haystack.

Closes #2944
2025-09-19 21:08:19 -04:00
Thomas Weißschuh
6244e635a1 ignore/types: add Kconfig
Kconfig files are used to represent the configuration database of the
Kbuild build system. Kbuild is developed as part of the Linux kernel.
There are numerous other users including OpenWrt and U-Boot.

Ref: https://docs.kernel.org/kbuild/index.html

Closes #2942
2025-09-19 21:08:19 -04:00
ChristopherYoung
5e2d32fe7f printer: slightly simplify code
I'm not sure why it was written with `map` previously. It almost looks
like I was trying to make it deref, but apparently that isn't needed.

Closes #2941
2025-09-19 21:08:19 -04:00
Dmitry Gerasimov
75e17fcabe ignore/types: add *.dtso to devicetree type
`dtso` files became recognized as devicetree a
couple of years ago with the following commit:
363547d219

Closes #2938
2025-09-19 21:08:19 -04:00
Martin Pool
99b7957122 ignore/doc: explain that require_git(false) will ascend above git roots
This should hopefully help avoid confusion about #2812 as encountered
in https://github.com/sourcefrog/cargo-mutants/issues/450.

Closes #2937
2025-09-19 21:08:19 -04:00
Andrew Gallant
ab4665a164 globset: remove __Nonexhaustive work-around
This existed before the `#[non_exhaustive]` attribute was a thing. Since
it was not part of the API of the crate, it is not a semver incompatible
change.
2025-09-19 21:08:19 -04:00
Luke Sandberg
5f5da48307 globset: support nested alternates
For example, `**/{node_modules/**/*/{ts,js},crates/**/*.{rs,toml}}`.

I originally didn't add this I think for implementation simplicity, but
it turns out that it really isn't much work to do. There might have also
been some odd behavior in the regex engine for dealing with empty
alternates, but that has all been long fixed.

Closes #3048, Closes #3112
2025-09-19 21:08:19 -04:00
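A small illustration of a nested alternate in action; the pattern and paths below are made up for the example, not taken from the commit.

```
use globset::Glob;

fn main() -> Result<(), globset::Error> {
    // One alternate nested inside another.
    let m = Glob::new("**/{src/**/*.{rs,toml},docs/*.{md,txt}}")?
        .compile_matcher();

    assert!(m.is_match("crate/src/lib.rs"));
    assert!(m.is_match("crate/docs/intro.md"));
    assert!(!m.is_match("crate/src/lib.c"));
    Ok(())
}
```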
Colin Heffernan
b0c6d4c34a ignore/types: add *.svelte.ts to Svelte file type glob
I was somewhat unsure about adding this, since `.svelte.ts` seems
primarily like a TypeScript file and it could be surprising to show up
in a search for Svelte files. In particular, ripgrep doesn't know how to
only search the Svelte stuff inside of a `.svelte.ts` file, so you could
end up with lots of false positives.

However, I was swayed[1] by the argument that the extension does
actually include `svelte` in it, so maybe this is fine. Please open an
issue if this change ends up being too annoying for most users.

Closes #2874, Closes #2909

[1]: https://github.com/BurntSushi/ripgrep/issues/2874#issuecomment-3126892931
2025-09-19 21:08:19 -04:00
Andrew Gallant
d199058e77 cli: make rg -vf file behave sensibly
Previously, when `file` is empty (literally empty, as in, zero byte),
`rg -f file` and `rg -vf file` would behave identically. This is odd
and also doesn't match how GNU grep behaves. It's also not logically
correct. An empty file means _zero_ patterns which is an empty set. An
empty set matches nothing. Inverting the empty set should result in
matching everything.

This was because of an errant optimization that lets ripgrep quit early
if it can statically detect that no matches are possible.

Moreover, there was *also* a bug in how we constructed the PCRE2 pattern
when there are zero patterns. PCRE2 doesn't have a concept of sets of
patterns (unlike the `regex` crate), so we need to fake it with an empty
character class.

Fixes #1332, Fixes #3001, Closes #3041
2025-09-19 21:08:19 -04:00
Josh Cotton
bb0cbae312 ci: add aarch64 Windows
This also adds a new release artifact for aarch64 Windows.

Closes #2943, Closes #3038
2025-09-19 21:08:19 -04:00
Wilfred Hughes
8fca3cdca6 doc: fix typo in FAQ
Closes #3027
2025-09-19 21:08:19 -04:00
squidfunk
6f39f830cb globset: compact Debug impl for GlobSetBuilder and Glob
Ideally we'd have a compact impl for `GlobSet` too, but that's a lot
more work. In particular, the constituent types don't all store the
original pattern string, so that would need to be added.

Closes #3026
2025-09-19 21:08:19 -04:00
Zach Ahn
e83828fc8c ignore/types: add *.rake extension to list of Ruby file types
This PR adds the .rake extension to the Ruby type. It's a pretty common
file extension in Rails apps—in my experience, the Rakefile is often
pretty empty and only sets some stuff up while most of the code lives
in various .rake files.

See: https://ruby.github.io/rake/doc/rakefile_rdoc.html#label-Multiple+Rake+Files

Closes #2921
2025-09-19 21:08:19 -04:00
f3rn0s
72a1303238 ignore/types: add typst
Closes #2914
2025-09-19 21:08:19 -04:00
Hamir Mahal
861f6d374f style: simplify string formatting
Most of this code was written before this was supported by Rust.

Closes #2912
2025-09-19 21:08:19 -04:00
Thayne McCombs
624bbf7dce globset: add matches_all method
This returns true if all globs in the set match the supplied file.

Fixes #2869, Closes #2900
2025-09-19 21:08:19 -04:00
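A short sketch of the difference between the existing any-match methods and the new one; the exact `matches_all` signature is assumed from the commit description.

```
use globset::{Glob, GlobSetBuilder};

fn main() -> Result<(), globset::Error> {
    let mut builder = GlobSetBuilder::new();
    builder.add(Glob::new("*.rs")?);
    builder.add(Glob::new("src/**")?);
    let set = builder.build()?;

    // Existing methods: any-match plus the indices of matching globs.
    assert!(set.is_match("src/main.rs"));
    assert_eq!(set.matches("src/main.rs"), vec![0, 1]);

    // New method: true only when *every* glob in the set matches
    // (assumed signature, mirroring `is_match`).
    assert!(set.matches_all("src/main.rs"));
    assert!(!set.matches_all("README.md"));
    Ok(())
}
```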
Aleksey Vasilenko
53279db414 deps: switch to tikv-jemallocator
It is now a recommended crate for jemalloc and it contains an
[important fix for compilation on riscv64gc-unknown-linux-musl][fix];
I bumped into this when I was trying to
[build ripgrep on OpenWrt][openwrt].

Closes #2889

[fix]: https://github.com/tikv/jemallocator/pull/67
[openwrt]: https://github.com/openwrt/packages/pull/24961
2025-09-19 21:08:19 -04:00
Stephan Badragan
292bc54e64 printer: support -r/--replace with --json
This adds a `replacement` field to each submatch object in the JSON
output. In effect, this extends the `-r/--replace` flag so that it works
with `--json`.

This adds a new field instead of replacing the match text (which is how
the standard printer works) for maximum flexibility. This way, consumers
of the JSON output can access the original match text (and always rely
on it corresponding to the original match text) while also getting the
replacement text without needing to do the replacement themselves.

Closes #1872, Closes #2883
2025-09-19 21:08:19 -04:00
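A quick way to see the new field from the consumer side is to shell out to `rg` and look at the JSON it emits. This sketch assumes an `rg` new enough to pair `-r/--replace` with `--json`; the `fixme` pattern and `src/` path are placeholders, and the `replacement` field name comes from the commit message above.

```
use std::process::Command;

fn main() -> std::io::Result<()> {
    // Placeholder pattern and path; requires a ripgrep that pairs
    // `-r/--replace` with `--json` as described in the commit above.
    let out = Command::new("rg")
        .args(["--json", "--replace", "TODO: $0", "fixme", "src/"])
        .output()?;
    for line in String::from_utf8_lossy(&out.stdout).lines() {
        // Per the commit, each submatch object now carries a `replacement`
        // field alongside the original match text.
        if line.contains("\"replacement\"") {
            println!("{line}");
        }
    }
    Ok(())
}
```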
Melvin Wang
5be67c1244 ignore/types: include msbuild solution filters
Closes #2871
2025-09-19 21:08:19 -04:00
Lucas Trzesniewski
119407d0a9 printer: use std::path::absolute on Windows
This specifically avoids touching the file system, which can lead to
fairly dramatic speed-ups in large repositories with lots of matches.

Closes #2865
2025-09-19 21:08:19 -04:00
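For context, here is a small standard-library-only sketch of the difference the commit relies on: `std::path::absolute` never touches the file system, while `fs::canonicalize` does (and fails for paths that don't exist).

```
use std::{fs, path::{self, Path}};

fn main() -> std::io::Result<()> {
    // A relative path; it does not need to exist for `path::absolute`.
    let p = Path::new("logs/output.txt");

    // Prepends the current directory without touching the file system and
    // without resolving symlinks.
    println!("absolute:     {}", path::absolute(p)?.display());

    // Hits the file system, fails for nonexistent paths, and on Windows
    // yields a `\\?\` verbatim path; those are the costs avoided above.
    match fs::canonicalize(p) {
        Ok(real) => println!("canonicalize: {}", real.display()),
        Err(err) => println!("canonicalize failed: {err}"),
    }
    Ok(())
}
```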
Alex Povel
d869038cf6 ignore: improve multithreading heuristic
This copies the one found in ripgrep.

See also:
71d71d2d98/crates/core/flags/hiargs.rs (L172)

Closes #2854, Closes #2856
2025-09-19 21:08:19 -04:00
Thomas Otto
75970fd16b ignore: don't process command line arguments in reverse order
When searching in parallel with many more arguments than threads, the
first arguments are searched last -- unlike in the -j1 case.

This is unexpected for users who know about the parallel nature of rg
and think they can give the scheduler a hint by positioning larger
input files (L1, L2, ..) before smaller ones (█, ██). Instead, this can
result in sub-optimal thread usage and thus longer runtime (simplified
example with 2 threads):

 T1:  █ ██ █ █ █ █ ██ █ █ █ █ █ ██ ╠═════════════L1════════════╣
 T2:  █ █ ██ █ █ ██ █ █ █ ██ █ █ ╠═════L2════╣

                                       ┏━━━━┳━━━━┳━━━━┳━━━━┓
This is caused by assigning work to    ┃ T1 ┃ T2 ┃ T3 ┃ T4 ┃
 per-thread stacks in a round-robin    ┡━━━━╇━━━━╇━━━━╇━━━━┩
              manner, starting here  → │ L1 │ L2 │ L3 │ L4 │ ↵
                                       ├────├────┼────┼────┤
                                       │ s5 │ s6 │ s7 │ s8 │ ↵
                                       ├────┼────┼────┼────┤
                                       ╷ .. ╷ .. ╷ .. ╷ .. ╷
                                       ├────┼────┼────┼────┤
                                       │ st │ su │ sv │ sw │ ↵
                                       ├────┼────┼────┼────┘
                                       │ sx │ sy │ sz │
                                       └────┴────┴────┘
   and then processing them bottom-up:   ↥    ↥    ↥    ↥

                                       ╷ .. ╷ .. ╷ .. ╷ .. ╷
This patch reverses the input order    ├────┼────┼────┼────┤
so the two reversals cancel each other │ s7 │ s6 │ s5 │ L4 │ ↵
out. Now at least the first N          ├────┼────┼────┼────┘
arguments, N=number-of-threads, are    │ L3 │ L2 │ L1 │
processed before any others (then      └────┴────┴────┘
work-stealing may happen):

 T1:  ╠═════════════L1════════════╣ █ ██ █ █ █ █ █ █ ██
 T2:  ╠═════L2════╣ █ █ ██ █ █ ██ █ █ █ ██ █ █ ██ █ █ █

(With some more shuffling T1 could always be assigned L1 etc., but
that would mostly be for optics).

Closes #2849
2025-09-19 21:08:19 -04:00
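A toy model of the scheduling effect described above: work is dealt round-robin into per-thread stacks and popped from the top, so reversing the input before dealing means the earliest arguments end up on top and are searched first. This is only a sketch of the idea, not the `ignore` crate's actual code:

    fn main() {
        let args = ["L1", "L2", "L3", "L4", "s5", "s6", "s7", "s8"];
        let threads = 4;

        // Deal in reverse so the stack tops hold the earliest arguments.
        let mut stacks = vec![Vec::new(); threads];
        for (i, arg) in args.iter().rev().enumerate() {
            stacks[i % threads].push(*arg);
        }
        // Each worker pops from the top of its own stack.
        for (t, stack) in stacks.iter().enumerate() {
            let order: Vec<_> = stack.iter().rev().collect();
            println!("T{}: {:?}", t + 1, order);
        }
    }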
Christoph Badura
380809f1e2 ignore/types: add Makefile.*
The *BSD build systems make use of "Makefile.inc" a lot. Make the
"make" type recognize this file by default. And more generally,
`Makefile.*` seems to be a convention, so just generalize it.

Closes #2846
2025-09-19 21:08:19 -04:00
Matt Kulukundis
94ea38da30 ignore: support .jj as well as .git
This makes it so the presence of `.jj` will cause ripgrep to treat it
as a VCS directory, just as if `.git` were present. This is useful for
ripgrep's default behavior when working with jj repositories that don't
have a `.git` but do have `.gitignore`. Namely, ripgrep requires the
presence of a VCS repository in order to respect `.gitignore`.

We don't handle clone-specific exclude rules for jj repositories without
`.git` though. It seems it isn't 100% settled yet where we can find
those[1].

Closes #2842

[1]: https://github.com/BurntSushi/ripgrep/pull/2842#discussion_r2020076722
2025-09-19 21:08:19 -04:00
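In spirit, the change amounts to treating a `.jj` entry the same way a `.git` entry is treated when deciding whether a directory is a VCS root. A simplified, hypothetical sketch (not the `ignore` crate's actual internals):

    use std::path::Path;

    /// Treat a directory as a VCS root if it contains either `.git`
    /// or `.jj` (each may itself be a directory or a file).
    fn is_vcs_root(dir: &Path) -> bool {
        dir.join(".git").exists() || dir.join(".jj").exists()
    }

    fn main() {
        println!("{}", is_vcs_root(Path::new(".")));
    }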
Tor Shepherd
da672f87e8 color: add italic to style attributes
Closes #2841
2025-09-19 21:08:19 -04:00
robert-bryson
edafb612d2 core: add "total" to --stats output
This makes it a little clearer. Apologies to anyone who is regex
matching on this output.

Closes #2797
2025-09-19 21:08:19 -04:00
Stephen Albert-Moore
483628469a ignore/gitignore: skip BOM at start of ignore file
This matches Git's behavior.

Fixes #2177, Closes #2782
2025-09-19 21:08:19 -04:00
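Skipping a UTF-8 BOM before parsing ignore rules is a small, self-contained transformation; a sketch of the idea (not the actual `ignore` crate code):

    /// Strip a leading UTF-8 byte order mark, if present, before
    /// parsing the contents of a .gitignore-style file.
    fn strip_utf8_bom(contents: &[u8]) -> &[u8] {
        contents.strip_prefix(&[0xEF, 0xBB, 0xBF]).unwrap_or(contents)
    }

    fn main() {
        let raw = b"\xEF\xBB\xBF*.log\n";
        assert_eq!(strip_utf8_bom(raw), b"*.log\n".as_slice());
    }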
Riccardo Attilio Galli
c93fc793a0 searcher: add more tests for replace_bytes
... and add a comment explaining an optimization.

Closes #2729
2025-09-19 21:08:19 -04:00
Keith Smiley
7c004f224e ignore/types: detect WORKSPACE.bzlmod for bazel file type
This file came alongside MODULE.bazel and I should have added it here
previously.

Closes #2726
2025-09-19 21:08:19 -04:00
William Johnson
52115ab633 globset: add opt-in Arbitrary trait implementations
This feature is mandatory when using `Glob` in fuzz testing.

Closes #2720
2025-09-19 21:08:19 -04:00
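With an opt-in `Arbitrary` impl, a fuzz target can take structured `Glob` values directly from the fuzzer instead of hand-parsing raw bytes. A hedged sketch of such a target; the use of `cargo-fuzz`/`libfuzzer-sys` and the feature wiring are assumptions here, not details from this log:

    // fuzz/fuzz_targets/glob.rs -- hypothetical cargo-fuzz target;
    // assumes globset is built with its Arbitrary support enabled.
    #![no_main]
    use libfuzzer_sys::fuzz_target;

    fuzz_target!(|glob: globset::Glob| {
        // Compiling an arbitrary Glob must never panic.
        let _ = glob.compile_matcher();
    });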
Andrew Gallant
bfe2def121 tests: add test for filtering hidden files
Note that this isn't a regression test. In particular, this didn't fail
with ripgrep 14.1.1. I couldn't figure out how to turn what the OP gave
me into a failing test.

With #829 fixed, if the OP can provide a better regression test, it
might make sense to re-investigate this.

Closes #2711
2025-09-19 21:08:19 -04:00
ChristopherYoung
14f4957b3d ignore: fix filtering searching subdir or .ignore in parent dir
The previous code deleted too many parts of the path when constructing
the absolute path, resulting in a shortened final path. This patch
creates the correct absolute path by only removing the necessary parts.

Fixes #829, Fixes #2731, Fixes #2747, Fixes #2778, Fixes #2836, Fixes #2933, Fixes #3144
Closes #2933
2025-09-19 21:08:19 -04:00
Jan Verbeek
f722268814 complete/fish: Take RIPGREP_CONFIG_PATH into account
The fish completions now also pay attention to the configuration file
to determine whether to suggest negation options and not just to the
current command line.

This doesn't cover all edge cases. For example the config file is
cached, and so changes may not take effect until the next shell
session. But the cases it doesn't cover are hopefully very rare.

Closes #2708
2025-09-19 21:08:19 -04:00
wang384670111
90a680ab45 impl: switch most atomic ops to Relaxed ordering
These all seem pretty straight-forward. Compared with #2706, I dropped
the changes to the atomic orderings used in `ignore` because I haven't
had time to think through that carefully. But the ops in this PR seem
fine.

Closes #2706
2025-09-19 21:08:19 -04:00
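For a plain event counter that doesn't guard any other data, `Relaxed` is sufficient: increments can't be lost, only their ordering relative to unrelated memory operations is left unconstrained. A small standalone example (not code from this repository):

    use std::sync::atomic::{AtomicU64, Ordering};
    use std::thread;

    static MATCH_COUNT: AtomicU64 = AtomicU64::new(0);

    fn main() {
        thread::scope(|s| {
            for _ in 0..4 {
                s.spawn(|| {
                    for _ in 0..1_000 {
                        // Nothing else is synchronized through this
                        // counter, so Relaxed is enough (and cheapest).
                        MATCH_COUNT.fetch_add(1, Ordering::Relaxed);
                    }
                });
            }
        });
        assert_eq!(MATCH_COUNT.load(Ordering::Relaxed), 4_000);
    }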
Andrew Gallant
119a58a400 msrv: bump to Rust 1.88
This is to prep for the next release. I don't know if the requirement
will actually be for Rust 1.88, but it is intended to support the latest
version of stable Rust.
2025-07-26 10:41:47 -04:00
Andrew Gallant
3b7fd442a6 deps: update everything
It looks like a new dependency on `getrandom` was added (which brings in
a few more dependencies itself) because of `jobserver`. Thankfully,
`jobserver` is only used when ripgrep's `pcre2` feature is enabled, so
this still keeps the default set of dependencies very small.
2025-07-04 10:12:38 -04:00
wm
cbc598f245 doc: update version number in dpkg installation
PR #3058
2025-05-30 08:30:52 -04:00
dependabot[bot]
6dfaec03e8 deps: bump crossbeam-channel from 0.5.13 to 0.5.15
Bumps [crossbeam-channel](https://github.com/crossbeam-rs/crossbeam) from 0.5.13 to 0.5.15.
- [Release notes](https://github.com/crossbeam-rs/crossbeam/releases)
- [Changelog](https://github.com/crossbeam-rs/crossbeam/blob/master/CHANGELOG.md)
- [Commits](https://github.com/crossbeam-rs/crossbeam/compare/crossbeam-channel-0.5.13...crossbeam-channel-0.5.15)

---
updated-dependencies:
- dependency-name: crossbeam-channel
  dependency-version: 0.5.15
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-10 10:55:32 -04:00
Pierre Rouleau
5fbc4fee64 ignore/types: fix Seed7 file extension
PR #3023
2025-04-07 10:53:32 -04:00
Pierre Rouleau
004370bd16 ignore/types: add support for Seed7 files
For more info on the Seed7 programming Language see:

- on Wikipedia: https://en.wikipedia.org/wiki/Seed7
- Seed7 home:   https://seed7.sourceforge.net/
- Seed7 repo:   https://github.com/ThomasMertes/seed7

PR #3022
2025-04-07 08:51:22 -04:00
Andrew Gallant
de4baa1002 globset-0.4.16 2025-02-27 12:46:58 -05:00
Andrew Gallant
163ac157d3 globset: escape { and } in escape
This appears to be an oversight from when `escape` was
implemented in #2061.
2025-02-27 12:46:48 -05:00
Andrew Gallant
e2362d4d51 searcher: add log message noting detected encoding
This helps improve diagnostics. Otherwise it can be easy to miss that
ripgrep is doing transcoding.

Fixes #2979
2025-01-25 14:27:00 -05:00
Kizhyk
d6b59feff8 github: update WASI compilation job
Ref https://blog.rust-lang.org/2024/04/09/updates-to-rusts-wasi-targets.html

PR #2970
2025-01-13 10:16:09 -05:00
Max Coplan
94305125ef zsh: support sourcing zsh completion dynamically
Previously, you needed to save the completion script to a file and
then source it. Now, you can dynamically source completions in zsh by
running

    $ source <(rg --generate complete-zsh)

Before this commit, you would get an error after step 1.
After this commit, it should work as expected.

We also improve the FAQ item for zsh completions.

Fixes #2956
2024-12-31 08:23:13 -05:00
Andrew Gallant
79cbe89deb doc: tweak wording for stdin detection
This makes it slightly more precise to cover weird cases like trying to
pass a directory on stdin.

Closes #2906
2024-09-30 07:38:05 -04:00
Thayne McCombs
bf63fe8f25 regex: add as_match method to Captures trait
Ref https://github.com/rust-lang/regex/issues/1146

PR #2898
2024-09-19 09:30:31 -04:00
Andrew Gallant
8bd5950296 changelog: add next section 2024-09-08 22:32:09 -04:00
Andrew Gallant
6e0539ab91 pkg/brew: update tap 2024-09-08 22:32:02 -04:00
Andrew Gallant
4649aa9700 14.1.1
Some checks failed
release / create-release (push) Has been cancelled
release / build-release (linux, ubuntu-latest, nightly, x86_64-linux-musl-strip, x86_64-unknown-linux-musl) (push) Has been cancelled
release / build-release (macos, macos-latest, nightly, x86_64-apple-darwin) (push) Has been cancelled
release / build-release (stable-aarch64, ubuntu-latest, qemu-aarch64, stable, aarch64-linux-gnu-strip, aarch64-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-arm-gnueabihf, ubuntu-latest, qemu-arm, stable, arm-linux-gnueabihf-strip, armv7-unknown-linux-gnueabihf) (push) Has been cancelled
release / build-release (stable-arm-musleabi, ubuntu-latest, qemu-arm, stable, arm-linux-musleabi-strip, armv7-unknown-linux-musleabi) (push) Has been cancelled
release / build-release (stable-arm-musleabihf, ubuntu-latest, qemu-arm, stable, arm-linux-musleabihf-strip, armv7-unknown-linux-musleabihf) (push) Has been cancelled
release / build-release (stable-powerpc64, ubuntu-latest, qemu-ppc64, stable, powerpc64-linux-gnu-strip, powerpc64-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-s390x, ubuntu-latest, qemu-s390x, stable, s390x-linux-gnu-strip, s390x-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-x86, ubuntu-latest, i386, stable, x86_64-linux-gnu-strip, i686-unknown-linux-gnu) (push) Has been cancelled
release / build-release (win-gnu, windows-latest, nightly-x86_64-gnu, x86_64-pc-windows-gnu) (push) Has been cancelled
release / build-release (win-msvc, windows-latest, nightly, x86_64-pc-windows-msvc) (push) Has been cancelled
release / build-release (win32-msvc, windows-latest, nightly, i686-pc-windows-msvc) (push) Has been cancelled
release / build-release-deb (push) Has been cancelled
2024-09-08 22:15:00 -04:00
Andrew Gallant
c009652e77 changelog: 14.1.1 2024-09-08 22:13:53 -04:00
Andrew Gallant
b9f7a9ba2b deps: bump grep to 0.3.2 2024-09-08 22:11:17 -04:00
Andrew Gallant
a1960877cf grep-0.3.2 2024-09-08 22:11:00 -04:00
Andrew Gallant
bb0925af91 deps: bump grep-printer to 0.2.2 2024-09-08 22:10:49 -04:00
Andrew Gallant
be117dbafa grep-printer-0.2.2 2024-09-08 22:10:29 -04:00
Andrew Gallant
06dc13ad2d deps: bump grep-searcher to 0.1.14 2024-09-08 22:09:55 -04:00
Andrew Gallant
c6c2e69b8f grep-searcher-0.1.14 2024-09-08 22:09:27 -04:00
Andrew Gallant
e67c868ddd deps: bump grep-pcre2 to 0.1.8 2024-09-08 22:09:23 -04:00
Andrew Gallant
d33f2e2f70 grep-pcre2-0.1.8 2024-09-08 22:08:41 -04:00
Andrew Gallant
082edafffa deps: bump grep-regex to 0.1.13 2024-09-08 22:08:22 -04:00
Andrew Gallant
7c8dc332b3 grep-regex-0.1.13 2024-09-08 22:07:52 -04:00
Andrew Gallant
ea961915b5 deps: bump grep-cli to 0.1.11 2024-09-08 22:07:30 -04:00
Andrew Gallant
7943bdfe82 grep-cli-0.1.11 2024-09-08 22:06:59 -04:00
Andrew Gallant
312a7884fc deps: bump ignore to 0.4.23 2024-09-08 22:06:39 -04:00
Andrew Gallant
ac02f54c89 ignore-0.4.23 2024-09-08 22:06:03 -04:00
Andrew Gallant
24b337b940 deps: bump globset to 0.4.15 2024-09-08 22:05:45 -04:00
Andrew Gallant
a5083f99ce globset-0.4.15 2024-09-08 22:04:48 -04:00
Andrew Gallant
f89cdba5df doc: update date in man page template 2024-09-08 22:04:11 -04:00
Andrew Gallant
f7b677d136 deps: update everything 2024-09-08 22:03:29 -04:00
Andrew Gallant
3f68a8f3d7 changelog: 14.1.1 2024-09-08 22:03:22 -04:00
Andrew Gallant
9d738ad0c0 regex: fix inner literal extraction that resulted in false negatives
In some rare cases, it was possible for ripgrep's inner literal detector
to extract a set of literals that could produce a false negative. #2884
gives an example: `(?i:e.x|ex)`. In this case, the set extracted can be
discovered by running `rg '(?i:e.x|ex)' --trace`:

    Seq[E("EX"), E("Ex"), E("eX"), E("ex")]

This extraction leads to building a multi-substring matcher for `EX`,
`Ex`, `eX` and `ex`. Searching the haystack `e-x` produces no match,
and thus, ripgrep shows no matches. But the regex `(?i:e.x|ex)` matches
`e-x`.

The issue at play here was that when two extracted literal sequences
were unioned, we were not correctly unioning their "prefix" attribute.
And this in turn leads to those literal sequences being combined
incorrectly via cross product. This case in particular triggers it
because two different optimizations combine to produce an incorrect
result. Firstly, the regex has a common prefix extracted and is
rewritten as `(?i:e(?:.x|x))`. Secondly, the `x` in the first branch of
the alternation has its `prefix` attribute set to `false` (correctly),
which means it can't be cross producted with another concatenation. But
in this case, it is unioned with the `x` from the second branch, and
this results in the union result having `prefix` set to `true`. This
in turn pops up and lets it get cross producted with the `e` prefix,
producing an incorrect literal sequence.

We fix this by changing the implementation of `union` to return
`prefix` set to `true` only when *both* literal sequences being unioned
have `prefix` set to `true`.

Doing this exposed a second bug that was present, but was purely
cosmetic: the extracted literals in this case, after the fix, are
`X` and `x`. They were considered "exact" (i.e., lead to a match),
but of course they are not. Observing an `X` or an `x` does not mean
there is a match. This was fixed by making `choose` always return
an inexact literal sequence. This is perhaps too conservative in
aggregate in some cases, but always correct. The idea here is that if
one is choosing between two concatenations, then it is likely the case
that the sequence returned should be considered inexact. The issue
is that this can lead to avoiding cross products in some cases that
would otherwise be correct. This is bad because it means extracting
shorter literals in some cases. (In general, the longer the literal the
better.) But we prioritize correctness for now and fix it. You can see
a few tests where this shortens some extracted literals.

Fixes #2884
2024-09-08 22:00:46 -04:00
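As a quick sanity check after the fix, the reported example should now match (the output shown is what one would expect, not a captured transcript):

    $ printf 'e-x\n' | rg '(?i:e.x|ex)'
    e-x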
Andrew Gallant
6c5108ed17 github: add FUNDING 2024-09-03 11:46:01 -04:00
Andrew Gallant
e0f1000df6 deps: update everything
This removes `once_cell` (a dependency of `cc`) but adds `shlex` (also a
dependency of `cc`). AFAIK, ripgrep does not utilize anything in `cc`
that requires `shlex`, which is pretty unfortunate that we have to spend
time compiling it. (We use `cc` only when the `pcre2` feature is
enabled.)
2024-08-28 11:38:43 -04:00
Henk-Jan Meijer
ea99421ec8 doc: fix transcription bug in ugrep benchmark command
I re-ran the benchmark and the timing remains nearly
unchanged, so that part was correct.

PR #2876
2024-08-21 13:58:36 -04:00
Cort Spellman
af8c386d5e doc: fix typo in --heading flag help
PR #2864
2024-08-02 17:32:42 -04:00
Naser Aleisa
71d71d2d98 doc: refer to correct flag name for --engine=auto
PR #2850
2024-07-04 07:25:13 -04:00
Tobias Decking
c9ebcbd8ab globset: optimize character escaping
Rewrites the char_to_escaped_literal and bytes_to_escaped_literal
functions in a way that minimizes heap allocations. After this, the
resulting string is the only allocation remaining.

I believe when this code was originally written, the routines available
to avoid heap allocations didn't exist.

I'm skeptical that this matters in the grand scheme of things, but I
think this is still worth doing for "good sense" reasons.

PR #2833
2024-06-05 09:56:00 -04:00
Pratham Verma
dec0dc3196 doc: update link for debian installation
PR #2829
2024-06-02 17:48:50 -04:00
Andrew Gallant
2f0a269f07 github: use an obviously old version of ripgrep in issue template
This should hopefully avoid confusion by ensuring that the version
number in the issue template isn't mistaken for an implication that the
reported version must therefore be recent.

Ref #2824
2024-05-27 18:22:11 -04:00
Andrew Gallant
0a0893a765 ignore: add debug log message when opening gitignore file
I'm not sure why it took me this long to add this debug message, but
it's quite useful in determining where ignore rules are coming from.
2024-05-27 14:53:19 -04:00
Bryan Honof
35160a1cdb doc: add Flox as an installation method
Ref https://flox.dev/docs/

PR #2817
2024-05-24 11:59:19 -04:00
Andrew Gallant
f1d23c06e3 cli: add more logging for stdin heuristic detection
Stdin heuristic detection is complicated and opaque enough that it's
worth having easy access to the complete story that leads ripgrep to
decide whether to search stdin or not.

Ref #2806
2024-05-13 09:43:04 -04:00
tgolang
22b677900f doc: fix some typos
PR #2754
2024-05-13 07:44:51 -04:00
NicoElbers
bb6f0f5519 doc: fix typo in --vimgrep help message
PR #2802
2024-05-11 07:02:24 -04:00
Andrew Gallant
b6ef99ee55 doc: remove unused man page template
This seems to be causing confusion. And since we don't use it as of
ripgrep 14, let's just remove it.

Man page generation is now done by ripgrep itself. That is:

    rg --generate man > rg.1

Closes #2801
2024-05-09 13:46:28 -04:00
Nicolas Holzschuch
bb8601b2ba printer: make compilation on non-unix, non-windows platforms work
Some of the new hyperlink work caused ripgrep to stop compiling
on non-{Unix,Windows} platforms. The most popular of which is WASI.

This commit makes non-{Unix,Windows} compile again. And we add a
very basic WASI test in CI to catch regressions.

More work is needed to make tests on non-{Unix,Windows} platforms
work. And of course, this commit specifically takes the path of disabling
hyperlink support for non-{Unix,Windows} platforms.
2024-04-23 13:12:19 -04:00
Andrew Gallant
02b47b7469 deps: update everything
Notably, this removes winapi in favor of windows-sys, as a result of
winapi-util switching over to windows-sys[1].

Annoyingly, when PCRE2 is enabled, this brings in a dependency on
`once_cell`[2]. I had worked to remove it from my dependencies and now
it's back. Gah. I suppose I could disable the `parallel` feature of
`cc`, but that doesn't seem like a good trade-off.

[1]: https://github.com/BurntSushi/winapi-util/pull/13
[2]: https://github.com/rust-lang/cc-rs/pull/1037
2024-04-23 10:46:12 -04:00
redistay
d922b7ac11 doc: fix typo
PR #2776
2024-04-02 09:10:25 -04:00
Linda_pp
2acf25c689 ignore/types: add WGSL to the default file types
[WGSL][1] is a shading language for WebGPU. As defined in [Appendix
A][2], the file extension is `.wgsl`.

PR #2774 

[1]: https://www.w3.org/TR/WGSL/
[2]: https://www.w3.org/TR/WGSL/#text-wgsl-media-type
2024-04-01 23:05:15 -04:00
Vadim Kostin
80007698d3 ignore/types: add Vue
PR #2772
2024-04-01 07:49:29 -04:00
cgzones
3ad0e83471 ignore/walk: correct build_parallel() documentation
The returned closure should return `WalkState`, not `()`.

Closes #2767
2024-03-27 14:50:05 -04:00
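For reference, a minimal parallel walk with the `ignore` crate looks roughly like the sketch below; the visitor closure returns a `WalkState`, which is what the documentation fix above is about:

    use ignore::{WalkBuilder, WalkState};

    fn main() {
        WalkBuilder::new(".").build_parallel().run(|| {
            Box::new(|entry| {
                if let Ok(entry) = entry {
                    println!("{}", entry.path().display());
                }
                // The closure must return a WalkState, not ().
                WalkState::Continue
            })
        });
    }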
Andrew Gallant
eca13f08a2 deps: bump everything else 2024-03-24 18:58:28 -04:00
Andrew Gallant
4f99f82b19 deps: bump pcre2 and pcre2-sys
This moves to PCRE2 10.43.
2024-03-24 18:58:06 -04:00
Anton Zhiyanov
327d74f161 doc: add link to unofficial playground
PR #2760
2024-03-20 08:11:09 -04:00
Brent Williams
9da0995df4 ignore/types: add 'svelte' to the default file types
Ref: https://svelte.dev/

PR #2759
2024-03-19 13:36:08 -04:00
Andrew Gallant
e9abbc1a02 cargo: nuke 'simd-accel' from orbit
This feature causes nothing but problems and is frequently broken. The
only optimization it was enabling were SIMD optimizations for
transcoding. In particular, for UTF-16 transcoding. This is performed by
the [`encoding_rs`](https://github.com/hsivonen/encoding_rs) crate,
which specifically uses unstable portable SIMD APIs instead of the
stable non-portable SIMD APIs.

SIMD optimizations that apply to search have long been making use of
stable APIs, and are automatically enabled when your target supports
them. This is, IMO, the correct user experience and one that
`encoding_rs` refuses to support. I'm done dealing with it, so
transcoding will only use scalar code until the SIMD optimizations in
`encoding_rs` work on stable. (This doesn't mean that `encoding_rs` has
to change. This could also be fixed by stabilizing `std::simd`.)

Fixes #2748
2024-03-07 09:47:43 -05:00
Andrew Gallant
9bd30e8e48 deps: update everything 2024-03-07 09:38:22 -05:00
Andrew Gallant
59212d08d3 style: fix new lints
The Rust compiler seems to have gotten smarter at finding unused or
redundant imports.
2024-03-07 09:37:48 -05:00
SuperSpecialSweet
6ebebb2aaa doc: fix typo in comments
PR #2741
2024-02-22 06:57:58 -05:00
Andrew Gallant
e92e2ef813 cli: remove stray dbg!
Whoops, forgot to review my commits before pushing.
2024-02-15 12:02:15 -05:00
Andrew Gallant
4a30819302 cli: tweak how "is one file" predicate works
In effect, we switch from `path.is_file()` to `!path.is_dir()`. In cases
where process substitution is used, for example, the path can actually
have type "fifo" instead of "file." Even if it's a fifo, we want to
treat it as-if it were a file. The real key here is that we basically
always want to consider a lone argument as a file so long as we know it
isn't a directory. Because a directory is the only thing that will
cause us to (potentially) search more than one thing.

Fixes #2736
2024-02-15 11:59:59 -05:00
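A minimal sketch of the distinction, assuming nothing about ripgrep's internals: a fifo created by process substitution is not a regular file, but it isn't a directory either, so the inverted check treats it like a file.

    use std::path::Path;

    /// Old check: only regular files qualify (misses fifos, etc.).
    fn is_one_file_old(path: &Path) -> bool {
        path.is_file()
    }

    /// New check: anything that isn't a directory is treated as a
    /// single searchable "file" (regular file, fifo, char device, ...).
    fn is_one_file_new(path: &Path) -> bool {
        !path.is_dir()
    }

    fn main() {
        let arg = std::env::args().nth(1).unwrap_or_else(|| ".".to_string());
        let path = Path::new(&arg);
        println!("old: {}, new: {}", is_one_file_old(path), is_one_file_new(path));
    }

Running it against a bash process substitution such as `<(echo hi)` should, on Linux at least, print `old: false, new: true`.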
Wilfred Hughes
9b42af96f0 doc: fix typo in --hidden docs
PR #2718
2024-01-22 13:31:11 -05:00
Alex Touchet
648a65f197 doc: add missing date in changelog
PR #2704
2024-01-06 17:49:18 -05:00
Andrew Gallant
bdf01f46a6 changelog: start next section 2024-01-06 14:41:45 -05:00
Andrew Gallant
1c775f3a82 pkg/brew: update tap 2024-01-06 14:41:09 -05:00
Andrew Gallant
e50df40a19 14.1.0
Some checks failed
release / create-release (push) Has been cancelled
release / build-release (linux, ubuntu-latest, nightly, x86_64-linux-musl-strip, x86_64-unknown-linux-musl) (push) Has been cancelled
release / build-release (macos, macos-latest, nightly, x86_64-apple-darwin) (push) Has been cancelled
release / build-release (stable-aarch64, ubuntu-latest, qemu-aarch64, stable, aarch64-linux-gnu-strip, aarch64-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-arm-gnueabihf, ubuntu-latest, qemu-arm, stable, arm-linux-gnueabihf-strip, armv7-unknown-linux-gnueabihf) (push) Has been cancelled
release / build-release (stable-arm-musleabi, ubuntu-latest, qemu-arm, stable, arm-linux-musleabi-strip, armv7-unknown-linux-musleabi) (push) Has been cancelled
release / build-release (stable-arm-musleabihf, ubuntu-latest, qemu-arm, stable, arm-linux-musleabihf-strip, armv7-unknown-linux-musleabihf) (push) Has been cancelled
release / build-release (stable-powerpc64, ubuntu-latest, qemu-ppc64, stable, powerpc64-linux-gnu-strip, powerpc64-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-s390x, ubuntu-latest, qemu-s390x, stable, s390x-linux-gnu-strip, s390x-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-x86, ubuntu-latest, i386, stable, x86_64-linux-gnu-strip, i686-unknown-linux-gnu) (push) Has been cancelled
release / build-release (win-gnu, windows-latest, nightly-x86_64-gnu, x86_64-pc-windows-gnu) (push) Has been cancelled
release / build-release (win-msvc, windows-latest, nightly, x86_64-pc-windows-msvc) (push) Has been cancelled
release / build-release (win32-msvc, windows-latest, nightly, i686-pc-windows-msvc) (push) Has been cancelled
release / build-release-deb (push) Has been cancelled
2024-01-06 14:32:27 -05:00
Andrew Gallant
1fa76d2a42 changelog: add 14.1.0 blurb 2024-01-06 14:31:16 -05:00
Andrew Gallant
44aa5a417d deps: bump ignore to 0.4.22 2024-01-06 14:28:28 -05:00
Andrew Gallant
2c3897585d ignore-0.4.22 2024-01-06 14:27:44 -05:00
Andrew Gallant
6e9141a9ca deps: update everything 2024-01-06 14:26:52 -05:00
Andrew Gallant
c8e4a84519 cli: prefix all non-fatal error messages with 'rg: '
Fixes #2694
2024-01-06 14:15:52 -05:00
Andrew Gallant
f02a50a69d changelog: various updates 2024-01-06 13:59:52 -05:00
fe9lix
b9c774937f ignore: fix reference cycle for compiled matchers
It looks like there is a reference cycle caused by the compiled
matchers (compiled HashMap holds ref to Ignore and Ignore holds ref
to HashMap). Using weak refs fixes issue #2690 in my test project.
Also confirmed via before and after when profiling the code, see the
attached screenshots in #2692.

Fixes #2690
2024-01-06 12:50:42 -05:00
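The fix is the classic Rc-cycle pattern: one direction of the link is downgraded to a `Weak` so the cycle can't keep both sides alive. A generic illustration under that assumption (not the `ignore` crate's actual types):

    use std::cell::RefCell;
    use std::collections::HashMap;
    use std::rc::{Rc, Weak};

    struct Ignore {
        // Strong ref downward: Ignore owns its compiled matchers.
        compiled: RefCell<HashMap<String, Rc<Matcher>>>,
    }

    struct Matcher {
        // Weak ref back up: does not keep the Ignore alive,
        // which is what breaks the cycle.
        owner: Weak<Ignore>,
    }

    fn main() {
        let ignore = Rc::new(Ignore { compiled: RefCell::new(HashMap::new()) });
        let matcher = Rc::new(Matcher { owner: Rc::downgrade(&ignore) });
        ignore.compiled.borrow_mut().insert("src".to_string(), matcher);
        assert!(ignore.compiled.borrow()["src"].owner.upgrade().is_some());
        // Dropping `ignore` frees both sides: the Weak back-pointer
        // does not count toward the strong count.
        drop(ignore);
    }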
Andrew Gallant
67dd809a80 ignore: add some 'allow(dead_code)' annotations
I don't usually like doing this and would prefer to just delete unused
code, but I don't have the context required to understand why this code
is unused. A refresh of this crate is on the (distant) horizon, so I'll
just leave these here for now to squash the warnings.
2024-01-06 12:25:06 -05:00
Jan Verbeek
e0a85678e1 complete/fish: improve shell completions for fish
- Stop using `-n __fish_use_subcommand`. This had the effect of
ignoring options if a positional argument has already been given, but
that's not how ripgrep works.

- Only suggest negation options if the option they're negating is
passed (e.g., only complete `--no-pcre2` if `--pcre2` is present). The
zsh completions already do this.

- Take into account whether an option takes an argument. If an option
is not a switch then it won't suggest further options until the
argument is given, e.g. `-C<tab>` won't suggest options but `-i<tab>`
will.

- Suggest correct arguments for options. We already completed a fixed
set of choices where available, but now we go further:

  - Filenames are only suggested for options that take filenames.

  - `--pre` and `--hostname-bin` suggest binaries from `$PATH`.

  - `-t`/`--type`/&c use `--type-list` for suggestions, like in zsh,
  with a preview of the glob patterns.

  - `--encoding` uses a hardcoded list extracted from the zsh
  completions. This has been refactored into a separate file, and the
  range globs (`{1..5}`) replaced by comma globs (`{1,2,3,4,5}`) since
  those work in both shells. I verified that this produces the same
  list as before in zsh, and the same list in fish (albeit in a
  different order).

PR #2684
2024-01-06 10:39:35 -05:00
David Gilman
23af5fb043 doc: update MSRV in README
PR #2673
2024-01-06 10:22:26 -05:00
Andrew Gallant
5dec4b8e37 ci: drop custom Cross images
It looks like these aren't needed any more? I'm not sure why to be
honest. I suspect it's because we no longer need asciidoc(tor)? to
generate man pages. And I believe tests that require things like `zstd`
are automatically skipped if `zstd` isn't installed.
2024-01-06 10:21:34 -05:00
Younes El-karama
827082a33a ci: add more ARM build configurations to CI and release workflows
... it turns out that rustembedded/cross:armv7-unknown-linux-musleabi
doesn't exist. And looking more closely, it looks like the Cross project
has decided to shake things up and publish images to ghcr instead. So we
migrate everything over to that.
2024-01-06 10:21:34 -05:00
Andrew Gallant
6c2a550e1e deps: update everything
This drops a dependency on memoffset due to a crossbeam-epoch update.
w00t.
2024-01-04 19:46:29 -05:00
Andrew Gallant
8e8fc9c503 deps: bump pcre2-sys to 0.2.8
This release contains some extra logic to disable the JIT on musleabi
targets.
2024-01-04 19:44:28 -05:00
Andrew Gallant
2057023dc5 readme: update benchmarks
We add a few more too.
2024-01-03 16:21:04 -05:00
Andrew Gallant
3f2fe0afee deps: update everything
This also drops a dependency on scopeguard, courtesy of crossbeam-epoch
dropping it. Not sure why they did, but fine by me.
2023-12-17 09:37:33 -05:00
amesgen
56c7ad175a ignore/types: add Lean
Ref: https://lean-lang.org/

PR #2678
2023-12-07 11:46:00 -05:00
Timo Wilken
5b7a30846f doc: fix Guix install instructions
`guix install` should not be run using `sudo`, as per
<https://packages.guix.gnu.org/packages/ripgrep/>.

PR #2669
2023-11-30 10:54:54 -05:00
Patrick Williams
2a4dba3fbf ignore/types: add meson.options
Starting with meson 1.1, there is a preference for using meson.options
instead of meson_options.txt.  Add the new filename to the meson set.

PR #2666
2023-11-29 19:03:12 -05:00
liberodark
84d65865e6 doc: add Void Linux installation instructions
PR #2665
2023-11-29 07:49:20 -05:00
Andrew Gallant
d9aaa11873 pkg/brew: update tap 2023-11-28 16:23:16 -05:00
Andrew Gallant
67ad9917ad 14.0.3
Some checks failed
release / create-release (push) Has been cancelled
release / build-release (linux, ubuntu-latest, nightly, x86_64-linux-musl-strip, x86_64-unknown-linux-musl) (push) Has been cancelled
release / build-release (macos, macos-latest, nightly, x86_64-apple-darwin) (push) Has been cancelled
release / build-release (stable-aarch64, ubuntu-latest, qemu-aarch64, stable, aarch64-linux-gnu-strip, aarch64-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-powerpc64, ubuntu-latest, qemu-ppc64, stable, powerpc64-linux-gnu-strip, powerpc64-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-s390x, ubuntu-latest, qemu-s390x, stable, s390x-linux-gnu-strip, s390x-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-x86, ubuntu-latest, i386, stable, x86_64-linux-gnu-strip, i686-unknown-linux-gnu) (push) Has been cancelled
release / build-release (win-gnu, windows-latest, nightly-x86_64-gnu, x86_64-pc-windows-gnu) (push) Has been cancelled
release / build-release (win-msvc, windows-latest, nightly, x86_64-pc-windows-msvc) (push) Has been cancelled
release / build-release (win32-msvc, windows-latest, nightly, i686-pc-windows-msvc) (push) Has been cancelled
release / build-release-deb (push) Has been cancelled
2023-11-28 16:18:14 -05:00
Andrew Gallant
daa157b5f9 core: actually implement --sortr=path
This is an embarrassing oversight. A `todo!()` actually made its way
into a release! Oof.

This was working in ripgrep 13, but I had redone some aspects of sorting
and this just got left undone.

Fixes #2664
2023-11-28 16:17:14 -05:00
Andrew Gallant
ca5e294ad6 pkg/brew: update tap 2023-11-27 21:44:06 -05:00
Andrew Gallant
6c7947b819 14.0.2
Some checks failed
release / create-release (push) Has been cancelled
release / build-release (linux, ubuntu-latest, nightly, x86_64-linux-musl-strip, x86_64-unknown-linux-musl) (push) Has been cancelled
release / build-release (macos, macos-latest, nightly, x86_64-apple-darwin) (push) Has been cancelled
release / build-release (stable-aarch64, ubuntu-latest, qemu-aarch64, stable, aarch64-linux-gnu-strip, aarch64-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-powerpc64, ubuntu-latest, qemu-ppc64, stable, powerpc64-linux-gnu-strip, powerpc64-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-s390x, ubuntu-latest, qemu-s390x, stable, s390x-linux-gnu-strip, s390x-unknown-linux-gnu) (push) Has been cancelled
release / build-release (stable-x86, ubuntu-latest, i386, stable, x86_64-linux-gnu-strip, i686-unknown-linux-gnu) (push) Has been cancelled
release / build-release (win-gnu, windows-latest, nightly-x86_64-gnu, x86_64-pc-windows-gnu) (push) Has been cancelled
release / build-release (win-msvc, windows-latest, nightly, x86_64-pc-windows-msvc) (push) Has been cancelled
release / build-release (win32-msvc, windows-latest, nightly, i686-pc-windows-msvc) (push) Has been cancelled
release / build-release-deb (push) Has been cancelled
2023-11-27 21:38:21 -05:00
Andrew Gallant
9acb4a5405 deps: bump grep to 0.3.1 2023-11-27 21:37:41 -05:00
Andrew Gallant
0096c74c11 grep-0.3.1 2023-11-27 21:36:54 -05:00
Andrew Gallant
8c48355b03 deps: bump grep-printer to 0.2.1 2023-11-27 21:36:44 -05:00
Andrew Gallant
f9b86de963 grep-printer-0.2.1 2023-11-27 21:36:02 -05:00
Andrew Gallant
d23b74975a deps: bump grep-searcher to 0.1.13 2023-11-27 21:35:53 -05:00
117 changed files with 5790 additions and 2210 deletions

1
.github/FUNDING.yml vendored Normal file
View File

@@ -0,0 +1 @@
github: [BurntSushi]

View File

@@ -24,7 +24,7 @@ body:
attributes:
label: What version of ripgrep are you using?
description: Enter the output of `rg --version`.
placeholder: ex. ripgrep 13.0.0
placeholder: ex. ripgrep 0.2.1
validations:
required: true

View File

@@ -53,7 +53,7 @@ jobs:
include:
- build: pinned
os: ubuntu-latest
rust: 1.74.0
rust: 1.85.0
- build: stable
os: ubuntu-latest
rust: stable
@@ -75,6 +75,18 @@ jobs:
os: ubuntu-latest
rust: stable
target: aarch64-unknown-linux-gnu
- build: stable-arm-gnueabihf
os: ubuntu-latest
rust: stable
target: armv7-unknown-linux-gnueabihf
- build: stable-arm-musleabihf
os: ubuntu-latest
rust: stable
target: armv7-unknown-linux-musleabihf
- build: stable-arm-musleabi
os: ubuntu-latest
rust: stable
target: armv7-unknown-linux-musleabi
- build: stable-powerpc64
os: ubuntu-latest
rust: stable
@@ -83,15 +95,22 @@ jobs:
os: ubuntu-latest
rust: stable
target: s390x-unknown-linux-gnu
- build: stable-riscv64
os: ubuntu-latest
rust: stable
target: riscv64gc-unknown-linux-gnu
- build: macos
os: macos-latest
rust: nightly
- build: win-msvc
os: windows-2022
os: windows-latest
rust: nightly
- build: win-gnu
os: windows-2022
os: windows-latest
rust: nightly-x86_64-gnu
- build: winaarch64-msvc
os: windows-11-arm
rust: nightly
steps:
- name: Checkout repository
uses: actions/checkout@v4
@@ -165,7 +184,7 @@ jobs:
# 'rg' binary (done in test-complete) with qemu, which is a pain and
# doesn't really gain us much. If shell completion works in one place,
# it probably works everywhere.
if: matrix.target == '' && matrix.os != 'windows-2022'
if: matrix.target == '' && !startsWith(matrix.os, 'windows')
shell: bash
run: ci/test-complete
@@ -175,7 +194,22 @@ jobs:
- name: Print available short flags
shell: bash
run: ${{ env.CARGO }} test --bin rg ${{ env.TARGET_FLAGS }} flags::defs::tests::available_shorts -- --nocapture
run: ${{ env.CARGO }} test --bin rgs ${{ env.TARGET_FLAGS }} flags::defs::tests::available_shorts -- --nocapture
# Setup and compile on the wasm32-wasip1 target
wasm:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: stable
- name: Add wasm32-wasip1 target
run: rustup target add wasm32-wasip1
- name: Basic build
run: cargo build --verbose
rustfmt:
runs-on: ubuntu-latest
@@ -203,3 +237,28 @@ jobs:
env:
RUSTDOCFLAGS: -D warnings
run: cargo doc --no-deps --document-private-items --workspace
fuzz_testing:
name: Compile Fuzz Test Targets
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install required packages (Ubuntu)
run: |
sudo apt-get update
sudo apt-get install g++ --yes
- name: Install Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: stable
- name: Install Fuzzer
run: cargo install cargo-fuzz
working-directory: fuzz
- name: Verify fuzz targets build
run: cargo check
working-directory: fuzz

View File

@@ -80,12 +80,24 @@ jobs:
target: aarch64-unknown-linux-gnu
strip: aarch64-linux-gnu-strip
qemu: qemu-aarch64
- build: stable-powerpc64
- build: stable-arm-gnueabihf
os: ubuntu-latest
rust: stable
target: powerpc64-unknown-linux-gnu
strip: powerpc64-linux-gnu-strip
qemu: qemu-ppc64
target: armv7-unknown-linux-gnueabihf
strip: arm-linux-gnueabihf-strip
qemu: qemu-arm
- build: stable-arm-musleabihf
os: ubuntu-latest
rust: stable
target: armv7-unknown-linux-musleabihf
strip: arm-linux-musleabihf-strip
qemu: qemu-arm
- build: stable-arm-musleabi
os: ubuntu-latest
rust: stable
target: armv7-unknown-linux-musleabi
strip: arm-linux-musleabi-strip
qemu: qemu-arm
- build: stable-s390x
os: ubuntu-latest
rust: stable
@@ -96,6 +108,10 @@ jobs:
os: macos-latest
rust: nightly
target: x86_64-apple-darwin
- build: macos
os: macos-latest
rust: nightly
target: aarch64-apple-darwin
- build: win-msvc
os: windows-latest
rust: nightly
@@ -104,6 +120,10 @@ jobs:
os: windows-latest
rust: nightly-x86_64-gnu
target: x86_64-pc-windows-gnu
- build: winaarch64-msvc
os: windows-11-arm
rust: nightly
target: aarch64-pc-windows-msvc
- build: win32-msvc
os: windows-latest
rust: nightly
@@ -156,11 +176,11 @@ jobs:
- name: Build release binary
shell: bash
run: |
${{ env.CARGO }} build --verbose --release --features pcre2 ${{ env.TARGET_FLAGS }}
if [ "${{ matrix.os }}" = "windows-latest" ]; then
bin="target/${{ matrix.target }}/release/rg.exe"
${{ env.CARGO }} build --verbose --profile release-lto --features pcre2 ${{ env.TARGET_FLAGS }}
if [[ "${{ matrix.os }}" == windows-* ]]; then
bin="target/${{ matrix.target }}/release-lto/rgs.exe"
else
bin="target/${{ matrix.target }}/release/rg"
bin="target/${{ matrix.target }}/release-lto/rgs"
fi
echo "BIN=$bin" >> $GITHUB_ENV
@@ -175,22 +195,22 @@ jobs:
run: |
docker run --rm -v \
"$PWD/target:/target:Z" \
"rustembedded/cross:${{ matrix.target }}" \
"ghcr.io/cross-rs/${{ matrix.target }}:main" \
"${{ matrix.strip }}" \
"/target/${{ matrix.target }}/release/rg"
"/$BIN"
- name: Determine archive name
shell: bash
run: |
version="${{ needs.create-release.outputs.version }}"
echo "ARCHIVE=ripgrep-$version-${{ matrix.target }}" >> $GITHUB_ENV
echo "ARCHIVE=rgs-$version-${{ matrix.target }}" >> $GITHUB_ENV
- name: Creating directory for archive
shell: bash
run: |
mkdir -p "$ARCHIVE"/{complete,doc}
cp "$BIN" "$ARCHIVE"/
cp {README.md,COPYING,UNLICENSE,LICENSE-MIT} "$ARCHIVE"/
cp {README.md,README-ripgrep.md,COPYING,UNLICENSE,LICENSE-MIT} "$ARCHIVE"/
cp {CHANGELOG.md,FAQ.md,GUIDE.md} "$ARCHIVE"/doc/
- name: Generate man page and completions (no emulation)
@@ -198,11 +218,11 @@ jobs:
shell: bash
run: |
"$BIN" --version
"$BIN" --generate complete-bash > "$ARCHIVE/complete/rg.bash"
"$BIN" --generate complete-fish > "$ARCHIVE/complete/rg.fish"
"$BIN" --generate complete-powershell > "$ARCHIVE/complete/_rg.ps1"
"$BIN" --generate complete-zsh > "$ARCHIVE/complete/_rg"
"$BIN" --generate man > "$ARCHIVE/doc/rg.1"
"$BIN" --generate complete-bash > "$ARCHIVE/complete/rgs.bash"
"$BIN" --generate complete-fish > "$ARCHIVE/complete/rgs.fish"
"$BIN" --generate complete-powershell > "$ARCHIVE/complete/_rgs.ps1"
"$BIN" --generate complete-zsh > "$ARCHIVE/complete/_rgs"
"$BIN" --generate man > "$ARCHIVE/doc/rgs.1"
- name: Generate man page and completions (emulation)
if: matrix.qemu != ''
@@ -210,37 +230,37 @@ jobs:
run: |
docker run --rm -v \
"$PWD/target:/target:Z" \
"rustembedded/cross:${{ matrix.target }}" \
"ghcr.io/cross-rs/${{ matrix.target }}:main" \
"${{ matrix.qemu }}" "/$BIN" --version
docker run --rm -v \
"$PWD/target:/target:Z" \
"rustembedded/cross:${{ matrix.target }}" \
"ghcr.io/cross-rs/${{ matrix.target }}:main" \
"${{ matrix.qemu }}" "/$BIN" \
--generate complete-bash > "$ARCHIVE/complete/rg.bash"
--generate complete-bash > "$ARCHIVE/complete/rgs.bash"
docker run --rm -v \
"$PWD/target:/target:Z" \
"rustembedded/cross:${{ matrix.target }}" \
"ghcr.io/cross-rs/${{ matrix.target }}:main" \
"${{ matrix.qemu }}" "/$BIN" \
--generate complete-fish > "$ARCHIVE/complete/rg.fish"
--generate complete-fish > "$ARCHIVE/complete/rgs.fish"
docker run --rm -v \
"$PWD/target:/target:Z" \
"rustembedded/cross:${{ matrix.target }}" \
"ghcr.io/cross-rs/${{ matrix.target }}:main" \
"${{ matrix.qemu }}" "/$BIN" \
--generate complete-powershell > "$ARCHIVE/complete/_rg.ps1"
--generate complete-powershell > "$ARCHIVE/complete/_rgs.ps1"
docker run --rm -v \
"$PWD/target:/target:Z" \
"rustembedded/cross:${{ matrix.target }}" \
"ghcr.io/cross-rs/${{ matrix.target }}:main" \
"${{ matrix.qemu }}" "/$BIN" \
--generate complete-zsh > "$ARCHIVE/complete/_rg"
--generate complete-zsh > "$ARCHIVE/complete/_rgs"
docker run --rm -v \
"$PWD/target:/target:Z" \
"rustembedded/cross:${{ matrix.target }}" \
"ghcr.io/cross-rs/${{ matrix.target }}:main" \
"${{ matrix.qemu }}" "/$BIN" \
--generate man > "$ARCHIVE/doc/rg.1"
--generate man > "$ARCHIVE/doc/rgs.1"
- name: Build archive (Windows)
shell: bash
if: matrix.os == 'windows-latest'
if: startsWith(matrix.os, 'windows')
run: |
7z a "$ARCHIVE.zip" "$ARCHIVE"
certutil -hashfile "$ARCHIVE.zip" SHA256 > "$ARCHIVE.zip.sha256"
@@ -249,7 +269,7 @@ jobs:
- name: Build archive (Unix)
shell: bash
if: matrix.os != 'windows-latest'
if: ${{ !startsWith(matrix.os, 'windows') }}
run: |
tar czf "$ARCHIVE.tar.gz" "$ARCHIVE"
shasum -a 256 "$ARCHIVE.tar.gz" > "$ARCHIVE.tar.gz.sha256"
@@ -305,7 +325,7 @@ jobs:
shell: bash
run: |
cargo build --target ${{ env.TARGET }}
bin="target/${{ env.TARGET }}/debug/rg"
bin="target/${{ env.TARGET }}/debug/rgs"
echo "BIN=$bin" >> $GITHUB_ENV
- name: Create deployment directory
@@ -318,14 +338,14 @@ jobs:
- name: Generate man page
shell: bash
run: |
"$BIN" --generate man > "$DEPLOY_DIR/rg.1"
"$BIN" --generate man > "$DEPLOY_DIR/rgs.1"
- name: Generate shell completions
shell: bash
run: |
"$BIN" --generate complete-bash > "$DEPLOY_DIR/rg.bash"
"$BIN" --generate complete-fish > "$DEPLOY_DIR/rg.fish"
"$BIN" --generate complete-zsh > "$DEPLOY_DIR/_rg"
"$BIN" --generate complete-bash > "$DEPLOY_DIR/rgs.bash"
"$BIN" --generate complete-fish > "$DEPLOY_DIR/rgs.fish"
"$BIN" --generate complete-zsh > "$DEPLOY_DIR/_rgs"
- name: Build release binary
shell: bash
@@ -333,7 +353,7 @@ jobs:
cargo deb --profile deb --target ${{ env.TARGET }}
version="${{ needs.create-release.outputs.version }}"
echo "DEB_DIR=target/${{ env.TARGET }}/debian" >> $GITHUB_ENV
echo "DEB_NAME=ripgrep_$version-1_amd64.deb" >> $GITHUB_ENV
echo "DEB_NAME=rgs_$version-1_amd64.deb" >> $GITHUB_ENV
- name: Create sha256 sum of deb file
shell: bash

View File

@@ -1,3 +1,191 @@
TBD
===
Unreleased changes. Release notes have not yet been written.
Bug fixes:
* [BUG #3212](https://github.com/BurntSushi/ripgrep/pull/3212):
Don't check for the existence of `.jj` when `--no-ignore` is used.
15.1.0
======
This is a small release that fixes a bug with how ripgrep handles line
buffering. This might manifest as ripgrep printing output later than you
expect or not working correctly with `tail -f` (even if you're using the
`--line-buffered` flag).
Bug fixes:
* [BUG #3194](https://github.com/BurntSushi/ripgrep/issues/3194):
Fix a regression with `--line-buffered` introduced in ripgrep 15.0.0.
Feature enhancements:
* [FEATURE #3192](https://github.com/BurntSushi/ripgrep/pull/3192):
Add hyperlink alias for Cursor.
15.0.0 (2025-10-15)
===================
ripgrep 15 is a new major version release of ripgrep that mostly has bug fixes,
some minor performance improvements and minor new features. Here are some
highlights:
* Several bugs around gitignore matching have been fixed. This includes
a commonly reported bug related to applying gitignore rules from parent
directories.
* A memory usage regression when handling very large gitignore files has been
fixed.
* `rg -vf file`, where `file` is empty, now matches everything.
* The `-r/--replace` flag now works with `--json`.
* A subset of Jujutsu (`jj`) repositories are now treated as if they were git
repositories. That is, ripgrep will respect `jj`'s gitignores.
* Globs can now use nested curly braces.
Platform support:
* `aarch64` for Windows now has release artifacts.
* `powerpc64` no longer has release artifacts generated for it. The CI
release workflow stopped working, and I didn't deem it worth my time to
debug it. If someone wants this and can test it, I'd be happy to add it
back.
* ripgrep binaries are now compiled with full LTO enabled. You may notice
small performance improvements from this and a modest decrease in binary
size.
Performance improvements:
* [PERF #2111](https://github.com/BurntSushi/ripgrep/issues/2111):
Don't resolve helper binaries on Windows when `-z/--search-zip` isn't used.
* [PERF #2865](https://github.com/BurntSushi/ripgrep/pull/2865):
Avoid using path canonicalization on Windows when emitting hyperlinks.
Bug fixes:
* [BUG #829](https://github.com/BurntSushi/ripgrep/issues/829),
[BUG #2731](https://github.com/BurntSushi/ripgrep/issues/2731),
[BUG #2747](https://github.com/BurntSushi/ripgrep/issues/2747),
[BUG #2770](https://github.com/BurntSushi/ripgrep/issues/2770),
[BUG #2778](https://github.com/BurntSushi/ripgrep/issues/2778),
[BUG #2836](https://github.com/BurntSushi/ripgrep/issues/2836),
[BUG #2933](https://github.com/BurntSushi/ripgrep/pull/2933),
[BUG #3067](https://github.com/BurntSushi/ripgrep/pull/3067):
Fix bug related to gitignores from parent directories.
* [BUG #1332](https://github.com/BurntSushi/ripgrep/issues/1332),
[BUG #3001](https://github.com/BurntSushi/ripgrep/issues/3001):
Make `rg -vf file` where `file` is empty match everything.
* [BUG #2177](https://github.com/BurntSushi/ripgrep/issues/2177):
Ignore a UTF-8 BOM marker at the start of `.gitignore` (and similar files).
* [BUG #2750](https://github.com/BurntSushi/ripgrep/issues/2750):
Fix memory usage regression for some truly large gitignore files.
* [BUG #2944](https://github.com/BurntSushi/ripgrep/pull/2944):
Fix a bug where the "bytes searched" in `--stats` output could be incorrect.
* [BUG #2990](https://github.com/BurntSushi/ripgrep/issues/2990):
Fix a bug where ripgrep would mishandle globs that ended with a `.`.
* [BUG #2094](https://github.com/BurntSushi/ripgrep/issues/2094),
[BUG #3076](https://github.com/BurntSushi/ripgrep/issues/3076):
Fix bug with `-m/--max-count` and `-U/--multiline` showing too many matches.
* [BUG #3100](https://github.com/BurntSushi/ripgrep/pull/3100):
Preserve line terminators when using `-r/--replace` flag.
* [BUG #3108](https://github.com/BurntSushi/ripgrep/issues/3108):
Fix a bug where `-q --files-without-match` inverted the exit code.
* [BUG #3131](https://github.com/BurntSushi/ripgrep/issues/3131):
Document inconsistency between `-c/--count` and `--files-with-matches`.
* [BUG #3135](https://github.com/BurntSushi/ripgrep/issues/3135):
Fix rare panic for some classes of large regexes on large haystacks.
* [BUG #3140](https://github.com/BurntSushi/ripgrep/issues/3140):
Ensure hyphens in flag names are escaped in the roff text for the man page.
* [BUG #3155](https://github.com/BurntSushi/ripgrep/issues/3155):
Statically compile PCRE2 into macOS release artifacts on `aarch64`.
* [BUG #3173](https://github.com/BurntSushi/ripgrep/issues/3173):
Fix ancestor ignore filter bug when searching whitelisted hidden files.
* [BUG #3178](https://github.com/BurntSushi/ripgrep/discussions/3178):
Fix bug causing incorrect summary statistics with `--json` flag.
* [BUG #3179](https://github.com/BurntSushi/ripgrep/issues/3179):
Fix gitignore bug when searching absolute paths with global gitignores.
* [BUG #3180](https://github.com/BurntSushi/ripgrep/issues/3180):
Fix a panicking bug when using `-U/--multiline` and `-r/--replace`.
Feature enhancements:
* Many enhancements to the default set of file types available for filtering.
* [FEATURE #1872](https://github.com/BurntSushi/ripgrep/issues/1872):
Make `-r/--replace` work with `--json`.
* [FEATURE #2708](https://github.com/BurntSushi/ripgrep/pull/2708):
Completions for the fish shell take ripgrep's config file into account.
* [FEATURE #2841](https://github.com/BurntSushi/ripgrep/pull/2841):
Add `italic` to the list of available style attributes in `--color`.
* [FEATURE #2842](https://github.com/BurntSushi/ripgrep/pull/2842):
Directories containing `.jj` are now treated as git repositories.
* [FEATURE #2849](https://github.com/BurntSushi/ripgrep/pull/2849):
When using multithreading, schedule files to search in order given on CLI.
* [FEATURE #2943](https://github.com/BurntSushi/ripgrep/issues/2943):
Add `aarch64` release artifacts for Windows.
* [FEATURE #3024](https://github.com/BurntSushi/ripgrep/issues/3024):
Add `highlight` color type, for styling non-matching text in a matching line.
* [FEATURE #3048](https://github.com/BurntSushi/ripgrep/pull/3048):
Globs in ripgrep (and the `globset` crate) now support nested alternates.
* [FEATURE #3096](https://github.com/BurntSushi/ripgrep/pull/3096):
Improve completions for `--hyperlink-format` in bash and fish.
* [FEATURE #3102](https://github.com/BurntSushi/ripgrep/pull/3102):
Improve completions for `--hyperlink-format` in zsh.
14.1.1 (2024-09-08)
===================
This is a minor release with a bug fix for a matching bug. In particular, a bug
was found that could cause ripgrep to ignore lines that should match. That is,
false negatives. It is difficult to characterize the specific set of regexes
in which this occurs as it requires multiple different optimization strategies
to collide and produce an incorrect result. But as one reported example, in
ripgrep, the regex `(?i:e.x|ex)` does not match `e-x` when it should. (This
bug is a result of an inner literal optimization performed in the `grep-regex`
crate and not in the `regex` crate.)
Bug fixes:
* [BUG #2884](https://github.com/BurntSushi/ripgrep/issues/2884):
Fix bug where ripgrep could miss some matches that it should report.
Miscellaneous:
* [MISC #2748](https://github.com/BurntSushi/ripgrep/issues/2748):
Remove ripgrep's `simd-accel` feature because it was frequently broken.
14.1.0 (2024-01-06)
===================
This is a minor release with a few small new features and bug fixes. This
release contains a bug fix for unbounded memory growth while walking a
directory tree. This release also includes improvements to the completions for
the `fish` shell, and release binaries for several additional ARM targets.
Bug fixes:
* [BUG #2690](https://github.com/BurntSushi/ripgrep/issues/2690):
Fix unbounded memory growth in the `ignore` crate.
Feature enhancements:
* Added or improved file type filtering for Lean and Meson.
* [FEATURE #2684](https://github.com/BurntSushi/ripgrep/issues/2684):
Improve completions for the `fish` shell.
* [FEATURE #2702](https://github.com/BurntSushi/ripgrep/pull/2702):
Add release binaries for `armv7-unknown-linux-gnueabihf`,
`armv7-unknown-linux-musleabihf` and `armv7-unknown-linux-musleabi`.
14.0.3 (2023-11-28)
===================
This is a patch release with a bug fix for the `--sortr` flag.
Bug fixes:
* [BUG #2664](https://github.com/BurntSushi/ripgrep/issues/2664):
Fix `--sortr=path`. I left a `todo!()` in the source. Oof.
14.0.2 (2023-11-27)
===================
This is a patch release with a few small bug fixes.

360
Cargo.lock generated
View File

@@ -1,33 +1,36 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.2"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.75"
version = "1.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
[[package]]
name = "autocfg"
version = "1.1.0"
name = "arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
dependencies = [
"derive_arbitrary",
]
[[package]]
name = "bstr"
version = "1.8.0"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "542f33a8835a0884b006a0c3df3dadd99c0c3f296ed26c2fdc8028e01ad6230c"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [
"memchr",
"regex-automata",
@@ -36,71 +39,74 @@ dependencies = [
[[package]]
name = "cc"
version = "1.0.83"
version = "1.2.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7"
dependencies = [
"find-msvc-tools",
"jobserver",
"libc",
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "crossbeam-channel"
version = "0.5.8"
version = "0.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.3"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"cfg-if",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.15"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset",
"scopeguard",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.16"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "derive_arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
dependencies = [
"cfg-if",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "encoding_rs"
version = "0.8.33"
version = "0.8.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
dependencies = [
"cfg-if",
"packed_simd",
]
[[package]]
@@ -113,16 +119,35 @@ dependencies = [
]
[[package]]
name = "glob"
version = "0.3.1"
name = "find-msvc-tools"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127"
[[package]]
name = "getrandom"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasip2",
]
[[package]]
name = "glob"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]]
name = "globset"
version = "0.4.14"
version = "0.4.18"
dependencies = [
"aho-corasick",
"arbitrary",
"bstr",
"glob",
"log",
@@ -134,7 +159,7 @@ dependencies = [
[[package]]
name = "grep"
version = "0.3.0"
version = "0.4.1"
dependencies = [
"grep-cli",
"grep-matcher",
@@ -148,7 +173,7 @@ dependencies = [
[[package]]
name = "grep-cli"
version = "0.1.10"
version = "0.1.12"
dependencies = [
"bstr",
"globset",
@@ -160,7 +185,7 @@ dependencies = [
[[package]]
name = "grep-matcher"
version = "0.1.7"
version = "0.1.8"
dependencies = [
"memchr",
"regex",
@@ -168,7 +193,7 @@ dependencies = [
[[package]]
name = "grep-pcre2"
version = "0.1.7"
version = "0.1.9"
dependencies = [
"grep-matcher",
"log",
@@ -177,7 +202,7 @@ dependencies = [
[[package]]
name = "grep-printer"
version = "0.2.0"
version = "0.3.1"
dependencies = [
"bstr",
"grep-matcher",
@@ -191,7 +216,7 @@ dependencies = [
[[package]]
name = "grep-regex"
version = "0.1.12"
version = "0.1.14"
dependencies = [
"bstr",
"grep-matcher",
@@ -202,7 +227,7 @@ dependencies = [
[[package]]
name = "grep-searcher"
version = "0.1.13"
version = "0.1.16"
dependencies = [
"bstr",
"encoding_rs",
@@ -217,7 +242,7 @@ dependencies = [
[[package]]
name = "ignore"
version = "0.4.21"
version = "0.4.25"
dependencies = [
"bstr",
"crossbeam-channel",
@@ -233,112 +258,58 @@ dependencies = [
[[package]]
name = "itoa"
version = "1.0.9"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
[[package]]
name = "jemalloc-sys"
version = "0.5.4+5.3.0-patched"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac6c1946e1cea1788cbfde01c993b52a10e2da07f4bac608228d1bed20bfebf2"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "jemallocator"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0de374a9f8e63150e6f5e8a60cc14c668226d7a347d8aee1a45766e3c4dd3bc"
dependencies = [
"jemalloc-sys",
"libc",
]
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "jobserver"
version = "0.1.27"
version = "0.1.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d"
checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
dependencies = [
"getrandom",
"libc",
]
[[package]]
name = "lexopt"
version = "0.3.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baff4b617f7df3d896f97fe922b64817f6cd9a756bb81d40f8883f2f66dcb401"
checksum = "9fa0e2a1fcbe2f6be6c42e342259976206b383122fc152e872795338b5a3f3a7"
[[package]]
name = "libc"
version = "0.2.150"
version = "0.2.177"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
[[package]]
name = "libm"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
[[package]]
name = "log"
version = "0.4.20"
version = "0.4.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
[[package]]
name = "memchr"
version = "2.6.4"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "memmap2"
version = "0.9.0"
version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "deaba38d7abf1d4cca21cc89e932e542ba2b9258664d2a9ef0e61512039c9375"
checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490"
dependencies = [
"libc",
]
[[package]]
name = "memoffset"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
dependencies = [
"autocfg",
]
[[package]]
name = "num-traits"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
dependencies = [
"autocfg",
"libm",
]
[[package]]
name = "packed_simd"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f9f08af0c877571712e2e3e686ad79efad9657dbf0f7c3c8ba943ff6c38932d"
dependencies = [
"cfg-if",
"num-traits",
]
[[package]]
name = "pcre2"
version = "0.2.6"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c9d53a8ea5fc3d3568d3de4bebc12606fd0eb8234c602576f1f1ee4880488a7"
checksum = "9e970b0fcce0c7ee6ef662744ff711f21ccd6f11b7cf03cd187a80e89797fc67"
dependencies = [
"libc",
"log",
@@ -347,9 +318,9 @@ dependencies = [
[[package]]
name = "pcre2-sys"
version = "0.2.7"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f8f5556f23cf2c0b481949fdfc19a7cd9b27ddcb00ef3477b0f4935cbdaedf2"
checksum = "18b9073c1a2549bd409bf4a32c94d903bb1a09bf845bc306ae148897fa0760a4"
dependencies = [
"cc",
"libc",
@@ -358,33 +329,39 @@ dependencies = [
[[package]]
name = "pkg-config"
version = "0.3.27"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "proc-macro2"
version = "1.0.70"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.33"
version = "1.0.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.10.2"
name = "r-efi"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
@@ -394,9 +371,9 @@ dependencies = [
[[package]]
name = "regex-automata"
version = "0.4.3"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
@@ -405,19 +382,18 @@ dependencies = [
[[package]]
name = "regex-syntax"
version = "0.8.2"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "ripgrep"
version = "14.0.1"
version = "15.1.0"
dependencies = [
"anyhow",
"bstr",
"grep",
"ignore",
"jemallocator",
"lexopt",
"log",
"serde",
@@ -425,14 +401,15 @@ dependencies = [
"serde_json",
"termcolor",
"textwrap",
"tikv-jemallocator",
"walkdir",
]
[[package]]
name = "ryu"
version = "1.0.15"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
[[package]]
name = "same-file"
@@ -444,25 +421,28 @@ dependencies = [
]
[[package]]
name = "scopeguard"
version = "1.2.0"
name = "serde"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
]
[[package]]
name = "serde"
version = "1.0.193"
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.193"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
@@ -471,20 +451,28 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.108"
version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b"
checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
dependencies = [
"itoa",
"memchr",
"ryu",
"serde",
"serde_core",
]
[[package]]
name = "syn"
version = "2.0.39"
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "syn"
version = "2.0.107"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b"
dependencies = [
"proc-macro2",
"quote",
@@ -493,62 +481,90 @@ dependencies = [
[[package]]
name = "termcolor"
version = "1.4.0"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449"
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
dependencies = [
"winapi-util",
]
[[package]]
name = "textwrap"
version = "0.16.0"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057"
[[package]]
name = "tikv-jemalloc-sys"
version = "0.6.1+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd8aa5b2ab86a2cefa406d889139c162cbb230092f7d1d7cbc1716405d852a3b"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "tikv-jemallocator"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0359b4327f954e0567e69fb191cf1436617748813819c94b8cd4a431422d053a"
dependencies = [
"libc",
"tikv-jemalloc-sys",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06"
[[package]]
name = "walkdir"
version = "2.4.0"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "winapi"
version = "0.3.9"
name = "wasip2"
version = "1.0.1+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
"wit-bindgen",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.6"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"winapi",
"windows-sys",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-sys"
version = "0.61.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
dependencies = [
"windows-link",
]
[[package]]
name = "wit-bindgen"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"

Cargo.toml

@@ -1,7 +1,10 @@
[package]
name = "ripgrep"
version = "14.0.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
name = "rgs"
version = "0.1.0" #:version
authors = [
"Andrew Gallant <jamslam@gmail.com>",
"Peisong Xiao <peisong.xiao.xps@gmail.com>",
]
description = """
ripgrep is a line-oriented search tool that recursively searches the current
directory for a regex pattern while respecting gitignore rules. ripgrep has
@@ -20,16 +23,17 @@ exclude = [
"/pkg/brew",
"/benchsuite/",
"/scripts/",
"/crates/fuzz",
]
build = "build.rs"
autotests = false
edition = "2021"
rust-version = "1.72"
edition = "2024"
rust-version = "1.85"
[[bin]]
bench = false
path = "crates/core/main.rs"
name = "rg"
name = "rgs"
[[test]]
name = "integration"
@@ -51,16 +55,16 @@ members = [
[dependencies]
anyhow = "1.0.75"
bstr = "1.7.0"
grep = { version = "0.3.0", path = "crates/grep" }
ignore = { version = "0.4.21", path = "crates/ignore" }
grep = { version = "0.4.1", path = "crates/grep" }
ignore = { version = "0.4.24", path = "crates/ignore" }
lexopt = "0.3.0"
log = "0.4.5"
serde_json = "1.0.23"
termcolor = "1.1.0"
textwrap = { version = "0.16.0", default-features = false }
[target.'cfg(all(target_env = "musl", target_pointer_width = "64"))'.dependencies.jemallocator]
version = "0.5.0"
[target.'cfg(all(target_env = "musl", target_pointer_width = "64"))'.dependencies.tikv-jemallocator]
version = "0.6.0"
[dev-dependencies]
serde = "1.0.77"
@@ -68,7 +72,6 @@ serde_derive = "1.0.77"
walkdir = "2"
[features]
simd-accel = ["grep/simd-accel"]
pcre2 = ["grep/pcre2"]
[profile.release]
@@ -86,36 +89,33 @@ panic = "abort"
incremental = false
codegen-units = 1
# This is the main way to strip binaries in the deb package created by
# 'cargo deb'. For other release binaries, we (currently) call 'strip'
# explicitly in the release process.
[profile.deb]
inherits = "release"
debug = false
inherits = "release-lto"
[package.metadata.deb]
features = ["pcre2"]
section = "utils"
assets = [
["target/release/rg", "usr/bin/", "755"],
["target/release/rgs", "usr/bin/", "755"],
["COPYING", "usr/share/doc/ripgrep/", "644"],
["LICENSE-MIT", "usr/share/doc/ripgrep/", "644"],
["UNLICENSE", "usr/share/doc/ripgrep/", "644"],
["CHANGELOG.md", "usr/share/doc/ripgrep/CHANGELOG", "644"],
["README.md", "usr/share/doc/ripgrep/README", "644"],
["README-ripgrep.md", "usr/share/doc/ripgrep/README-ripgrep", "644"],
["FAQ.md", "usr/share/doc/ripgrep/FAQ", "644"],
# The man page is automatically generated by ripgrep's build process, so
# this file isn't actually committed. Instead, to create a dpkg, either
# create a deployment/deb directory and copy the man page to it, or use the
# 'ci/build-deb' script.
["deployment/deb/rg.1", "usr/share/man/man1/rg.1", "644"],
["deployment/deb/rgs.1", "usr/share/man/man1/rgs.1", "644"],
# Similarly for shell completions.
["deployment/deb/rg.bash", "usr/share/bash-completion/completions/rg", "644"],
["deployment/deb/rg.fish", "usr/share/fish/vendor_completions.d/rg.fish", "644"],
["deployment/deb/_rg", "usr/share/zsh/vendor-completions/", "644"],
["deployment/deb/rgs.bash", "usr/share/bash-completion/completions/rgs", "644"],
["deployment/deb/rgs.fish", "usr/share/fish/vendor_completions.d/rgs.fish", "644"],
["deployment/deb/_rgs", "usr/share/zsh/vendor-completions/", "644"],
]
extended-description = """\
ripgrep (rg) recursively searches your current directory for a regex pattern.
rgs recursively searches your current directory for a regex pattern.
By default, ripgrep will respect your .gitignore and automatically skip hidden
files/directories and binary files.
"""

Cross.toml

@@ -1,14 +0,0 @@
[target.x86_64-unknown-linux-musl]
image = "burntsushi/cross:x86_64-unknown-linux-musl"
[target.i686-unknown-linux-gnu]
image = "burntsushi/cross:i686-unknown-linux-gnu"
[target.aarch64-unknown-linux-gnu]
image = "burntsushi/cross:aarch64-unknown-linux-gnu"
[target.powerpc64-unknown-linux-gnu]
image = "burntsushi/cross:powerpc64-unknown-linux-gnu"
[target.s390x-unknown-linux-gnu]
image = "burntsushi/cross:s390x-unknown-linux-gnu"

FAQ.md

@@ -94,7 +94,7 @@ Does ripgrep have support for shell auto-completion?
Yes! If you installed ripgrep through a package manager on a Unix system, then
the shell completion files included in the release archive should have been
installed for you automatically. If not, you can generate completes using
installed for you automatically. If not, you can generate completions using
ripgrep's command line interface.
For **bash**:
@@ -113,14 +113,31 @@ $ mkdir -p "$dir"
$ rg --generate complete-fish > "$dir/rg.fish"
```
For **zsh**:
For **zsh**, the recommended approach is:
```
```zsh
$ dir="$HOME/.zsh-complete"
$ mkdir -p "$dir"
$ rg --generate complete-zsh > "$dir/_rg"
```
And then add `$HOME/.zsh-complete` to your `fpath` in, e.g., your
`$HOME/.zshrc` file:
```zsh
fpath=($HOME/.zsh-complete $fpath)
```
Or if you'd prefer to load and generate completions at the same time, you can
add the following to your `$HOME/.zshrc` file:
```zsh
$ source <(rg --generate complete-zsh)
```
Note though that while this approach is easier to set up, it is generally slower
than the previous method and will add more time to loading your shell prompt.
For **PowerShell**, create the completions:
```
@@ -248,8 +265,8 @@ The `--colors` flag is a bit more complicated. The general format is:
to bold the output or not).
* `{value}` is determined by the value of `{attribute}`. If
`{attribute}` is `style`, then `{value}` should be one of `nobold`,
`bold`, `nointense`, `intense`, `nounderline` or `underline`. If
`{attribute}` is `fg` or `bg`, then `{value}` should be a color.
`bold`, `nointense`, `intense`, `nounderline`, `underline`, `noitalic` or
`italic`. If `{attribute}` is `fg` or `bg`, then `{value}` should be a color.
A color is specified by either one of eight English names, a single 256-color
number or an RGB triple (with over 16 million possible values, or "true
@@ -268,8 +285,8 @@ As a special case, `--colors '{type}:none'` will clear all colors and styles
associated with `{type}`, which lets you start with a clean slate (instead of
building on top of ripgrep's default color settings).
Here's an example that makes highlights the matches with a nice blue background
with bolded white text:
Here's an example that highlights the matches with a nice blue background with
bolded white text:
```
$ rg somepattern \
@@ -1038,7 +1055,7 @@ How can I donate to ripgrep or its maintainers?
I welcome [sponsorship](https://github.com/sponsors/BurntSushi/).
Or if you'd prefer, donating to a charitably organization that you like would
Or if you'd prefer, donating to a charitable organization that you like would
also be most welcome. My favorites are:
* [The Internet Archive](https://archive.org/donate/)

GUIDE.md

@@ -2,7 +2,7 @@
This guide is intended to give an elementary description of ripgrep and an
overview of its capabilities. This guide assumes that ripgrep is
[installed](README.md#installation)
[installed](README-ripgrep.md#installation)
and that readers have passing familiarity with using command line tools. This
also assumes a Unix-like system, although most commands are probably easily
translatable to any command line shell environment.
@@ -42,17 +42,17 @@ $ unzip 0.7.1.zip
$ cd ripgrep-0.7.1
$ ls
benchsuite grep tests Cargo.toml LICENSE-MIT
ci ignore wincolor CHANGELOG.md README.md
ci ignore wincolor CHANGELOG.md README-ripgrep.md
complete pkg appveyor.yml compile snapcraft.yaml
doc src build.rs COPYING UNLICENSE
globset termcolor Cargo.lock HomebrewFormula
```
Let's try our first search by looking for all occurrences of the word `fast`
in `README.md`:
in `README-ripgrep.md`:
```
$ rg fast README.md
$ rg fast README-ripgrep.md
75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
119:### Is it really faster than everything else?
@@ -64,7 +64,7 @@ $ rg fast README.md
search any files, then re-run ripgrep with the `--debug` flag. One likely cause
of this is that you have a `*` rule in a `$HOME/.gitignore` file.)
So what happened here? ripgrep read the contents of `README.md`, and for each
So what happened here? ripgrep read the contents of `README-ripgrep.md`, and for each
line that contained `fast`, ripgrep printed it to your terminal. ripgrep also
included the line number for each line by default. If your terminal supports
colors, then your output might actually look something like this screenshot:
@@ -79,7 +79,7 @@ what if we wanted to find all lines have a word that contains `fast` followed
by some number of other letters?
```
$ rg 'fast\w+' README.md
$ rg 'fast\w+' README-ripgrep.md
75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
119:### Is it really faster than everything else?
```
@@ -95,7 +95,7 @@ like `faster` will. `faste` would also match!
Here's a different variation on this same theme:
```
$ rg 'fast\w*' README.md
$ rg 'fast\w*' README-ripgrep.md
75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
119:### Is it really faster than everything else?
@@ -444,7 +444,7 @@ text with some other text. This is easiest to explain with an example. Remember
when we searched for the word `fast` in ripgrep's README?
```
$ rg fast README.md
$ rg fast README-ripgrep.md
75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
119:### Is it really faster than everything else?
@@ -456,7 +456,7 @@ What if we wanted to *replace* all occurrences of `fast` with `FAST`? That's
easy with ripgrep's `--replace` flag:
```
$ rg fast README.md --replace FAST
$ rg fast README-ripgrep.md --replace FAST
75: FASTer than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays FAST while
119:### Is it really FASTer than everything else?
@@ -467,7 +467,7 @@ $ rg fast README.md --replace FAST
or, more succinctly,
```
$ rg fast README.md -r FAST
$ rg fast README-ripgrep.md -r FAST
[snip]
```
@@ -476,7 +476,7 @@ in the output. If you instead wanted to replace an entire line of text, then
you need to include the entire line in your match. For example:
```
$ rg '^.*fast.*$' README.md -r FAST
$ rg '^.*fast.*$' README-ripgrep.md -r FAST
75:FAST
88:FAST
119:FAST
@@ -488,7 +488,7 @@ Alternatively, you can combine the `--only-matching` (or `-o` for short) with
the `--replace` flag to achieve the same result:
```
$ rg fast README.md --only-matching --replace FAST
$ rg fast README-ripgrep.md --only-matching --replace FAST
75:FAST
88:FAST
119:FAST
@@ -499,7 +499,7 @@ $ rg fast README.md --only-matching --replace FAST
or, more succinctly,
```
$ rg fast README.md -or FAST
$ rg fast README-ripgrep.md -or FAST
[snip]
```
@@ -512,7 +512,7 @@ group" (indicated by parentheses) so that we can reference it later in our
replacement string. For example:
```
$ rg 'fast\s+(\w+)' README.md -r 'fast-$1'
$ rg 'fast\s+(\w+)' README-ripgrep.md -r 'fast-$1'
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast-while
124:Summarizing, `ripgrep` is fast-because:
```
@@ -528,7 +528,7 @@ using the indices. For example, the following command is equivalent to the
above command:
```
$ rg 'fast\s+(?P<word>\w+)' README.md -r 'fast-$word'
$ rg 'fast\s+(?P<word>\w+)' README-ripgrep.md -r 'fast-$word'
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast-while
124:Summarizing, `ripgrep` is fast-because:
```

README-ripgrep.md (new file)

@@ -0,0 +1,541 @@
ripgrep (rg)
------------
ripgrep is a line-oriented search tool that recursively searches the current
directory for a regex pattern. By default, ripgrep will respect gitignore rules
and automatically skip hidden files/directories and binary files. (To disable
all automatic filtering by default, use `rg -uuu`.) ripgrep has first class
support on Windows, macOS and Linux, with binary downloads available for [every
release](https://github.com/BurntSushi/ripgrep/releases). ripgrep is similar to
other popular search tools like The Silver Searcher, ack and grep.
[![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions)
[![Crates.io](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep)
[![Packaging status](https://repology.org/badge/tiny-repos/ripgrep.svg)](https://repology.org/project/ripgrep/badges)
Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org).
### CHANGELOG
Please see the [CHANGELOG](CHANGELOG.md) for a release history.
### Documentation quick links
* [Installation](#installation)
* [User Guide](GUIDE.md)
* [Frequently Asked Questions](FAQ.md)
* [Regex syntax](https://docs.rs/regex/1/regex/#syntax)
* [Configuration files](GUIDE.md#configuration-file)
* [Shell completions](FAQ.md#complete)
* [Building](#building)
* [Translations](#translations)
### Screenshot of search results
[![A screenshot of a sample search with ripgrep](https://burntsushi.net/stuff/ripgrep1.png)](https://burntsushi.net/stuff/ripgrep1.png)
### Quick examples comparing tools
This example searches the entire
[Linux kernel source tree](https://github.com/BurntSushi/linux)
(after running `make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where
all matches must be words. Timings were collected on a system with an Intel
i9-12900K 5.2 GHz.
Please remember that a single benchmark is never enough! See my
[blog post on ripgrep](https://blog.burntsushi.net/ripgrep/)
for a very detailed comparison with more benchmarks and analysis.
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 536 | **0.082s** (1.00x) |
| [hypergrep](https://github.com/p-ranav/hypergrep) | `hgrep -n -w '[A-Z]+_SUSPEND'` | 536 | 0.167s (2.04x) |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `git grep -P -n -w '[A-Z]+_SUSPEND'` | 536 | 0.273s (3.34x) |
| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 534 | 0.443s (5.43x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r --ignore-files --no-hidden -I -w '[A-Z]+_SUSPEND'` | 536 | 0.639s (7.82x) |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 536 | 0.727s (8.91x) |
| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 536 | 2.670s (32.70x) |
| [ack](https://github.com/beyondgrep/ack3) | `ack -w '[A-Z]+_SUSPEND'` | 2677 | 2.935s (35.94x) |
Here's another benchmark on the same corpus as above that disregards gitignore
files and searches with a whitelist instead. The corpus is the same as in the
previous benchmark, and the flags passed to each command ensure that they are
doing equivalent work:
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg -uuu -tc -n -w '[A-Z]+_SUSPEND'` | 447 | **0.063s** (1.00x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 447 | 0.607s (9.62x) |
| [GNU grep](https://www.gnu.org/software/grep/) | `grep -E -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 447 | 0.674s (10.69x) |
Now we'll move to searching on a single large file. Here is a straight-up
comparison between ripgrep, ugrep and GNU grep on a file cached in memory
(~13GB, [`OpenSubtitles.raw.en.gz`](http://opus.nlpl.eu/download.php?f=OpenSubtitles/v2018/mono/OpenSubtitles.raw.en.gz), decompressed):
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep (Unicode) | `rg -w 'Sherlock [A-Z]\w+'` | 7882 | **1.042s** (1.00x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w 'Sherlock [A-Z]\w+'` | 7882 | 1.339s (1.28x) |
| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 egrep -w 'Sherlock [A-Z]\w+'` | 7882 | 6.577s (6.31x) |
In the above benchmark, passing the `-n` flag (for showing line numbers)
increases the times to `1.664s` for ripgrep and `9.484s` for GNU grep. ugrep
times are unaffected by the presence or absence of `-n`.
Beware of performance cliffs though:
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep (Unicode) | `rg -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | **1.053s** (1.00x) |
| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 grep -E -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | 6.234s (5.92x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | 28.973s (27.51x) |
And performance can drop precipitously across the board when searching big
files for patterns without any opportunities for literal optimizations:
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg '[A-Za-z]{30}'` | 6749 | **15.569s** (1.00x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -E '[A-Za-z]{30}'` | 6749 | 21.857s (1.40x) |
| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C grep -E '[A-Za-z]{30}'` | 6749 | 32.409s (2.08x) |
| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 grep -E '[A-Za-z]{30}'` | 6795 | 8m30s (32.74x) |
Finally, high match counts also tend to both tank performance and smooth
out the differences between tools (because performance is dominated by how
quickly one can handle a match and not the algorithm used to detect the match,
generally speaking):
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg the` | 83499915 | **6.948s** (1.00x) |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep the` | 83499915 | 11.721s (1.69x) |
| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C grep the` | 83499915 | 15.217s (2.19x) |
### Why should I use ripgrep?
* It can replace many use cases served by other search tools
because it contains most of their features and is generally faster. (See
[the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly
replace grep.)
* Like other tools specialized to code search, ripgrep defaults to
[recursive search](GUIDE.md#recursive-search) and does [automatic
filtering](GUIDE.md#automatic-filtering). Namely, ripgrep won't search files
ignored by your `.gitignore`/`.ignore`/`.rgignore` files, it won't search
hidden files and it won't search binary files. Automatic filtering can be
disabled with `rg -uuu`.
* ripgrep can [search specific types of files](GUIDE.md#manual-filtering-file-types).
For example, `rg -tpy foo` limits your search to Python files and `rg -Tjs
foo` excludes JavaScript files from your search. ripgrep can be taught about
new file types with custom matching rules.
* ripgrep supports many features found in `grep`, such as showing the context
of search results, searching multiple patterns, highlighting matches with
color and full Unicode support. Unlike GNU grep, ripgrep stays fast while
supporting Unicode (which is always on).
* ripgrep has optional support for switching its regex engine to use PCRE2.
Among other things, this makes it possible to use look-around and
backreferences in your patterns, which are not supported in ripgrep's default
regex engine. PCRE2 support can be enabled with `-P/--pcre2` (use PCRE2
always) or `--auto-hybrid-regex` (use PCRE2 only if needed). An alternative
syntax is provided via the `--engine (default|pcre2|auto)` option; see the sketch after this list.
* ripgrep has [rudimentary support for replacements](GUIDE.md#replacements),
which permit rewriting output based on what was matched.
* ripgrep supports [searching files in text encodings](GUIDE.md#file-encoding)
other than UTF-8, such as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more.
(Some support for automatically detecting UTF-16 is provided. Other text
encodings must be specifically specified with the `-E/--encoding` flag.)
* ripgrep supports searching files compressed in a common format (brotli,
bzip2, gzip, lz4, lzma, xz, or zstandard) with the `-z/--search-zip` flag.
* ripgrep supports
[arbitrary input preprocessing filters](GUIDE.md#preprocessor)
which could be PDF text extraction, less supported decompression, decrypting,
automatic encoding detection and so on.
* ripgrep can be configured via a
[configuration file](GUIDE.md#configuration-file).
In other words, use ripgrep if you like speed, filtering by default, fewer
bugs and Unicode support.
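
For the PCRE2-related flags mentioned above, a minimal sketch (assuming ripgrep was built with the `pcre2` feature; the patterns are only examples of PCRE2-only syntax):

```
# Force the PCRE2 engine; look-behind is not supported by the default engine.
$ rg -P '(?<=TODO: )\w+' src/

# Let ripgrep choose: the default engine is used unless the pattern requires
# PCRE2-only syntax (here, a look-ahead).
$ rg --engine auto '\w+(?=\()' src/
```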
### Why shouldn't I use ripgrep?
Despite initially not wanting to add every feature under the sun to ripgrep,
over time, ripgrep has grown support for most features found in other file
searching tools. This includes searching for results spanning across multiple
lines, and opt-in support for PCRE2, which provides look-around and
backreference support.
At this point, the primary reasons not to use ripgrep probably consist of one
or more of the following:
* You need a portable and ubiquitous tool. While ripgrep works on Windows,
macOS and Linux, it is not ubiquitous and it does not conform to any
standard such as POSIX. The best tool for this job is good old grep.
* There still exists some other feature (or bug) not listed in this README that
you rely on that's in another tool that isn't in ripgrep.
* There is a performance edge case where ripgrep doesn't do well where another
tool does do well. (Please file a bug report!)
* ripgrep isn't possible to install on your machine or isn't available for your
platform. (Please file a bug report!)
### Is it really faster than everything else?
Generally, yes. A large number of benchmarks with detailed analysis for each is
[available on my blog](https://blog.burntsushi.net/ripgrep/).
Summarizing, ripgrep is fast because:
* It is built on top of
[Rust's regex engine](https://github.com/rust-lang/regex).
Rust's regex engine uses finite automata, SIMD and aggressive literal
optimizations to make searching very fast. (PCRE2 support can be opted into
with the `-P/--pcre2` flag.)
* Rust's regex library maintains performance with full Unicode support by
building UTF-8 decoding directly into its deterministic finite automaton
engine.
* It supports searching with either memory maps or by searching incrementally
with an intermediate buffer. The former is better for single files and the
latter is better for large directories. ripgrep chooses the best searching
strategy for you automatically.
* Applies your ignore patterns in `.gitignore` files using a
[`RegexSet`](https://docs.rs/regex/1/regex/struct.RegexSet.html).
That means a single file path can be matched against multiple glob patterns
simultaneously.
* It uses a lock-free parallel recursive directory iterator, courtesy of
[`crossbeam`](https://docs.rs/crossbeam) and
[`ignore`](https://docs.rs/ignore).
### Feature comparison
Andy Lester, author of [ack](https://beyondgrep.com/), has published an
excellent table comparing the features of ack, ag, git-grep, GNU grep and
ripgrep: https://beyondgrep.com/feature-comparison/
Note that ripgrep has grown a few significant new features recently that
are not yet present in Andy's table. This includes, but is not limited to,
configuration files, passthru, support for searching compressed files,
multiline search and opt-in fancy regex support via PCRE2.
### Playground
If you'd like to try ripgrep before installing, there's an unofficial
[playground](https://codapi.org/ripgrep/) and an [interactive
tutorial](https://codapi.org/try/ripgrep/).
If you have any questions about these, please open an issue in the [tutorial
repo](https://github.com/nalgeon/tryxinyminutes).
### Installation
The binary name for ripgrep is `rg`.
**[Archives of precompiled binaries for ripgrep are available for Windows,
macOS and Linux.](https://github.com/BurntSushi/ripgrep/releases)** Linux and
Windows binaries are static executables. Users of platforms not explicitly
mentioned below are advised to download one of these archives.
If you're a **macOS Homebrew** or a **Linuxbrew** user, then you can install
ripgrep from homebrew-core:
```
$ brew install ripgrep
```
If you're a **MacPorts** user, then you can install ripgrep from the
[official ports](https://www.macports.org/ports.php?by=name&substr=ripgrep):
```
$ sudo port install ripgrep
```
If you're a **Windows Chocolatey** user, then you can install ripgrep from the
[official repo](https://chocolatey.org/packages/ripgrep):
```
$ choco install ripgrep
```
If you're a **Windows Scoop** user, then you can install ripgrep from the
[official bucket](https://github.com/ScoopInstaller/Main/blob/master/bucket/ripgrep.json):
```
$ scoop install ripgrep
```
If you're a **Windows Winget** user, then you can install ripgrep from the
[winget-pkgs](https://github.com/microsoft/winget-pkgs/tree/master/manifests/b/BurntSushi/ripgrep)
repository:
```
$ winget install BurntSushi.ripgrep.MSVC
```
If you're an **Arch Linux** user, then you can install ripgrep from the official repos:
```
$ sudo pacman -S ripgrep
```
If you're a **Gentoo** user, you can install ripgrep from the
[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep):
```
$ sudo emerge sys-apps/ripgrep
```
If you're a **Fedora** user, you can install ripgrep from official
repositories.
```
$ sudo dnf install ripgrep
```
If you're an **openSUSE** user, ripgrep is included in **openSUSE Tumbleweed**
and **openSUSE Leap** since 15.1.
```
$ sudo zypper install ripgrep
```
If you're a **CentOS Stream 10** user, you can install ripgrep from the
[EPEL](https://docs.fedoraproject.org/en-US/epel/getting-started/) repository:
```
$ sudo dnf config-manager --set-enabled crb
$ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-10.noarch.rpm
$ sudo dnf install ripgrep
```
If you're a **Red Hat 10** user, you can install ripgrep from the
[EPEL](https://docs.fedoraproject.org/en-US/epel/getting-started/) repository:
```
$ sudo subscription-manager repos --enable codeready-builder-for-rhel-10-$(arch)-rpms
$ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-10.noarch.rpm
$ sudo dnf install ripgrep
```
If you're a **Rocky Linux 10** user, you can install ripgrep from the
[EPEL](https://docs.fedoraproject.org/en-US/epel/getting-started/) repository:
```
$ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-10.noarch.rpm
$ sudo dnf install ripgrep
```
If you're a **Nix** user, you can install ripgrep from
[nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/by-name/ri/ripgrep/package.nix):
```
$ nix-env --install ripgrep
```
If you're a **Flox** user, you can install ripgrep as follows:
```
$ flox install ripgrep
```
If you're a **Guix** user, you can install ripgrep from the official
package collection:
```
$ guix install ripgrep
```
If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**),
then ripgrep can be installed using a binary `.deb` file provided in each
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases).
```
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/14.1.1/ripgrep_14.1.1-1_amd64.deb
$ sudo dpkg -i ripgrep_14.1.1-1_amd64.deb
```
If you run Debian stable, ripgrep is [officially maintained by
Debian](https://tracker.debian.org/pkg/rust-ripgrep), although its version may
be older than the `deb` package available in the previous step.
```
$ sudo apt-get install ripgrep
```
If you're an **Ubuntu Cosmic (18.10)** (or newer) user, ripgrep is
[available](https://launchpad.net/ubuntu/+source/rust-ripgrep) using the same
packaging as Debian:
```
$ sudo apt-get install ripgrep
```
(N.B. Various snaps for ripgrep on Ubuntu are also available, but none of them
seem to work right and generate a number of very strange bug reports that I
don't know how to fix and don't have the time to fix. Therefore, it is no
longer a recommended installation option.)
If you're an **ALT** user, you can install ripgrep from the
[official repo](https://packages.altlinux.org/en/search?name=ripgrep):
```
$ sudo apt-get install ripgrep
```
If you're a **FreeBSD** user, then you can install ripgrep from the
[official ports](https://www.freshports.org/textproc/ripgrep/):
```
$ sudo pkg install ripgrep
```
If you're an **OpenBSD** user, then you can install ripgrep from the
[official ports](https://openports.se/textproc/ripgrep):
```
$ doas pkg_add ripgrep
```
If you're a **NetBSD** user, then you can install ripgrep from
[pkgsrc](https://pkgsrc.se/textproc/ripgrep):
```
$ sudo pkgin install ripgrep
```
If you're a **Haiku x86_64** user, then you can install ripgrep from the
[official ports](https://github.com/haikuports/haikuports/tree/master/sys-apps/ripgrep):
```
$ sudo pkgman install ripgrep
```
If you're a **Haiku x86_gcc2** user, then you can install ripgrep from the
same port as Haiku x86_64 using the x86 secondary architecture build:
```
$ sudo pkgman install ripgrep_x86
```
If you're a **Void Linux** user, then you can install ripgrep from the
[official repository](https://voidlinux.org/packages/?arch=x86_64&q=ripgrep):
```
$ sudo xbps-install -Syv ripgrep
```
If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
* Note that the minimum supported version of Rust for ripgrep is **1.85.0**,
although ripgrep may work with older versions.
* Note that the binary may be bigger than expected because it contains debug
symbols. This is intentional. To remove debug symbols and therefore reduce
the file size, run `strip` on the binary.
```
$ cargo install ripgrep
```
Alternatively, one can use [`cargo
binstall`](https://github.com/cargo-bins/cargo-binstall) to install a ripgrep
binary directly from GitHub:
```
$ cargo binstall ripgrep
```
### Building
ripgrep is written in Rust, so you'll need to grab a
[Rust installation](https://www.rust-lang.org/) in order to compile it.
ripgrep compiles with Rust 1.85.0 (stable) or newer. In general, ripgrep tracks
the latest stable release of the Rust compiler.
To build ripgrep:
```
$ git clone https://github.com/BurntSushi/ripgrep
$ cd ripgrep
$ cargo build --release
$ ./target/release/rg --version
0.1.3
```
**NOTE:** In the past, ripgrep supported a `simd-accel` Cargo feature when
using a Rust nightly compiler. This only benefited UTF-16 transcoding.
Since it required unstable features, this build mode was prone to breakage.
Because of that, support for it has been removed. If you want SIMD
optimizations for UTF-16 transcoding, then you'll have to petition the
[`encoding_rs`](https://github.com/hsivonen/encoding_rs) project to use stable
APIs.
Finally, optional PCRE2 support can be built with ripgrep by enabling the
`pcre2` feature:
```
$ cargo build --release --features 'pcre2'
```
Enabling the PCRE2 feature works with a stable Rust compiler and will
attempt to automatically find and link with your system's PCRE2 library via
`pkg-config`. If one doesn't exist, then ripgrep will build PCRE2 from source
using your system's C compiler and then statically link it into the final
executable. Static linking can be forced even when there is an available PCRE2
system library by either building ripgrep with the MUSL target or by setting
`PCRE2_SYS_STATIC=1`.
ripgrep can be built with the MUSL target on Linux by first installing the MUSL
library on your system (consult your friendly neighborhood package manager).
Then you just need to add MUSL support to your Rust toolchain and rebuild
ripgrep, which yields a fully static executable:
```
$ rustup target add x86_64-unknown-linux-musl
$ cargo build --release --target x86_64-unknown-linux-musl
```
Applying the `--features` flag from above works as expected. If you want to
build a static executable with MUSL and with PCRE2, then you will need to have
`musl-gcc` installed, which might be in a separate package from the actual
MUSL library, depending on your Linux distribution.
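
Putting the pieces above together, a fully static binary with PCRE2 bundled can be produced roughly as follows (a sketch assuming `musl-gcc` is installed; `PCRE2_SYS_STATIC=1` forces PCRE2 to be compiled from source and linked statically):

```
$ rustup target add x86_64-unknown-linux-musl
$ PCRE2_SYS_STATIC=1 cargo build --release \
    --target x86_64-unknown-linux-musl \
    --features pcre2
```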
### Running tests
ripgrep is relatively well-tested, including both unit tests and integration
tests. To run the full test suite, use:
```
$ cargo test --all
```
from the repository root.
### Related tools
* [delta](https://github.com/dandavison/delta) is a syntax highlighting
pager that supports the `rg --json` output format. So all you need to do to
make it work is `rg --json pattern | delta`. See [delta's manual section on
grep](https://dandavison.github.io/delta/grep.html) for more details.
### Vulnerability reporting
For reporting a security vulnerability, please
[contact Andrew Gallant](https://blog.burntsushi.net/about/).
The contact page has my email address and PGP public key if you wish to send an
encrypted message.
### Translations
The following is a list of known translations of ripgrep's documentation. These
are unofficially maintained and may not be up to date.
* [Chinese](https://github.com/chinanf-boy/ripgrep-zh#%E6%9B%B4%E6%96%B0-)
* [Spanish](https://github.com/UltiRequiem/traducciones/tree/master/ripgrep)

README.md

@@ -1,480 +1,42 @@
ripgrep (rg)
------------
ripgrep is a line-oriented search tool that recursively searches the current
directory for a regex pattern. By default, ripgrep will respect gitignore rules
and automatically skip hidden files/directories and binary files. (To disable
all automatic filtering by default, use `rg -uuu`.) ripgrep has first class
support on Windows, macOS and Linux, with binary downloads available for [every
release](https://github.com/BurntSushi/ripgrep/releases). ripgrep is similar to
other popular search tools like The Silver Searcher, ack and grep.
# rgs
[![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions)
[![Crates.io](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep)
[![Packaging status](https://repology.org/badge/tiny-repos/ripgrep.svg)](https://repology.org/project/ripgrep/badges)
This repository is a fork of ripgrep with additional features. The original
ripgrep documentation is in README-ripgrep.md:
Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org).
- README-ripgrep.md
## Additional features in this fork
### CHANGELOG
### Multiline windowing
Please see the [CHANGELOG](CHANGELOG.md) for a release history.
- `--multiline-window=N` (short: `-W N`) limits multiline matches to a sliding
window of N lines while still using multiline matching semantics.
- `--multiline-window` implicitly enables `--multiline` and cannot be used with
`--no-multiline`.
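
A usage sketch (the pattern, window size and paths here are only illustrative; the flag behavior is as described above):

```
# Multiline search, but only consider matches that fit in a window of 4 lines.
# -W/--multiline-window implies --multiline.
$ rgs --multiline-window=4 'TODO.*\n.*FIXME' src/

# Same search using the short flag.
$ rgs -W 4 'TODO.*\n.*FIXME' src/
```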
### Documentation quick links
### Per-file match indexing
* [Installation](#installation)
* [User Guide](GUIDE.md)
* [Frequently Asked Questions](FAQ.md)
* [Regex syntax](https://docs.rs/regex/1/regex/#syntax)
* [Configuration files](GUIDE.md#configuration-file)
* [Shell completions](FAQ.md#complete)
* [Building](#building)
* [Translations](#translations)
- `--in-file-index` / `--no-in-file-index` control indexing of matches within a
file to disambiguate overlapping multiline results.
- When enabled, output is formatted as `filename[index]:line:`.
- When searching a single file, the output is formatted as `[index]:line:` (no
filename).
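
An illustrative sketch of the indexing flags (commands only; actual indices depend on your input):

```
# Prefix each match with its per-file index: filename[index]:line:
$ rgs --in-file-index -W 4 'TODO.*\n.*FIXME' src/

# Turn indexing back off, e.g. to override a setting from a config file.
$ rgs --no-in-file-index 'TODO' src/
```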
### Squashed output
### Screenshot of search results
- `--squash` collapses contiguous Unicode whitespace (including newlines) into a
single ASCII space in output.
- `--squash-nl-only` collapses newlines into spaces while preserving other
whitespace.
- When multiple lines are squashed into one, line numbers are printed as
`start-end:`.
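
And a sketch of the squashing flags (again, patterns and paths are made up):

```
# Collapse all contiguous whitespace (including newlines) in squashed output
# to a single ASCII space; squashed ranges print line numbers as start-end:.
$ rgs --squash -W 4 'TODO.*\n.*FIXME' src/

# Collapse only newlines, preserving other whitespace such as indentation.
$ rgs --squash-nl-only -W 4 'TODO.*\n.*FIXME' src/
```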
[![A screenshot of a sample search with ripgrep](https://burntsushi.net/stuff/ripgrep1.png)](https://burntsushi.net/stuff/ripgrep1.png)
### Binary name
- The target binary name is `rgs` (not `rg`).
### Quick examples comparing tools
## Acknowledgements
This example searches the entire
[Linux kernel source tree](https://github.com/BurntSushi/linux)
(after running `make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where
all matches must be words. Timings were collected on a system with an Intel
i7-6900K 3.2 GHz.
Please remember that a single benchmark is never enough! See my
[blog post on ripgrep](https://blog.burntsushi.net/ripgrep/)
for a very detailed comparison with more benchmarks and analysis.
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 452 | **0.136s** |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `git grep -P -n -w '[A-Z]+_SUSPEND'` | 452 | 0.348s |
| [ugrep (Unicode)](https://github.com/Genivia/ugrep) | `ugrep -r --ignore-files --no-hidden -I -w '[A-Z]+_SUSPEND'` | 452 | 0.506s |
| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 452 | 0.654s |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 452 | 1.150s |
| [ack](https://github.com/beyondgrep/ack3) | `ack -w '[A-Z]+_SUSPEND'` | 452 | 4.054s |
| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 452 | 4.205s |
Here's another benchmark on the same corpus as above that disregards gitignore
files and searches with a whitelist instead. The corpus is the same as in the
previous benchmark, and the flags passed to each command ensure that they are
doing equivalent work:
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg -uuu -tc -n -w '[A-Z]+_SUSPEND'` | 388 | **0.096s** |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 388 | 0.493s |
| [GNU grep](https://www.gnu.org/software/grep/) | `egrep -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 388 | 0.806s |
And finally, a straight-up comparison between ripgrep, ugrep and GNU grep on a
single large file cached in memory
(~13GB, [`OpenSubtitles.raw.en.gz`](http://opus.nlpl.eu/download.php?f=OpenSubtitles/v2018/mono/OpenSubtitles.raw.en.gz)):
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg -w 'Sherlock [A-Z]\w+'` | 7882 | **2.769s** |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w 'Sherlock [A-Z]\w+'` | 7882 | 6.802s |
| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 egrep -w 'Sherlock [A-Z]\w+'` | 7882 | 9.027s |
In the above benchmark, passing the `-n` flag (for showing line numbers)
increases the times to `3.423s` for ripgrep and `13.031s` for GNU grep. ugrep
times are unaffected by the presence or absence of `-n`.
### Why should I use ripgrep?
* It can replace many use cases served by other search tools
because it contains most of their features and is generally faster. (See
[the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly
replace grep.)
* Like other tools specialized to code search, ripgrep defaults to
[recursive search](GUIDE.md#recursive-search) and does [automatic
filtering](GUIDE.md#automatic-filtering). Namely, ripgrep won't search files
ignored by your `.gitignore`/`.ignore`/`.rgignore` files, it won't search
hidden files and it won't search binary files. Automatic filtering can be
disabled with `rg -uuu`.
* ripgrep can [search specific types of files](GUIDE.md#manual-filtering-file-types).
For example, `rg -tpy foo` limits your search to Python files and `rg -Tjs
foo` excludes JavaScript files from your search. ripgrep can be taught about
new file types with custom matching rules.
* ripgrep supports many features found in `grep`, such as showing the context
of search results, searching multiple patterns, highlighting matches with
color and full Unicode support. Unlike GNU grep, ripgrep stays fast while
supporting Unicode (which is always on).
* ripgrep has optional support for switching its regex engine to use PCRE2.
Among other things, this makes it possible to use look-around and
backreferences in your patterns, which are not supported in ripgrep's default
regex engine. PCRE2 support can be enabled with `-P/--pcre2` (use PCRE2
always) or `--auto-hybrid-regex` (use PCRE2 only if needed). An alternative
syntax is provided via the `--engine (default|pcre2|auto-hybrid)` option.
* ripgrep has [rudimentary support for replacements](GUIDE.md#replacements),
which permit rewriting output based on what was matched.
* ripgrep supports [searching files in text encodings](GUIDE.md#file-encoding)
other than UTF-8, such as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more.
(Some support for automatically detecting UTF-16 is provided. Other text
encodings must be specifically specified with the `-E/--encoding` flag.)
* ripgrep supports searching files compressed in a common format (brotli,
bzip2, gzip, lz4, lzma, xz, or zstandard) with the `-z/--search-zip` flag.
* ripgrep supports
[arbitrary input preprocessing filters](GUIDE.md#preprocessor)
which could be PDF text extraction, less supported decompression, decrypting,
automatic encoding detection and so on.
* ripgrep can be configured via a
[configuration file](GUIDE.md#configuration-file).
In other words, use ripgrep if you like speed, filtering by default, fewer
bugs and Unicode support.
### Why shouldn't I use ripgrep?
Despite initially not wanting to add every feature under the sun to ripgrep,
over time, ripgrep has grown support for most features found in other file
searching tools. This includes searching for results spanning across multiple
lines, and opt-in support for PCRE2, which provides look-around and
backreference support.
At this point, the primary reasons not to use ripgrep probably consist of one
or more of the following:
* You need a portable and ubiquitous tool. While ripgrep works on Windows,
macOS and Linux, it is not ubiquitous and it does not conform to any
standard such as POSIX. The best tool for this job is good old grep.
* There still exists some other feature (or bug) not listed in this README that
you rely on that's in another tool that isn't in ripgrep.
* There is a performance edge case where ripgrep doesn't do well where another
tool does do well. (Please file a bug report!)
* ripgrep isn't possible to install on your machine or isn't available for your
platform. (Please file a bug report!)
### Is it really faster than everything else?
Generally, yes. A large number of benchmarks with detailed analysis for each is
[available on my blog](https://blog.burntsushi.net/ripgrep/).
Summarizing, ripgrep is fast because:
* It is built on top of
[Rust's regex engine](https://github.com/rust-lang/regex).
Rust's regex engine uses finite automata, SIMD and aggressive literal
optimizations to make searching very fast. (PCRE2 support can be opted into
with the `-P/--pcre2` flag.)
* Rust's regex library maintains performance with full Unicode support by
building UTF-8 decoding directly into its deterministic finite automaton
engine.
* It supports searching with either memory maps or by searching incrementally
with an intermediate buffer. The former is better for single files and the
latter is better for large directories. ripgrep chooses the best searching
strategy for you automatically.
* Applies your ignore patterns in `.gitignore` files using a
[`RegexSet`](https://docs.rs/regex/1/regex/struct.RegexSet.html).
That means a single file path can be matched against multiple glob patterns
simultaneously.
* It uses a lock-free parallel recursive directory iterator, courtesy of
[`crossbeam`](https://docs.rs/crossbeam) and
[`ignore`](https://docs.rs/ignore).
### Feature comparison
Andy Lester, author of [ack](https://beyondgrep.com/), has published an
excellent table comparing the features of ack, ag, git-grep, GNU grep and
ripgrep: https://beyondgrep.com/feature-comparison/
Note that ripgrep has grown a few significant new features recently that
are not yet present in Andy's table. This includes, but is not limited to,
configuration files, passthru, support for searching compressed files,
multiline search and opt-in fancy regex support via PCRE2.
### Installation
The binary name for ripgrep is `rg`.
**[Archives of precompiled binaries for ripgrep are available for Windows,
macOS and Linux.](https://github.com/BurntSushi/ripgrep/releases)** Linux and
Windows binaries are static executables. Users of platforms not explicitly
mentioned below are advised to download one of these archives.
If you're a **macOS Homebrew** or a **Linuxbrew** user, then you can install
ripgrep from homebrew-core:
```
$ brew install ripgrep
```
If you're a **MacPorts** user, then you can install ripgrep from the
[official ports](https://www.macports.org/ports.php?by=name&substr=ripgrep):
```
$ sudo port install ripgrep
```
If you're a **Windows Chocolatey** user, then you can install ripgrep from the
[official repo](https://chocolatey.org/packages/ripgrep):
```
$ choco install ripgrep
```
If you're a **Windows Scoop** user, then you can install ripgrep from the
[official bucket](https://github.com/ScoopInstaller/Main/blob/master/bucket/ripgrep.json):
```
$ scoop install ripgrep
```
If you're a **Windows Winget** user, then you can install ripgrep from the
[winget-pkgs](https://github.com/microsoft/winget-pkgs/tree/master/manifests/b/BurntSushi/ripgrep)
repository:
```
$ winget install BurntSushi.ripgrep.MSVC
```
If you're an **Arch Linux** user, then you can install ripgrep from the official repos:
```
$ sudo pacman -S ripgrep
```
If you're a **Gentoo** user, you can install ripgrep from the
[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep):
```
$ sudo emerge sys-apps/ripgrep
```
If you're a **Fedora** user, you can install ripgrep from official
repositories:
```
$ sudo dnf install ripgrep
```
If you're an **openSUSE** user, ripgrep is included in **openSUSE Tumbleweed**
and, since 15.1, in **openSUSE Leap**:
```
$ sudo zypper install ripgrep
```
If you're a **RHEL/CentOS 7/8** user, you can install ripgrep from
[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
```
$ sudo yum install -y yum-utils
$ sudo yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo
$ sudo yum install ripgrep
```
If you're a **Nix** user, you can install ripgrep from
[nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/text/ripgrep/default.nix):
```
$ nix-env --install ripgrep
```
If you're a **Guix** user, you can install ripgrep from the official
package collection:
```
$ sudo guix install ripgrep
```
If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**),
then ripgrep can be installed using a binary `.deb` file provided in each
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases).
```
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/13.0.0/ripgrep_13.0.0_amd64.deb
$ sudo dpkg -i ripgrep_13.0.0_amd64.deb
```
If you run Debian stable, ripgrep is [officially maintained by
Debian](https://tracker.debian.org/pkg/rust-ripgrep), although its version may
be older than the `deb` package available in the previous step.
```
$ sudo apt-get install ripgrep
```
If you're an **Ubuntu Cosmic (18.10)** (or newer) user, ripgrep is
[available](https://launchpad.net/ubuntu/+source/rust-ripgrep) using the same
packaging as Debian:
```
$ sudo apt-get install ripgrep
```
(N.B. Various snaps for ripgrep on Ubuntu are also available, but none of them
seem to work right, and they generate a number of very strange bug reports that
I don't know how to fix and don't have the time to fix. Therefore, it is no
longer a recommended installation option.)
If you're an **ALT** user, you can install ripgrep from the
[official repo](https://packages.altlinux.org/en/search?name=ripgrep):
```
$ sudo apt-get install ripgrep
```
If you're a **FreeBSD** user, then you can install ripgrep from the
[official ports](https://www.freshports.org/textproc/ripgrep/):
```
$ sudo pkg install ripgrep
```
If you're an **OpenBSD** user, then you can install ripgrep from the
[official ports](https://openports.se/textproc/ripgrep):
```
$ doas pkg_add ripgrep
```
If you're a **NetBSD** user, then you can install ripgrep from
[pkgsrc](https://pkgsrc.se/textproc/ripgrep):
```
$ sudo pkgin install ripgrep
```
If you're a **Haiku x86_64** user, then you can install ripgrep from the
[official ports](https://github.com/haikuports/haikuports/tree/master/sys-apps/ripgrep):
```
$ sudo pkgman install ripgrep
```
If you're a **Haiku x86_gcc2** user, then you can install ripgrep from the
same port as Haiku x86_64 using the x86 secondary architecture build:
```
$ sudo pkgman install ripgrep_x86
```
If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
* Note that the minimum supported version of Rust for ripgrep is **1.70.0**,
although ripgrep may work with older versions.
* Note that the binary may be bigger than expected because it contains debug
  symbols. This is intentional. To remove debug symbols and therefore reduce
  the file size, run `strip` on the binary, as shown below.
```
$ cargo install ripgrep
```
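For example, to strip the installed binary (by default, `cargo install` puts
binaries in `~/.cargo/bin`; adjust the path if you use a custom `CARGO_HOME`
or `--root`):
```
$ strip ~/.cargo/bin/rg
```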
Alternatively, one can use [`cargo
binstall`](https://github.com/cargo-bins/cargo-binstall) to install a ripgrep
binary directly from GitHub:
```
$ cargo binstall ripgrep
```
### Building
ripgrep is written in Rust, so you'll need to grab a
[Rust installation](https://www.rust-lang.org/) in order to compile it.
ripgrep compiles with Rust 1.70.0 (stable) or newer. In general, ripgrep tracks
the latest stable release of the Rust compiler.
To build ripgrep:
```
$ git clone https://github.com/BurntSushi/ripgrep
$ cd ripgrep
$ cargo build --release
$ ./target/release/rg --version
0.1.3
```
If you have a Rust nightly compiler and a recent Intel CPU, then you can enable
additional optional SIMD acceleration like so:
```
RUSTFLAGS="-C target-cpu=native" cargo build --release --features 'simd-accel'
```
The `simd-accel` feature enables SIMD support in certain ripgrep dependencies
(responsible for transcoding). It is not necessary for getting SIMD
optimizations for search; those are enabled automatically. Hopefully, some day,
the `simd-accel` feature will similarly become unnecessary. **WARNING:**
Currently, enabling this option can increase compilation times dramatically.
Finally, optional PCRE2 support can be built with ripgrep by enabling the
`pcre2` feature:
```
$ cargo build --release --features 'pcre2'
```
(Tip: use `--features 'pcre2 simd-accel'` to also include compile time SIMD
optimizations, which will only work with a nightly compiler.)
Enabling the PCRE2 feature works with a stable Rust compiler and will
attempt to automatically find and link with your system's PCRE2 library via
`pkg-config`. If one doesn't exist, then ripgrep will build PCRE2 from source
using your system's C compiler and then statically link it into the final
executable. Static linking can be forced even when there is an available PCRE2
system library by either building ripgrep with the MUSL target or by setting
`PCRE2_SYS_STATIC=1`.
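For example, to force a statically linked PCRE2 on an ordinary (non-MUSL)
build, set the environment variable described above when building with the
`pcre2` feature:
```
$ PCRE2_SYS_STATIC=1 cargo build --release --features 'pcre2'
```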
ripgrep can be built with the MUSL target on Linux by first installing the MUSL
library on your system (consult your friendly neighborhood package manager).
Then you just need to add MUSL support to your Rust toolchain and rebuild
ripgrep, which yields a fully static executable:
```
$ rustup target add x86_64-unknown-linux-musl
$ cargo build --release --target x86_64-unknown-linux-musl
```
Applying the `--features` flag from above works as expected. If you want to
build a static executable with MUSL and with PCRE2, then you will need to have
`musl-gcc` installed, which might be in a separate package from the actual
MUSL library, depending on your Linux distribution.
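Putting the above together, a fully static build that includes PCRE2 might
look like this (assuming the MUSL target and `musl-gcc` are installed as
described):
```
$ cargo build --release --features 'pcre2' --target x86_64-unknown-linux-musl
```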
### Running tests
ripgrep is relatively well-tested, including both unit tests and integration
tests. To run the full test suite, use:
```
$ cargo test --all
```
from the repository root.
### Related tools
* [delta](https://github.com/dandavison/delta) is a syntax highlighting
pager that supports the `rg --json` output format. So all you need to do to
make it work is `rg --json pattern | delta`. See [delta's manual section on
grep](https://dandavison.github.io/delta/grep.html) for more details.
### Vulnerability reporting
For reporting a security vulnerability, please
[contact Andrew Gallant](https://blog.burntsushi.net/about/).
The contact page has my email address and PGP public key if you wish to send an
encrypted message.
### Translations
The following is a list of known translations of ripgrep's documentation. These
are unofficially maintained and may not be up to date.
* [Chinese](https://github.com/chinanf-boy/ripgrep-zh#%E6%9B%B4%E6%96%B0-)
* [Spanish](https://github.com/UltiRequiem/traducciones/tree/master/ripgrep)
This project is built on top of ripgrep by Andrew Gallant and contributors.
All credit for the original tool, documentation, and design belongs to the
ripgrep project. See README-ripgrep.md and the upstream licenses for details.

View File

@@ -7,6 +7,7 @@
a strong motivation otherwise, review and update every dependency. Also
run `--aggressive`, but don't update to crates that are still in beta.
* Update date in `crates/core/flags/doc/template.rg.1`.
* Update the CHANGELOG as appropriate.
* Review changes for every crate in `crates` since the last ripgrep release.
If the set of changes is non-empty, issue a new release for that crate. Check
crates in the following order. After updating a crate, ensure minimal
@@ -22,7 +23,6 @@
* crates/printer
* crates/grep (bump minimal versions as necessary)
* crates/core (do **not** bump version, but update dependencies as needed)
* Update the CHANGELOG as appropriate.
* Edit the `Cargo.toml` to set the new ripgrep version. Run
`cargo update -p ripgrep` so that the `Cargo.lock` is updated. Commit the
changes and create a new signed tag. Alternatively, use
@@ -41,8 +41,6 @@
> tool that recursively searches the current directory for a regex pattern.
> By default, ripgrep will respect gitignore rules and automatically skip
> hidden files/directories and binary files.
* Run `git checkout {VERSION} && ci/build-and-publish-m2 {VERSION}` on a macOS
system with Apple silicon.
* Run `cargo publish`.
* Run `ci/sha256-releases {VERSION} >> pkg/brew/ripgrep-bin.rb`. Then edit
`pkg/brew/ripgrep-bin.rb` to update the version number and sha256 hashes.

View File

@@ -22,13 +22,13 @@ fn set_windows_exe_options() {
manifest.push(MANIFEST);
let Some(manifest) = manifest.to_str() else { return };
println!("cargo:rerun-if-changed={}", MANIFEST);
println!("cargo:rerun-if-changed={MANIFEST}");
// Embed the Windows application manifest file.
println!("cargo:rustc-link-arg-bin=rg=/MANIFEST:EMBED");
println!("cargo:rustc-link-arg-bin=rg=/MANIFESTINPUT:{manifest}");
println!("cargo:rustc-link-arg-bin=rgs=/MANIFEST:EMBED");
println!("cargo:rustc-link-arg-bin=rgs=/MANIFESTINPUT:{manifest}");
// Turn linker warnings into errors. Helps debugging, otherwise the
// warnings get squashed (I believe).
println!("cargo:rustc-link-arg-bin=rg=/WX");
println!("cargo:rustc-link-arg-bin=rgs=/WX");
}
/// Make the current git hash available to the build as the environment
@@ -37,10 +37,25 @@ fn set_git_revision_hash() {
use std::process::Command;
let args = &["rev-parse", "--short=10", "HEAD"];
let Ok(output) = Command::new("git").args(args).output() else { return };
let rev = String::from_utf8_lossy(&output.stdout).trim().to_string();
if rev.is_empty() {
return;
let output = Command::new("git").args(args).output();
match output {
Ok(output) => {
let rev =
String::from_utf8_lossy(&output.stdout).trim().to_string();
if rev.is_empty() {
println!(
"cargo:warning=output from `git rev-parse` is empty, \
so skipping embedding of commit hash"
);
return;
}
println!("cargo:rustc-env=RIPGREP_BUILD_GIT_HASH={rev}");
}
Err(e) => {
println!(
"cargo:warning=failed to run `git rev-parse`, \
so skipping embedding of commit hash: {e}"
);
}
}
println!("cargo:rustc-env=RIPGREP_BUILD_GIT_HASH={}", rev);
}

View File

@@ -1,43 +0,0 @@
#!/bin/bash
# This script builds a ripgrep release for the aarch64-apple-darwin target.
# At time of writing (2023-11-21), GitHub Actions does not free Apple silicon
# runners. Since I have somewhat recently acquired an M2 mac mini, I just use
# this script to build the release tarball and upload it with `gh`.
#
# Once GitHub Actions has proper support for Apple silicon, we should add it
# to our release workflow and drop this script.
set -e
version="$1"
if [ -z "$version" ]; then
echo "missing version" >&2
echo "Usage: "$(basename "$0")" <version>" >&2
exit 1
fi
if ! grep -q "version = \"$version\"" Cargo.toml; then
echo "version does not match Cargo.toml" >&2
exit 1
fi
target=aarch64-apple-darwin
cargo build --release --features pcre2 --target $target
BIN=target/$target/release/rg
NAME=ripgrep-$version-$target
ARCHIVE="deployment/m2/$NAME"
mkdir -p "$ARCHIVE"/{complete,doc}
cp "$BIN" "$ARCHIVE"/
strip "$ARCHIVE/rg"
cp {README.md,COPYING,UNLICENSE,LICENSE-MIT} "$ARCHIVE"/
cp {CHANGELOG.md,FAQ.md,GUIDE.md} "$ARCHIVE"/doc/
"$BIN" --generate complete-bash > "$ARCHIVE/complete/rg.bash"
"$BIN" --generate complete-fish > "$ARCHIVE/complete/rg.fish"
"$BIN" --generate complete-powershell > "$ARCHIVE/complete/_rg.ps1"
"$BIN" --generate complete-zsh > "$ARCHIVE/complete/_rg"
"$BIN" --generate man > "$ARCHIVE/doc/rg.1"
tar c -C deployment/m2 -z -f "$ARCHIVE.tar.gz" "$NAME"
shasum -a 256 "$ARCHIVE.tar.gz" > "$ARCHIVE.tar.gz.sha256"
gh release upload "$version" "$ARCHIVE.tar.gz" "$ARCHIVE.tar.gz.sha256"

View File

@@ -1,23 +0,0 @@
These are Docker images used for cross compilation in CI builds (or locally)
via the [Cross](https://github.com/rust-embedded/cross) tool.
The Cross tool actually provides its own Docker images, and all Docker images
in this directory are derived from one of them. We provide our own in order to
customize the environment. For example, we need to install compression tools
like `xz` so that tests for the `-z/--search-zip` flag are run.
If you make a change to a Docker image, then you can re-build it. `cd` into the
directory containing the `Dockerfile` and run:
$ cd x86_64-unknown-linux-musl
$ ./build
At this point, subsequent uses of `cross` will now use your built image since
Docker prefers local images over remote images. In order to make these changes
stick, they need to be pushed to Docker Hub:
$ docker push burntsushi/cross:x86_64-unknown-linux-musl
Of course, only I (BurntSushi) can push to that location. To make `cross` use
a different location, then edit `Cross.toml` in the root of this repo to use
a different image name for the desired target.

View File

@@ -1,4 +0,0 @@
FROM rustembedded/cross:aarch64-unknown-linux-gnu
COPY stage/ubuntu-install-packages /
RUN /ubuntu-install-packages

View File

@@ -1,5 +0,0 @@
#!/bin/sh
mkdir -p stage
cp ../../ubuntu-install-packages ./stage/
docker build -t burntsushi/cross:aarch64-unknown-linux-gnu .

View File

@@ -1,4 +0,0 @@
FROM rustembedded/cross:i686-unknown-linux-gnu
COPY stage/ubuntu-install-packages /
RUN /ubuntu-install-packages

View File

@@ -1,5 +0,0 @@
#!/bin/sh
mkdir -p stage
cp ../../ubuntu-install-packages ./stage/
docker build -t burntsushi/cross:i686-unknown-linux-gnu .

View File

@@ -1,4 +0,0 @@
FROM rustembedded/cross:powerpc64-unknown-linux-gnu
COPY stage/ubuntu-install-packages /
RUN /ubuntu-install-packages

View File

@@ -1,5 +0,0 @@
#!/bin/sh
mkdir -p stage
cp ../../ubuntu-install-packages ./stage/
docker build -t burntsushi/cross:powerpc64-unknown-linux-gnu .

View File

@@ -1,4 +0,0 @@
FROM rustembedded/cross:s390x-unknown-linux-gnu
COPY stage/ubuntu-install-packages /
RUN /ubuntu-install-packages

View File

@@ -1,5 +0,0 @@
#!/bin/sh
mkdir -p stage
cp ../../ubuntu-install-packages ./stage/
docker build -t burntsushi/cross:s390x-unknown-linux-gnu .

View File

@@ -1,4 +0,0 @@
FROM rustembedded/cross:x86_64-unknown-linux-musl
COPY stage/ubuntu-install-packages /
RUN /ubuntu-install-packages

View File

@@ -1,5 +0,0 @@
#!/bin/sh
mkdir -p stage
cp ../../ubuntu-install-packages ./stage/
docker build -t burntsushi/cross:x86_64-unknown-linux-musl .

View File

@@ -11,7 +11,7 @@ version="$1"
# Linux and Darwin builds.
for arch in i686 x86_64; do
for target in apple-darwin unknown-linux-musl; do
url="https://github.com/BurntSushi/ripgrep/releases/download/$version/ripgrep-$version-$arch-$target.tar.gz"
url="https://git.peisongxiao.com/peisongxiao/rgs/releases/download/$version/rgs-$version-$arch-$target.tar.gz"
sha=$(curl -sfSL "$url" | sha256sum)
echo "$version-$arch-$target $sha"
done
@@ -19,7 +19,7 @@ done
# Source.
for ext in zip tar.gz; do
url="https://github.com/BurntSushi/ripgrep/archive/$version.$ext"
url="https://git.peisongxiao.com/peisongxiao/rgs/archive/$version.$ext"
sha=$(curl -sfSL "$url" | sha256sum)
echo "source.$ext $sha"
done

View File

@@ -18,11 +18,11 @@ get_comp_args() {
main() {
local diff
local rg="${0:a:h}/../${TARGET_DIR:-target}/release/rg"
local _rg="${0:a:h}/../crates/core/flags/complete/rg.zsh"
local rg="${0:a:h}/../${TARGET_DIR:-target}/release/rgs"
local _rg="${0:a:h}/../crates/core/flags/complete/rgs.zsh"
local -a help_args comp_args
[[ -e $rg ]] || rg=${rg/%\/release\/rg/\/debug\/rg}
[[ -e $rg ]] || rg=${rg/%\/release\/rgs/\/debug\/rgs}
rg=${rg:a}
_rg=${_rg:a}

View File

@@ -11,4 +11,4 @@ if ! command -V sudo; then
fi
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
zsh xz-utils liblz4-tool musl-tools brotli zstd
zsh xz-utils liblz4-tool musl-tools brotli zstd g++

View File

@@ -1,6 +1,6 @@
[package]
name = "grep-cli"
version = "0.1.10" #:version
version = "0.1.12" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Utilities for search oriented command line applications.
@@ -11,11 +11,11 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli"
readme = "README.md"
keywords = ["regex", "grep", "cli", "utility", "util"]
license = "Unlicense OR MIT"
edition = "2021"
edition = "2024"
[dependencies]
bstr = { version = "1.6.2", features = ["std"] }
globset = { version = "0.4.14", path = "../globset" }
globset = { version = "0.4.18", path = "../globset" }
log = "0.4.20"
termcolor = "1.3.0"

View File

@@ -177,7 +177,7 @@ impl DecompressionMatcher {
/// If there are multiple possible commands matching the given path, then
/// the command added last takes precedence.
pub fn command<P: AsRef<Path>>(&self, path: P) -> Option<Command> {
for i in self.globs.matches(path).into_iter().rev() {
if let Some(i) = self.globs.matches(path).into_iter().next_back() {
let decomp_cmd = &self.commands[i];
let mut cmd = Command::new(&decomp_cmd.bin);
cmd.args(&decomp_cmd.args);
@@ -414,6 +414,8 @@ impl io::Read for DecompressionReader {
/// relative path. We permit this since it is assumed that the user has set
/// this explicitly, and thus, desires this behavior.
///
/// # Platform behavior
///
/// On non-Windows, this is a no-op.
pub fn resolve_binary<P: AsRef<Path>>(
prog: P,

View File

@@ -16,7 +16,7 @@ use std::{ffi::OsString, io};
pub fn hostname() -> io::Result<OsString> {
#[cfg(windows)]
{
use winapi_util::sysinfo::{get_computer_name, ComputerNameKind};
use winapi_util::sysinfo::{ComputerNameKind, get_computer_name};
get_computer_name(ComputerNameKind::PhysicalDnsHostname)
}
#[cfg(unix)]
@@ -25,10 +25,10 @@ pub fn hostname() -> io::Result<OsString> {
}
#[cfg(not(any(windows, unix)))]
{
io::Error::new(
Err(io::Error::new(
io::ErrorKind::Other,
"hostname could not be found on unsupported platform",
)
))
}
}

View File

@@ -133,19 +133,19 @@ mod wtr;
pub use crate::{
decompress::{
resolve_binary, DecompressionMatcher, DecompressionMatcherBuilder,
DecompressionReader, DecompressionReaderBuilder,
DecompressionMatcher, DecompressionMatcherBuilder,
DecompressionReader, DecompressionReaderBuilder, resolve_binary,
},
escape::{escape, escape_os, unescape, unescape_os},
hostname::hostname,
human::{parse_human_readable_size, ParseSizeError},
human::{ParseSizeError, parse_human_readable_size},
pattern::{
pattern_from_bytes, pattern_from_os, patterns_from_path,
patterns_from_reader, patterns_from_stdin, InvalidPatternError,
InvalidPatternError, pattern_from_bytes, pattern_from_os,
patterns_from_path, patterns_from_reader, patterns_from_stdin,
},
process::{CommandError, CommandReader, CommandReaderBuilder},
wtr::{
stdout, stdout_buffered_block, stdout_buffered_line, StandardStream,
StandardStream, stdout, stdout_buffered_block, stdout_buffered_line,
},
};
@@ -178,22 +178,71 @@ pub fn is_readable_stdin() -> bool {
};
let stdin = std::io::stdin();
let Ok(fd) = stdin.as_fd().try_clone_to_owned() else { return false };
let fd = match stdin.as_fd().try_clone_to_owned() {
Ok(fd) => fd,
Err(err) => {
log::debug!(
"for heuristic stdin detection on Unix, \
could not clone stdin file descriptor \
(thus assuming stdin is not readable): {err}",
);
return false;
}
};
let file = File::from(fd);
let Ok(md) = file.metadata() else { return false };
let md = match file.metadata() {
Ok(md) => md,
Err(err) => {
log::debug!(
"for heuristic stdin detection on Unix, \
could not get file metadata for stdin \
(thus assuming stdin is not readable): {err}",
);
return false;
}
};
let ft = md.file_type();
ft.is_file() || ft.is_fifo() || ft.is_socket()
let is_file = ft.is_file();
let is_fifo = ft.is_fifo();
let is_socket = ft.is_socket();
let is_readable = is_file || is_fifo || is_socket;
log::debug!(
"for heuristic stdin detection on Unix, \
found that \
is_file={is_file}, is_fifo={is_fifo} and is_socket={is_socket}, \
and thus concluded that is_stdin_readable={is_readable}",
);
is_readable
}
#[cfg(windows)]
fn imp() -> bool {
winapi_util::file::typ(winapi_util::HandleRef::stdin())
.map(|t| t.is_disk() || t.is_pipe())
.unwrap_or(false)
let stdin = winapi_util::HandleRef::stdin();
let typ = match winapi_util::file::typ(stdin) {
Ok(typ) => typ,
Err(err) => {
log::debug!(
"for heuristic stdin detection on Windows, \
could not get file type of stdin \
(thus assuming stdin is not readable): {err}",
);
return false;
}
};
let is_disk = typ.is_disk();
let is_pipe = typ.is_pipe();
let is_readable = is_disk || is_pipe;
log::debug!(
"for heuristic stdin detection on Windows, \
found that is_disk={is_disk} and is_pipe={is_pipe}, \
and thus concluded that is_stdin_readable={is_readable}",
);
is_readable
}
#[cfg(not(any(unix, windows)))]
fn imp() -> bool {
log::debug!("on non-{{Unix,Windows}}, assuming stdin is not readable");
false
}

View File

@@ -1,6 +1,6 @@
use std::io::{self, IsTerminal};
use termcolor::{self, HyperlinkSpec};
use termcolor::HyperlinkSpec;
/// A writer that supports coloring with either line or block buffering.
#[derive(Debug)]

View File

@@ -0,0 +1,29 @@
# This is impossible to read, but these encodings rarely if ever change, so
# it probably does not matter. They are derived from the list given here:
# https://encoding.spec.whatwg.org/#concept-encoding-get
#
# The globbing here works in both fish and zsh (though they expand it in
# different orders). It may work in other shells too.
{{,us-}ascii,arabic,chinese,cyrillic,greek{,8},hebrew,korean}
logical visual mac {,cs}macintosh x-mac-{cyrillic,roman,ukrainian}
866 ibm{819,866} csibm866
big5{,-hkscs} {cn-,cs}big5 x-x-big5
cp{819,866,125{0,1,2,3,4,5,6,7,8}} x-cp125{0,1,2,3,4,5,6,7,8}
csiso2022{jp,kr} csiso8859{6,8}{e,i}
csisolatin{1,2,3,4,5,6,9} csisolatin{arabic,cyrillic,greek,hebrew}
ecma-{114,118} asmo-708 elot_928 sun_eu_greek
euc-{jp,kr} x-euc-jp cseuckr cseucpkdfmtjapanese
{,x-}gbk csiso58gb231280 gb18030 {,cs}gb2312 gb_2312{,-80} hz-gb-2312
iso-2022-{cn,cn-ext,jp,kr}
iso8859{,-}{1,2,3,4,5,6,7,8,9,10,11,13,14,15}
iso-8859-{1,2,3,4,5,6,7,8,9,10,11,{6,8}-{e,i},13,14,15,16} iso_8859-{1,2,3,4,5,6,7,8,9,15}
iso_8859-{1,2,6,7}:1987 iso_8859-{3,4,5,8}:1988 iso_8859-9:1989
iso-ir-{58,100,101,109,110,126,127,138,144,148,149,157}
koi{,8,8-r,8-ru,8-u,8_r} cskoi8r
ks_c_5601-{1987,1989} ksc{,_}5691 csksc56011987
latin{1,2,3,4,5,6} l{1,2,3,4,5,6,9}
shift{-,_}jis csshiftjis {,x-}sjis ms_kanji ms932
utf{,-}8 utf-16{,be,le} unicode-1-1-utf-8
windows-{31j,874,949,125{0,1,2,3,4,5,6,7,8}} dos-874 tis-620 ansi_x3.4-1968
x-user-defined auto none

View File

@@ -2,43 +2,70 @@
Provides completions for ripgrep's CLI for the fish shell.
*/
use crate::flags::defs::FLAGS;
use crate::flags::{CompletionType, defs::FLAGS};
const TEMPLATE: &'static str =
"complete -c rg -n '__fish_use_subcommand' !SHORT! !LONG! !DOC!\n";
const TEMPLATE_CHOICES: &'static str =
"complete -c rg -n '__fish_use_subcommand' !SHORT! !LONG! !DOC! -r -f -a '!CHOICES!'\n";
const TEMPLATE: &'static str = "complete -c rg !SHORT! -l !LONG! -d '!DOC!'";
const TEMPLATE_NEGATED: &'static str = "complete -c rg -l !NEGATED! -n '__rg_contains_opt !LONG! !SHORT!' -d '!DOC!'\n";
/// Generate completions for Fish.
///
/// Note that these completions are based on what was produced for ripgrep <=13
/// using Clap 2.x. Improvements on this are welcome.
/// Reference: <https://fishshell.com/docs/current/completions.html>
pub(crate) fn generate() -> String {
let mut out = String::new();
out.push_str(include_str!("prelude.fish"));
out.push('\n');
for flag in FLAGS.iter() {
let short = match flag.name_short() {
None => "".to_string(),
Some(byte) => format!("-s {}", char::from(byte)),
};
let long = format!("-l '{}'", flag.name_long().replace("'", "\\'"));
let doc = format!("-d '{}'", flag.doc_short().replace("'", "\\'"));
let template = if flag.doc_choices().is_empty() {
TEMPLATE.to_string()
} else {
TEMPLATE_CHOICES
.replace("!CHOICES!", &flag.doc_choices().join(" "))
};
out.push_str(
&template
.replace("!SHORT!", &short)
.replace("!LONG!", &long)
.replace("!DOC!", &doc),
);
let long = flag.name_long();
let doc = flag.doc_short().replace("'", "\\'");
let mut completion = TEMPLATE
.replace("!SHORT!", &short)
.replace("!LONG!", &long)
.replace("!DOC!", &doc);
match flag.completion_type() {
CompletionType::Filename => {
completion.push_str(" -r -F");
}
CompletionType::Executable => {
completion.push_str(" -r -f -a '(__fish_complete_command)'");
}
CompletionType::Filetype => {
completion.push_str(
" -r -f -a '(rg --type-list | string replace : \\t)'",
);
}
CompletionType::Encoding => {
completion.push_str(" -r -f -a '");
completion.push_str(super::ENCODINGS);
completion.push_str("'");
}
CompletionType::Other if !flag.doc_choices().is_empty() => {
completion.push_str(" -r -f -a '");
completion.push_str(&flag.doc_choices().join(" "));
completion.push_str("'");
}
CompletionType::Other if !flag.is_switch() => {
completion.push_str(" -r -f");
}
CompletionType::Other => (),
}
completion.push('\n');
out.push_str(&completion);
if let Some(negated) = flag.name_negated() {
let long = format!("-l '{}'", negated.replace("'", "\\'"));
let short = match flag.name_short() {
None => "".to_string(),
Some(byte) => char::from(byte).to_string(),
};
out.push_str(
&TEMPLATE
.replace("!SHORT!", "")
&TEMPLATE_NEGATED
.replace("!NEGATED!", &negated)
.replace("!SHORT!", &short)
.replace("!LONG!", &long)
.replace("!DOC!", &doc),
);

View File

@@ -2,6 +2,8 @@
Modules for generating completions for various shells.
*/
static ENCODINGS: &'static str = include_str!("encodings.sh");
pub(super) mod bash;
pub(super) mod fish;
pub(super) mod powershell;

View File

@@ -34,8 +34,7 @@ Register-ArgumentCompleter -Native -CommandName 'rg' -ScriptBlock {
}
";
const TEMPLATE_FLAG: &'static str =
"[CompletionResult]::new('!DASH_NAME!', '!NAME!', [CompletionResultType]::ParameterName, '!DOC!')";
const TEMPLATE_FLAG: &'static str = "[CompletionResult]::new('!DASH_NAME!', '!NAME!', [CompletionResultType]::ParameterName, '!DOC!')";
/// Generate completions for PowerShell.
///
@@ -72,7 +71,7 @@ pub(crate) fn generate() -> String {
}
if let Some(negated) = flag.name_negated() {
let dash_name = format!("--{}", negated);
let dash_name = format!("--{negated}");
flags.push_str("\n ");
flags.push_str(
&TEMPLATE_FLAG

View File

@@ -0,0 +1,31 @@
# Usage: __rg_contains_opt LONG [SHORT]
function __rg_contains_opt --description 'Specialized __fish_contains_opt'
# Cache the config file because this function is called many times per
# completion attempt.
# The cache will persist for the entire shell session (even if the
# variable or the file contents change).
if not set -q __rg_config
set -g __rg_config
if set -qx RIPGREP_CONFIG_PATH
set __rg_config (
cat -- $RIPGREP_CONFIG_PATH 2>/dev/null \
| string trim \
| string match -rv '^$|^#'
)
end
end
set -l commandline (commandline -cpo) (commandline -ct) $__rg_config
if contains -- "--$argv[1]" $commandline
return 0
end
if set -q argv[2]
if string match -qr -- "^-[^-]*$argv[2]" $commandline
return 0
end
end
return 1
end

View File

@@ -1,7 +1,7 @@
#compdef rg
#compdef rgs
##
# zsh completion function for ripgrep
# zsh completion function for rgs
#
# Run ci/test-complete after building to ensure that the options supported by
# this function stay in synch with the `rg` binary.
@@ -96,6 +96,8 @@ _rg() {
+ '(file-name)' # File-name options
{-H,--with-filename}'[show file name for matches]'
{-I,--no-filename}"[don't show file name for matches]"
'--in-file-index[show per-file match index in output]'
'--no-in-file-index[hide per-file match index in output]'
+ '(file-system)' # File system options
"--one-file-system[don't descend into directories on other file systems]"
@@ -210,6 +212,7 @@ _rg() {
+ '(multiline)' # Multiline options
{-U,--multiline}'[permit matching across multiple lines]'
{-W+,--multiline-window=}'[limit multiline matches to NUM lines (with -U enabled implicitly)]:number of lines'
$no'(multiline-dotall)--no-multiline[restrict matches to at most one line each]'
+ '(multiline-dotall)' # Multiline DOTALL options
@@ -279,6 +282,10 @@ _rg() {
+ '(threads)' # Thread-count options
'(sort)'{-j+,--threads=}'[specify approximate number of threads to use]:number of threads'
+ '(squash)' # Squash options
'--squash[squash contiguous whitespace into a single space]'
'--squash-nl-only[squash new lines into a single space]'
+ '(trim)' # Trim options
'--trim[trim any ASCII whitespace prefix from each line]'
$no"--no-trim[don't trim ASCII whitespace prefix from each line]"
@@ -319,7 +326,7 @@ _rg() {
'--field-context-separator[set string to delimit fields in context lines]'
'--field-match-separator[set string to delimit fields in matching lines]'
'--hostname-bin=[executable for getting system hostname]:hostname executable:_command_names -e'
'--hyperlink-format=[specify pattern for hyperlinks]:pattern'
'--hyperlink-format=[specify pattern for hyperlinks]: :_rg_hyperlink_formats'
'--trace[show more verbose debug messages]'
'--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)'
"(1 stats)--files[show each file that would be searched (but don't search)]"
@@ -363,10 +370,11 @@ _rg() {
'column:specify coloring for column numbers'
'line:specify coloring for line numbers'
'match:specify coloring for match text'
'highlight:specify coloring for matching lines'
'path:specify coloring for file names'
)
descr='color/style type'
elif [[ ${IPREFIX#--*=}$PREFIX == (column|line|match|path):[^:]# ]]; then
elif [[ ${IPREFIX#--*=}$PREFIX == (column|line|match|highlight|path):[^:]# ]]; then
suf=( -qS: )
tmp=(
'none:clear color/style for type'
@@ -409,42 +417,20 @@ _rg() {
}
# Complete encodings
(( $+functions[_rg_encodings] )) ||
_rg_encodings() {
local -a expl
local -aU _encodings
# This is impossible to read, but these encodings rarely if ever change, so it
# probably doesn't matter. They are derived from the list given here:
# https://encoding.spec.whatwg.org/#concept-encoding-get
_encodings=(
{{,us-}ascii,arabic,chinese,cyrillic,greek{,8},hebrew,korean}
logical visual mac {,cs}macintosh x-mac-{cyrillic,roman,ukrainian}
866 ibm{819,866} csibm866
big5{,-hkscs} {cn-,cs}big5 x-x-big5
cp{819,866,125{0..8}} x-cp125{0..8}
csiso2022{jp,kr} csiso8859{6,8}{e,i}
csisolatin{{1..6},9} csisolatin{arabic,cyrillic,greek,hebrew}
ecma-{114,118} asmo-708 elot_928 sun_eu_greek
euc-{jp,kr} x-euc-jp cseuckr cseucpkdfmtjapanese
{,x-}gbk csiso58gb231280 gb18030 {,cs}gb2312 gb_2312{,-80} hz-gb-2312
iso-2022-{cn,cn-ext,jp,kr}
iso8859{,-}{{1..11},13,14,15}
iso-8859-{{1..11},{6,8}-{e,i},13,14,15,16} iso_8859-{{1..9},15}
iso_8859-{1,2,6,7}:1987 iso_8859-{3,4,5,8}:1988 iso_8859-9:1989
iso-ir-{58,100,101,109,110,126,127,138,144,148,149,157}
koi{,8,8-r,8-ru,8-u,8_r} cskoi8r
ks_c_5601-{1987,1989} ksc{,_}5691 csksc56011987
latin{1..6} l{{1..6},9}
shift{-,_}jis csshiftjis {,x-}sjis ms_kanji ms932
utf{,-}8 utf-16{,be,le} unicode-1-1-utf-8
windows-{31j,874,949,125{0..8}} dos-874 tis-620 ansi_x3.4-1968
x-user-defined auto none
!ENCODINGS!
)
_wanted encodings expl encoding compadd -a "$@" - _encodings
}
# Complete file types
(( $+functions[_rg_types] )) ||
_rg_types() {
local -a expl
local -aU _types
@@ -458,7 +444,58 @@ _rg_types() {
fi
}
_rg "$@"
# Complete hyperlink format-string aliases
(( $+functions[_rg_hyperlink_format_aliases] )) ||
_rg_hyperlink_format_aliases() {
_describe -t format-aliases 'hyperlink format alias' '(
!HYPERLINK_ALIASES!
)'
}
# Complete custom hyperlink format strings
(( $+functions[_rg_hyperlink_format_strings] )) ||
_rg_hyperlink_format_strings() {
local op='{' ed='}'
local -a pfx sfx rmv
compquote op ed
sfx=( -S $ed )
rmv=( -r ${(q)ed[1]} )
compset -S "$op*"
compset -S "$ed*" && sfx=( -S '' )
compset -P "*$ed"
compset -p ${#PREFIX%$op*}
compset -P $op || pfx=( -P $op )
WSL_DISTRO_NAME=${WSL_DISTRO_NAME:-\$WSL_DISTRO_NAME} \
_describe -t format-variables 'hyperlink format variable' '(
path:"absolute path to file containing match (required)"
host:"system host name or output of --hostname-bin executable"
line:"line number of match"
column:"column of match (requires {line})"
wslprefix:"\"wsl$/$WSL_DISTRO_NAME\" (for WSL share)"
)' "${(@)pfx}" "${(@)sfx}" "${(@)rmv}"
}
# Complete hyperlink formats
(( $+functions[_rg_hyperlink_formats] )) ||
_rg_hyperlink_formats() {
_alternative \
'format-string-aliases: :_rg_hyperlink_format_aliases' \
'format-strings: :_rg_hyperlink_format_strings'
}
# Don't run the completion function when being sourced by itself.
#
# See https://github.com/BurntSushi/ripgrep/issues/2956
# See https://github.com/BurntSushi/ripgrep/pull/2957
if [[ $funcstack[1] == _rg ]] || (( ! $+functions[compdef] )); then
_rg "$@"
else
compdef _rg rg
fi
################################################################################
# ZSH COMPLETION REFERENCE

View File

@@ -19,5 +19,14 @@ long as it meets criteria 3 and 4 above.
/// Generate completions for zsh.
pub(crate) fn generate() -> String {
include_str!("rg.zsh").to_string()
let hyperlink_alias_descriptions = grep::printer::hyperlink_aliases()
.iter()
.map(|alias| {
format!(r#" {}:"{}""#, alias.name(), alias.description())
})
.collect::<Vec<String>>()
.join("\n");
include_str!("rgs.zsh")
.replace("!ENCODINGS!", super::ENCODINGS.trim_end())
.replace("!HYPERLINK_ALIASES!", &hyperlink_alias_descriptions)
}

View File

@@ -10,7 +10,7 @@ use std::{
path::{Path, PathBuf},
};
use bstr::{io::BufReadExt, ByteSlice};
use bstr::{ByteSlice, io::BufReadExt};
/// Return a sequence of arguments derived from ripgrep rc configuration files.
pub fn args() -> Vec<OsString> {

View File

@@ -17,23 +17,25 @@ ripgrep. For example, `-E`, `--encoding` and `--no-encoding` all manipulate the
same encoding state in ripgrep.
*/
use std::path::PathBuf;
use std::{path::PathBuf, sync::LazyLock};
use {anyhow::Context as AnyhowContext, bstr::ByteVec};
use crate::flags::{
Category, Flag, FlagValue,
lowargs::{
BinaryMode, BoundaryMode, BufferMode, CaseMode, ColorChoice,
ContextMode, EncodingMode, EngineChoice, GenerateMode, LoggingMode,
LowArgs, MmapMode, Mode, PatternSource, SearchMode, SortMode,
SortModeKind, SpecialMode, TypeChange,
},
Category, Flag, FlagValue,
};
#[cfg(test)]
use crate::flags::parse::parse_low_raw;
use super::CompletionType;
/// A list of all flags in ripgrep via implementations of `Flag`.
///
/// The order of these flags matter. It determines the order of the flags in
@@ -95,6 +97,7 @@ pub(super) const FLAGS: &[&dyn Flag] = &[
&MaxFilesize,
&Mmap,
&Multiline,
&MultilineWindow,
&MultilineDotall,
&NoConfig,
&NoIgnore,
@@ -131,6 +134,8 @@ pub(super) const FLAGS: &[&dyn Flag] = &[
&Text,
&Threads,
&Trace,
&Squash,
&SquashNlOnly,
&Trim,
&Type,
&TypeNot,
@@ -140,6 +145,7 @@ pub(super) const FLAGS: &[&dyn Flag] = &[
&Unrestricted,
&Version,
&Vimgrep,
&InFileIndex,
&WithFilename,
&WithFilenameNo,
&WordRegexp,
@@ -749,7 +755,8 @@ the \flag{colors} flag to manually set all color styles to \fBnone\fP:
\-\-colors 'path:none' \\
\-\-colors 'line:none' \\
\-\-colors 'column:none' \\
\-\-colors 'match:none'
\-\-colors 'match:none' \\
\-\-colors 'highlight:none'
.EE
.sp
"
@@ -827,21 +834,21 @@ impl Flag for Colors {
"Configure color settings and styles."
}
fn doc_long(&self) -> &'static str {
r"
r#"
This flag specifies color settings for use in the output. This flag may be
provided multiple times. Settings are applied iteratively. Pre-existing color
labels are limited to one of eight choices: \fBred\fP, \fBblue\fP, \fBgreen\fP,
\fBcyan\fP, \fBmagenta\fP, \fByellow\fP, \fBwhite\fP and \fBblack\fP. Styles
are limited to \fBnobold\fP, \fBbold\fP, \fBnointense\fP, \fBintense\fP,
\fBnounderline\fP or \fBunderline\fP.
\fBnounderline\fP, \fBunderline\fP, \fBnoitalic\fP or \fBitalic\fP.
.sp
The format of the flag is
\fB{\fP\fItype\fP\fB}:{\fP\fIattribute\fP\fB}:{\fP\fIvalue\fP\fB}\fP.
\fItype\fP should be one of \fBpath\fP, \fBline\fP, \fBcolumn\fP or
\fBmatch\fP. \fIattribute\fP can be \fBfg\fP, \fBbg\fP or \fBstyle\fP.
\fIvalue\fP is either a color (for \fBfg\fP and \fBbg\fP) or a text style. A
special format, \fB{\fP\fItype\fP\fB}:none\fP, will clear all color settings
for \fItype\fP.
\fItype\fP should be one of \fBpath\fP, \fBline\fP, \fBcolumn\fP,
\fBhighlight\fP or \fBmatch\fP. \fIattribute\fP can be \fBfg\fP, \fBbg\fP or
\fBstyle\fP. \fIvalue\fP is either a color (for \fBfg\fP and \fBbg\fP) or a
text style. A special format, \fB{\fP\fItype\fP\fB}:none\fP, will clear all
color settings for \fItype\fP.
.sp
For example, the following command will change the match color to magenta and
the background color for line numbers to yellow:
@@ -850,6 +857,17 @@ the background color for line numbers to yellow:
rg \-\-colors 'match:fg:magenta' \-\-colors 'line:bg:yellow'
.EE
.sp
Another example, the following command will "highlight" the non-matching text
in matching lines:
.sp
.EX
rg \-\-colors 'highlight:bg:yellow' \-\-colors 'highlight:fg:black'
.EE
.sp
The "highlight" color type is particularly useful for contrasting matching
lines with surrounding context printed by the \flag{before-context},
\flag{after-context}, \flag{context} or \flag{passthru} flags.
.sp
Extended colors can be used for \fIvalue\fP when the tty supports ANSI color
sequences. These are specified as either \fIx\fP (256-color) or
.IB x , x , x
@@ -872,7 +890,7 @@ or, equivalently,
.sp
Note that the \fBintense\fP and \fBnointense\fP styles will have no effect when
used alongside these extended color codes.
"
"#
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
@@ -906,6 +924,24 @@ fn test_colors() {
"line:bg:yellow".parse().unwrap()
]
);
let args = parse_low_raw(["--colors", "highlight:bg:240"]).unwrap();
assert_eq!(args.colors, vec!["highlight:bg:240".parse().unwrap()]);
let args = parse_low_raw([
"--colors",
"match:fg:magenta",
"--colors",
"highlight:bg:blue",
])
.unwrap();
assert_eq!(
args.colors,
vec![
"match:fg:magenta".parse().unwrap(),
"highlight:bg:blue".parse().unwrap()
]
);
}
/// --column
@@ -1232,18 +1268,27 @@ impl Flag for Count {
}
fn doc_long(&self) -> &'static str {
r"
This flag suppresses normal output and shows the number of lines that match the
given patterns for each file searched. Each file containing a match has its
path and count printed on each line. Note that unless \flag{multiline}
is enabled, this reports the number of lines that match and not the total
number of matches. In multiline mode, \flag{count} is equivalent to
\flag{count-matches}.
This flag suppresses normal output and shows the number of lines that match
the given patterns for each file searched. Each file containing a match has
its path and count printed on each line. Note that unless \flag{multiline} is
enabled and the pattern(s) given can match over multiple lines, this reports
the number of lines that match and not the total number of matches. When
multiline mode is enabled and the pattern(s) given can match over multiple
lines, \flag{count} is equivalent to \flag{count-matches}.
.sp
If only one file is given to ripgrep, then only the count is printed if there
is a match. The \flag{with-filename} flag can be used to force printing the
file path in this case. If you need a count to be printed regardless of whether
there is a match, then use \flag{include-zero}.
.sp
Note that it is possible for this flag to have results inconsistent with
the output of \flag{files-with-matches}. Notably, by default, ripgrep tries
to avoid searching files with binary data. With this flag, ripgrep needs to
search the entire content of files, which may include binary data. But with
\flag{files-with-matches}, ripgrep can stop as soon as a match is observed,
which may come well before any binary data. To avoid this inconsistency without
disabling binary detection, use the \flag{binary} flag.
.sp
This overrides the \flag{count-matches} flag. Note that when \flag{count}
is combined with \flag{only-matching}, then ripgrep behaves as if
\flag{count-matches} was given.
@@ -1582,6 +1627,9 @@ The encoding detection that ripgrep uses can be reverted to its automatic mode
via the \flag-negate{encoding} flag.
"
}
fn completion_type(&self) -> CompletionType {
CompletionType::Encoding
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
let value = match v {
@@ -1977,6 +2025,9 @@ When \flag{file} or \flag{regexp} is used, then ripgrep treats all positional
arguments as files or directories to search.
"
}
fn completion_type(&self) -> CompletionType {
CompletionType::Filename
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
let path = PathBuf::from(v.unwrap_value());
@@ -2146,6 +2197,14 @@ impl Flag for FilesWithMatches {
r"
Print only the paths with at least one match and suppress match contents.
.sp
Note that it is possible for this flag to have results inconsistent with the
output of \flag{count}. Notably, by default, ripgrep tries to avoid searching
files with binary data. With this flag, ripgrep might stop searching before
the binary data is observed. But with \flag{count}, ripgrep has to search the
entire contents to determine the match count, which means it might see binary
data that causes it to skip searching that file. To avoid this inconsistency
without disabling binary detection, use the \flag{binary} flag.
.sp
This overrides \flag{files-without-match}.
"
}
@@ -2619,7 +2678,7 @@ of printing the file path as a prefix for each matched line.
This is the default mode when printing to a tty.
.sp
When \fBstdout\fP is not a tty, then ripgrep will default to the standard
grep-like format. Once can force this format in Unix-like environments by
grep-like format. One can force this format in Unix-like environments by
piping the output of ripgrep to \fBcat\fP. For example, \fBrg\fP \fIfoo\fP \fB|
cat\fP.
"
@@ -2738,12 +2797,17 @@ impl Flag for Hidden {
Search hidden files and directories. By default, hidden files and directories
are skipped. Note that if a hidden file or a directory is whitelisted in
an ignore file, then it will be searched even if this flag isn't provided.
Similarly if a hidden file or directory is given explicitly as an argumnet to
Similarly if a hidden file or directory is given explicitly as an argument to
ripgrep.
.sp
A file or directory is considered hidden if its base name starts with a dot
character (\fB.\fP). On operating systems which support a "hidden" file
attribute, like Windows, files with this attribute are also considered hidden.
.sp
Note that \flag{hidden} will include files and folders like \fB.git\fP
regardless of \flag{no-ignore-vcs}. To exclude such paths when using
\flag{hidden}, you must explicitly ignore them using another flag or ignore
file.
"#
}
@@ -2808,6 +2872,9 @@ to calling \fBgethostname\fP. On Windows, this corresponds to calling
ripgrep uses your system's hostname for producing hyperlinks.
"#
}
fn completion_type(&self) -> CompletionType {
CompletionType::Executable
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
let path = PathBuf::from(v.unwrap_value());
@@ -2851,7 +2918,10 @@ impl Flag for HyperlinkFormat {
r"Set the format of hyperlinks."
}
fn doc_long(&self) -> &'static str {
r#"
static DOC: LazyLock<String> = LazyLock::new(|| {
let mut doc = String::new();
doc.push_str(
r#"
Set the format of hyperlinks to use when printing results. Hyperlinks make
certain elements of ripgrep's output, such as file paths, clickable. This
generally only works in terminal emulators that support OSC-8 hyperlinks. For
@@ -2859,10 +2929,23 @@ example, the format \fBfile://{host}{path}\fP will emit an RFC 8089 hyperlink.
To see the format that ripgrep is using, pass the \flag{debug} flag.
.sp
Alternatively, a format string may correspond to one of the following aliases:
\fBdefault\fP, \fBnone\fP, \fBfile\fP, \fBgrep+\fP, \fBkitty\fP, \fBmacvim\fP,
\fBtextmate\fP, \fBvscode\fP, \fBvscode-insiders\fP, \fBvscodium\fP. The
alias will be replaced with a format string that is intended to work for the
corresponding application.
"#,
);
let mut aliases = grep::printer::hyperlink_aliases();
aliases.sort_by_key(|alias| {
alias.display_priority().unwrap_or(i16::MAX)
});
for (i, alias) in aliases.iter().enumerate() {
doc.push_str(r"\fB");
doc.push_str(alias.name());
doc.push_str(r"\fP");
doc.push_str(if i < aliases.len() - 1 { ", " } else { "." });
}
doc.push_str(
r#"
The alias will be replaced with a format string that is intended to work for
the corresponding application.
.sp
The following variables are available in the format string:
.sp
@@ -2939,7 +3022,24 @@ in the output. To make the path appear, and thus also a hyperlink, use the
.sp
For more information on hyperlinks in terminal emulators, see:
https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda
"#
"#,
);
doc
});
&DOC
}
fn doc_choices(&self) -> &'static [&'static str] {
static CHOICES: LazyLock<Vec<String>> = LazyLock::new(|| {
let mut aliases = grep::printer::hyperlink_aliases();
aliases.sort_by_key(|alias| {
alias.display_priority().unwrap_or(i16::MAX)
});
aliases.iter().map(|alias| alias.name().to_string()).collect()
});
static BORROWED: LazyLock<Vec<&'static str>> =
LazyLock::new(|| CHOICES.iter().map(|name| &**name).collect());
&*BORROWED
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
@@ -3133,14 +3233,19 @@ impl Flag for IgnoreFile {
Specifies a path to one or more \fBgitignore\fP formatted rules files.
These patterns are applied after the patterns found in \fB.gitignore\fP,
\fB.rgignore\fP and \fB.ignore\fP are applied and are matched relative to the
current working directory. Multiple additional ignore files can be specified
by using this flag repeatedly. When specifying multiple ignore files, earlier
files have lower precedence than later files.
current working directory. That is, files specified via this flag have lower
precedence than files automatically found in the directory tree. Multiple
additional ignore files can be specified by using this flag repeatedly. When
specifying multiple ignore files, earlier files have lower precedence than
later files.
.sp
If you are looking for a way to include or exclude files and directories
directly on the command line, then use \flag{glob} instead.
"
}
fn completion_type(&self) -> CompletionType {
CompletionType::Filename
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
let path = PathBuf::from(v.unwrap_value());
@@ -3795,6 +3900,14 @@ impl Flag for MaxCount {
r"
Limit the number of matching lines per file searched to \fINUM\fP.
.sp
When \flag{multiline} is used, a single match that spans multiple lines is only
counted once for the purposes of this limit. Multiple matches in a single line
are counted only once, as they would be in non-multiline mode.
.sp
When combined with \flag{after-context} or \flag{context}, it's possible for
more matches than the maximum to be printed if contextual lines contain a
match.
.sp
Note that \fB0\fP is a legal value but not likely to be useful. When used,
ripgrep won't search anything.
"
@@ -4076,7 +4189,14 @@ This overrides the \flag{stop-on-nonmatch} flag.
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
args.multiline = v.unwrap_switch();
let enabled = v.unwrap_switch();
if !enabled && args.multiline_window.is_some() {
anyhow::bail!(
"--no-multiline cannot be used with --multiline-window \
(which implicitly enables --multiline)"
);
}
args.multiline = enabled;
if args.multiline {
args.stop_on_nonmatch = false;
}
@@ -4100,6 +4220,68 @@ fn test_multiline() {
assert_eq!(false, args.multiline);
}
/// --multiline-window
#[derive(Debug)]
struct MultilineWindow;
impl Flag for MultilineWindow {
fn is_switch(&self) -> bool {
false
}
fn name_short(&self) -> Option<u8> {
Some(b'W')
}
fn name_long(&self) -> &'static str {
"multiline-window"
}
fn doc_variable(&self) -> Option<&'static str> {
Some("NUM")
}
fn doc_category(&self) -> Category {
Category::Search
}
fn doc_short(&self) -> &'static str {
r"Limit multiline matches to a fixed number of lines."
}
fn doc_long(&self) -> &'static str {
r#"
Limit the maximum number of lines that a multiline match may span to
\fINUM\fP (use \fB--multiline-window=\fP\fINUM\fP).
.sp
This flag implicitly enables \flag{multiline}. Matches are found as if the file being
searched were limited to \fINUM\fP lines at a time, which can prevent
unintended long matches while still enabling multi-line searching.
.sp
The value of \fINUM\fP must be at least 1.
"#
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
let lines = convert::usize(&v.unwrap_value())?;
if lines == 0 {
anyhow::bail!("--multiline-window must be at least 1");
}
args.multiline_window = Some(lines);
args.multiline = true;
Ok(())
}
}
#[cfg(test)]
#[test]
fn test_multiline_window() {
let args = parse_low_raw(None::<&str>).unwrap();
assert_eq!(None, args.multiline_window);
let args = parse_low_raw(["--multiline-window=2"]).unwrap();
assert_eq!(Some(2), args.multiline_window);
assert_eq!(true, args.multiline);
let args = parse_low_raw(["-W", "3"]).unwrap();
assert_eq!(Some(3), args.multiline_window);
assert_eq!(true, args.multiline);
}
/// --multiline-dotall
#[derive(Debug)]
struct MultilineDotall;
@@ -4589,11 +4771,15 @@ impl Flag for NoIgnoreVcs {
}
fn doc_long(&self) -> &'static str {
r"
When given, filter rules from source control ignore files (e.g., \fB.gitignore\fP)
are not respected. By default, ripgrep respects \fBgit\fP's ignore rules for
automatic filtering. In some cases, it may not be desirable to respect the
source control's ignore rules and instead only respect rules in \fB.ignore\fP
or \fB.rgignore\fP.
When given, filter rules from source control ignore files (e.g.,
\fB.gitignore\fP) are not respected. By default, ripgrep respects \fBgit\fP's
ignore rules for automatic filtering. In some cases, it may not be desirable
to respect the source control's ignore rules and instead only respect rules in
\fB.ignore\fP or \fB.rgignore\fP.
.sp
Note that this flag does not directly affect the filtering of source control
files or folders that start with a dot (\fB.\fP), like \fB.git\fP. These are
affected by \flag{hidden} and its related flags instead.
.sp
This flag implies \flag{no-ignore-parent} for source control ignore files as
well.
@@ -5410,6 +5596,9 @@ format, then \fBpzstd\fP is used to decompress the contents to stdout.
This overrides the \flag{search-zip} flag.
"#
}
fn completion_type(&self) -> CompletionType {
CompletionType::Executable
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
let path = match v {
@@ -5487,9 +5676,9 @@ don't need preprocessing. For example, given the following shell script,
pdftotext "$1" -
.EE
.sp
then it is possible to use \fB\-\-pre\fP \fIpre-pdftotext\fP \fB--pre-glob
'\fP\fI*.pdf\fP\fB'\fP to make it so ripgrep only executes the
\fIpre-pdftotext\fP command on files with a \fI.pdf\fP extension.
then it is possible to use \fB\-\-pre\fP \fIpre-pdftotext\fP
\fB\-\-pre\-glob\fP '\fI*.pdf\fP' to make it so ripgrep only executes
the \fIpre-pdftotext\fP command on files with a \fI.pdf\fP extension.
.sp
Multiple \flag{pre-glob} flags may be used. Globbing rules match
\fBgitignore\fP globs. Precede a glob with a \fB!\fP to exclude it.
@@ -6695,6 +6884,88 @@ fn test_trace() {
assert_eq!(Some(LoggingMode::Trace), args.logging);
}
/// --squash
#[derive(Debug)]
struct Squash;
impl Flag for Squash {
fn is_switch(&self) -> bool {
true
}
fn name_long(&self) -> &'static str {
"squash"
}
fn doc_category(&self) -> Category {
Category::Output
}
fn doc_short(&self) -> &'static str {
r"Squash contiguous whitespace in output to a single space."
}
fn doc_long(&self) -> &'static str {
r#"
Squash any contiguous Unicode whitespace (including new lines) into a single
ASCII space when printing matches.
"#
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
assert!(v.unwrap_switch(), "--squash can only be enabled");
args.squash = grep::printer::SquashMode::Whitespace;
Ok(())
}
}
#[cfg(test)]
#[test]
fn test_squash() {
let args = parse_low_raw(None::<&str>).unwrap();
assert_eq!(grep::printer::SquashMode::None, args.squash);
let args = parse_low_raw(["--squash"]).unwrap();
assert_eq!(grep::printer::SquashMode::Whitespace, args.squash);
}
/// --squash-nl-only
#[derive(Debug)]
struct SquashNlOnly;
impl Flag for SquashNlOnly {
fn is_switch(&self) -> bool {
true
}
fn name_long(&self) -> &'static str {
"squash-nl-only"
}
fn doc_category(&self) -> Category {
Category::Output
}
fn doc_short(&self) -> &'static str {
r"Squash new lines into spaces in output."
}
fn doc_long(&self) -> &'static str {
r#"
Squash contiguous line terminators into a single ASCII space when printing
matches. Other whitespace is preserved.
"#
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
assert!(v.unwrap_switch(), "--squash-nl-only can only be enabled");
args.squash = grep::printer::SquashMode::Newlines;
Ok(())
}
}
#[cfg(test)]
#[test]
fn test_squash_nl_only() {
let args = parse_low_raw(None::<&str>).unwrap();
assert_eq!(grep::printer::SquashMode::None, args.squash);
let args = parse_low_raw(["--squash-nl-only"]).unwrap();
assert_eq!(grep::printer::SquashMode::Newlines, args.squash);
}
/// --trim
#[derive(Debug)]
struct Trim;
@@ -6781,6 +7052,9 @@ any rules found in ignore files.
To see the list of available file types, use the \flag{type-list} flag.
"#
}
fn completion_type(&self) -> CompletionType {
CompletionType::Filetype
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
args.type_changes.push(TypeChange::Select {
@@ -7000,6 +7274,9 @@ will only search files that are unrecognized by its type definitions.
To see the list of available file types, use the \flag{type-list} flag.
"#
}
fn completion_type(&self) -> CompletionType {
CompletionType::Filetype
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
args.type_changes.push(TypeChange::Negate {
@@ -7238,7 +7515,7 @@ impl Flag for Vimgrep {
Category::Output
}
fn doc_short(&self) -> &'static str {
r"Print results im a vim compatible format."
r"Print results in a vim compatible format."
}
fn doc_long(&self) -> &'static str {
r"
@@ -7279,6 +7556,53 @@ fn test_vimgrep() {
assert_eq!(true, args.vimgrep);
}
/// --in-file-index
#[derive(Debug)]
struct InFileIndex;
impl Flag for InFileIndex {
fn is_switch(&self) -> bool {
true
}
fn name_long(&self) -> &'static str {
"in-file-index"
}
fn name_negated(&self) -> Option<&'static str> {
Some("no-in-file-index")
}
fn doc_category(&self) -> Category {
Category::Output
}
fn doc_short(&self) -> &'static str {
r"Prefix matches with an index per file."
}
fn doc_long(&self) -> &'static str {
r"
When enabled, ripgrep prefixes each matching line with an index that is
incremented per file. The format is \fIFILE\fP[\fIN\fP]:\fILINE\fP:, which can
disambiguate multi-line matches that print the same line multiple times.
"
}
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
args.in_file_index = v.unwrap_switch();
Ok(())
}
}
#[cfg(test)]
#[test]
fn test_in_file_index() {
let args = parse_low_raw(None::<&str>).unwrap();
assert_eq!(false, args.in_file_index);
let args = parse_low_raw(["--in-file-index"]).unwrap();
assert_eq!(true, args.in_file_index);
let args = parse_low_raw(["--in-file-index", "--no-in-file-index"]).unwrap();
assert_eq!(false, args.in_file_index);
}
/// --with-filename
#[derive(Debug)]
struct WithFilename;
@@ -7642,9 +7966,10 @@ mod tests {
assert!(
choice.chars().all(|c| c.is_ascii_alphanumeric()
|| c == '-'
|| c == ':'),
|| c == ':'
|| c == '+'),
"choice '{choice}' for flag '{long}' does not match \
^[-:0-9A-Za-z]+$",
^[-+:0-9A-Za-z]+$",
)
}
}

View File

@@ -8,7 +8,7 @@ is used when the `--help` flag is given.
use std::{collections::BTreeMap, fmt::Write};
use crate::flags::{defs::FLAGS, doc::version, Category, Flag};
use crate::flags::{Category, Flag, defs::FLAGS, doc::version};
const TEMPLATE_SHORT: &'static str = include_str!("template.short.help");
const TEMPLATE_LONG: &'static str = include_str!("template.long.help");

View File

@@ -4,7 +4,7 @@ Provides routines for generating ripgrep's man page in `roff` format.
use std::{collections::BTreeMap, fmt::Write};
use crate::flags::{defs::FLAGS, doc::version, Flag};
use crate::flags::{Flag, defs::FLAGS, doc::version};
const TEMPLATE: &'static str = include_str!("template.rg.1");
@@ -53,7 +53,7 @@ fn generate_flag(flag: &'static dyn Flag, out: &mut String) {
write!(out, r", ");
}
let name = flag.name_long();
let name = flag.name_long().replace("-", r"\-");
write!(out, r"\fB\-\-{name}\fP");
if let Some(var) = flag.doc_variable() {
write!(out, r"=\fI{var}\fP");
@@ -71,7 +71,7 @@ fn generate_flag(flag: &'static dyn Flag, out: &mut String) {
if let Some(name) = flag.name_short() {
write!(out, r"\-{}/", char::from(name));
}
write!(out, r"\-\-{}", flag.name_long());
write!(out, r"\-\-{}", flag.name_long().replace("-", r"\-"));
out.push_str(r"\fP");
});
// Convert \flag-negate{foo} into something nicer.

View File

@@ -1,4 +1,4 @@
.TH RG 1 2023-11-26 "!!VERSION!!" "User Commands"
.TH RG 1 2025-10-22 "!!VERSION!!" "User Commands"
.
.
.SH NAME
@@ -43,10 +43,10 @@ configuration file. The file can specify one shell argument per line. Lines
starting with \fB#\fP are ignored. For more details, see \fBCONFIGURATION
FILES\fP below.
.sp
ripgrep will automatically detect if stdin exists and search stdin for a regex
pattern, e.g. \fBls | rg foo\fP. In some environments, stdin may exist when
it shouldn't. To turn off stdin detection, one can explicitly specify the
directory to search, e.g. \fBrg foo ./\fP.
ripgrep will automatically detect if stdin is a readable file and search stdin
for a regex pattern, e.g. \fBls | rg foo\fP. In some environments, stdin may
exist when it shouldn't. To turn off stdin detection, one can explicitly
specify the directory to search, e.g. \fBrg foo ./\fP.
.sp
Like other tools such as \fBls\fP, ripgrep will alter its output depending on
whether stdout is connected to a tty. By default, when printing a tty, ripgrep

View File

@@ -161,9 +161,6 @@ fn compile_cpu_features() -> Vec<String> {
fn features() -> Vec<String> {
let mut features = vec![];
let simd_accel = cfg!(feature = "simd-accel");
features.push(format!("{sign}simd-accel", sign = sign(simd_accel)));
let pcre2 = cfg!(feature = "pcre2");
features.push(format!("{sign}pcre2", sign = sign(pcre2)));
@@ -172,9 +169,5 @@ fn features() -> Vec<String> {
/// Returns `+` when `enabled` is `true` and `-` otherwise.
fn sign(enabled: bool) -> &'static str {
if enabled {
"+"
} else {
"-"
}
if enabled { "+" } else { "-" }
}

View File

@@ -9,7 +9,7 @@ use std::{
use {
bstr::BString,
grep::printer::{ColorSpecs, SummaryKind},
grep::printer::{ColorSpecs, SquashMode, SummaryKind},
};
use crate::{
@@ -45,6 +45,7 @@ pub(crate) struct HiArgs {
context: ContextMode,
context_separator: ContextSeparator,
crlf: bool,
cwd: PathBuf,
dfa_size_limit: Option<usize>,
encoding: EncodingMode,
engine: EngineChoice,
@@ -60,6 +61,7 @@ pub(crate) struct HiArgs {
ignore_file_case_insensitive: bool,
ignore_file: Vec<PathBuf>,
include_zero: bool,
in_file_index: bool,
invert_match: bool,
is_terminal_stdout: bool,
line_number: bool,
@@ -72,6 +74,7 @@ pub(crate) struct HiArgs {
mode: Mode,
multiline: bool,
multiline_dotall: bool,
multiline_window: Option<usize>,
no_ignore_dot: bool,
no_ignore_exclude: bool,
no_ignore_files: bool,
@@ -97,6 +100,7 @@ pub(crate) struct HiArgs {
sort: Option<SortMode>,
stats: Option<grep::printer::Stats>,
stop_on_nonmatch: bool,
squash: SquashMode,
threads: usize,
trim: bool,
types: ignore::types::Types,
@@ -262,6 +266,7 @@ impl HiArgs {
context: low.context,
context_separator: low.context_separator,
crlf: low.crlf,
cwd: state.cwd,
dfa_size_limit: low.dfa_size_limit,
encoding: low.encoding,
engine: low.engine,
@@ -276,6 +281,7 @@ impl HiArgs {
ignore_file: low.ignore_file,
ignore_file_case_insensitive: low.ignore_file_case_insensitive,
include_zero: low.include_zero,
in_file_index: low.in_file_index,
invert_match: low.invert_match,
is_terminal_stdout: state.is_terminal_stdout,
line_number,
@@ -287,6 +293,7 @@ impl HiArgs {
mmap_choice,
multiline: low.multiline,
multiline_dotall: low.multiline_dotall,
multiline_window: low.multiline_window,
no_ignore_dot: low.no_ignore_dot,
no_ignore_exclude: low.no_ignore_exclude,
no_ignore_files: low.no_ignore_files,
@@ -311,6 +318,7 @@ impl HiArgs {
sort: low.sort,
stats,
stop_on_nonmatch: low.stop_on_nonmatch,
squash: low.squash,
threads,
trim: low.trim,
types,
@@ -484,9 +492,9 @@ impl HiArgs {
if self.crlf {
builder.crlf(true);
}
// We don't need to set this in multiline mode since mulitline
// We don't need to set this in multiline mode since multiline
// matchers don't use optimizations related to line terminators.
// Moreover, a mulitline regex used with --null-data should
// Moreover, a multiline regex used with --null-data should
// be allowed to match NUL bytes explicitly, which this would
// otherwise forbid.
if self.null_data {
@@ -517,7 +525,7 @@ impl HiArgs {
/// When this returns false, it is impossible for ripgrep to ever report
/// a match.
pub(crate) fn matches_possible(&self) -> bool {
if self.patterns.patterns.is_empty() {
if self.patterns.patterns.is_empty() && !self.invert_match {
return false;
}
if self.max_count == Some(0) {
@@ -562,7 +570,16 @@ impl HiArgs {
wtr: W,
) -> Printer<W> {
let summary_kind = if self.quiet {
SummaryKind::Quiet
match search_mode {
SearchMode::FilesWithMatches
| SearchMode::Count
| SearchMode::CountMatches
| SearchMode::JSON
| SearchMode::Standard => SummaryKind::QuietWithMatch,
SearchMode::FilesWithoutMatch => {
SummaryKind::QuietWithoutMatch
}
}
} else {
match search_mode {
SearchMode::FilesWithMatches => SummaryKind::PathWithMatch,
@@ -570,10 +587,10 @@ impl HiArgs {
SearchMode::Count => SummaryKind::Count,
SearchMode::CountMatches => SummaryKind::CountMatches,
SearchMode::JSON => {
return Printer::JSON(self.printer_json(wtr))
return Printer::JSON(self.printer_json(wtr));
}
SearchMode::Standard => {
return Printer::Standard(self.printer_standard(wtr))
return Printer::Standard(self.printer_standard(wtr));
}
}
};
@@ -587,8 +604,8 @@ impl HiArgs {
) -> grep::printer::JSON<W> {
grep::printer::JSONBuilder::new()
.pretty(false)
.max_matches(self.max_count)
.always_begin_end(false)
.replacement(self.replace.clone().map(|r| r.into()))
.build(wtr)
}
@@ -605,15 +622,16 @@ impl HiArgs {
.column(self.column)
.heading(self.heading)
.hyperlink(self.hyperlink_config.clone())
.in_file_index(self.in_file_index)
.max_columns_preview(self.max_columns_preview)
.max_columns(self.max_columns)
.max_matches(self.max_count)
.only_matching(self.only_matching)
.path(self.with_filename)
.path_terminator(self.path_terminator.clone())
.per_match_one_line(true)
.per_match(self.vimgrep)
.replacement(self.replace.clone().map(|r| r.into()))
.squash(self.squash)
.separator_context(self.context_separator.clone().into_bytes())
.separator_field_context(
self.field_context_separator.clone().into_bytes(),
@@ -647,7 +665,6 @@ impl HiArgs {
.exclude_zero(!self.include_zero)
.hyperlink(self.hyperlink_config.clone())
.kind(kind)
.max_matches(self.max_count)
.path(self.with_filename)
.path_terminator(self.path_terminator.clone())
.separator_field(b":".to_vec())
@@ -709,10 +726,12 @@ impl HiArgs {
};
let mut builder = grep::searcher::SearcherBuilder::new();
builder
.max_matches(self.max_count)
.line_terminator(line_term)
.invert_match(self.invert_match)
.line_number(self.line_number)
.multi_line(self.multiline)
.multiline_window(self.multiline_window)
.memory_map(self.mmap_choice.clone())
.stop_on_nonmatch(self.stop_on_nonmatch);
match self.context {
@@ -771,7 +790,13 @@ impl HiArgs {
let Some(ref sort) = self.sort else { return Box::new(haystacks) };
let mut with_timestamps: Vec<_> = match sort.kind {
SortModeKind::Path if !sort.reverse => return Box::new(haystacks),
SortModeKind::Path => todo!(),
SortModeKind::Path => {
let mut haystacks = haystacks.collect::<Vec<Haystack>>();
haystacks.sort_by(|ref h1, ref h2| {
h1.path().cmp(h2.path()).reverse()
});
return Box::new(haystacks.into_iter());
}
SortModeKind::LastModified => {
attach_timestamps(haystacks, |md| md.modified()).collect()
}
@@ -782,7 +807,7 @@ impl HiArgs {
attach_timestamps(haystacks, |md| md.created()).collect()
}
};
with_timestamps.sort_by(|(_, ref t1), (_, ref t2)| {
with_timestamps.sort_by(|(_, t1), (_, t2)| {
let ordering = match (*t1, *t2) {
// Both have metadata, do the obvious thing.
(Some(t1), Some(t2)) => t1.cmp(&t2),
@@ -793,11 +818,7 @@ impl HiArgs {
// When both error, we can't distinguish, so treat as equal.
(None, None) => Ordering::Equal,
};
if sort.reverse {
ordering.reverse()
} else {
ordering
}
if sort.reverse { ordering.reverse() } else { ordering }
});
Box::new(with_timestamps.into_iter().map(|(s, _)| s))
}
@@ -887,7 +908,8 @@ impl HiArgs {
.git_ignore(!self.no_ignore_vcs)
.git_exclude(!self.no_ignore_vcs && !self.no_ignore_exclude)
.require_git(!self.no_require_git)
.ignore_case_insensitive(self.ignore_file_case_insensitive);
.ignore_case_insensitive(self.ignore_file_case_insensitive)
.current_dir(&self.cwd);
if !self.no_ignore_dot {
builder.add_custom_ignore_filename(".rgignore");
}
@@ -937,10 +959,12 @@ impl State {
fn new() -> anyhow::Result<State> {
use std::io::IsTerminal;
let cwd = current_dir()?;
log::debug!("read CWD from environment: {}", cwd.display());
Ok(State {
is_terminal_stdout: std::io::stdout().is_terminal(),
stdin_consumed: false,
cwd: current_dir()?,
cwd,
})
}
}
@@ -1074,9 +1098,18 @@ impl Paths {
}
paths.push(path);
}
log::debug!("number of paths given to search: {}", paths.len());
if !paths.is_empty() {
let is_one_file = paths.len() == 1
&& (paths[0] == Path::new("-") || paths[0].is_file());
// Note that we specifically use `!paths[0].is_dir()` here
// instead of `paths[0].is_file()`. Namely, the latter can
// return `false` even when the path is something resembling
// a file. So instead, we just consider the path a file as
// long as we know it isn't a directory.
//
// See: https://github.com/BurntSushi/ripgrep/issues/2736
&& (paths[0] == Path::new("-") || !paths[0].is_dir());
log::debug!("is_one_file? {is_one_file:?}");
return Ok(Paths { paths, has_implicit_path: false, is_one_file });
}
// N.B. is_readable_stdin is a heuristic! Part of the issue is that a
@@ -1163,7 +1196,7 @@ fn types(low: &LowArgs) -> anyhow::Result<ignore::types::Types> {
let mut builder = ignore::types::TypesBuilder::new();
builder.add_defaults();
for tychange in low.type_changes.iter() {
match tychange {
match *tychange {
TypeChange::Clear { ref name } => {
builder.clear(name);
}

View File

@@ -9,7 +9,7 @@ use std::{
use {
bstr::{BString, ByteVec},
grep::printer::{HyperlinkFormat, UserColorSpec},
grep::printer::{HyperlinkFormat, SquashMode, UserColorSpec},
};
/// A collection of "low level" arguments.
@@ -65,6 +65,7 @@ pub(crate) struct LowArgs {
pub(crate) ignore_file: Vec<PathBuf>,
pub(crate) ignore_file_case_insensitive: bool,
pub(crate) include_zero: bool,
pub(crate) in_file_index: bool,
pub(crate) invert_match: bool,
pub(crate) line_number: Option<bool>,
pub(crate) logging: Option<LoggingMode>,
@@ -76,6 +77,7 @@ pub(crate) struct LowArgs {
pub(crate) mmap: MmapMode,
pub(crate) multiline: bool,
pub(crate) multiline_dotall: bool,
pub(crate) multiline_window: Option<usize>,
pub(crate) no_config: bool,
pub(crate) no_ignore_dot: bool,
pub(crate) no_ignore_exclude: bool,
@@ -101,6 +103,7 @@ pub(crate) struct LowArgs {
pub(crate) sort: Option<SortMode>,
pub(crate) stats: bool,
pub(crate) stop_on_nonmatch: bool,
pub(crate) squash: SquashMode,
pub(crate) threads: Option<usize>,
pub(crate) trim: bool,
pub(crate) type_changes: Vec<TypeChange>,
@@ -229,13 +232,14 @@ pub(crate) enum GenerateMode {
}
/// Indicates how ripgrep should treat binary data.
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) enum BinaryMode {
/// Automatically determine the binary mode to use. Essentially, when
/// a file is searched explicitly, then it will be searched using the
/// `SearchAndSuppress` strategy. Otherwise, it will be searched in a way
/// that attempts to skip binary files as much as possible. That is, once
/// a file is classified as binary, searching will immediately stop.
#[default]
Auto,
/// Search files even when they have binary data, but if a match is found,
/// suppress it and emit a warning.
@@ -251,12 +255,6 @@ pub(crate) enum BinaryMode {
AsText,
}
impl Default for BinaryMode {
fn default() -> BinaryMode {
BinaryMode::Auto
}
}
/// Indicates what kind of boundary mode to use (line or word).
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum BoundaryMode {
@@ -269,10 +267,11 @@ pub(crate) enum BoundaryMode {
/// Indicates the buffer mode that ripgrep should use when printing output.
///
/// The default is `Auto`.
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) enum BufferMode {
/// Select the buffer mode, 'line' or 'block', automatically based on
/// whether stdout is connected to a tty.
#[default]
Auto,
/// Flush the output buffer whenever a line terminator is seen.
///
@@ -287,18 +286,13 @@ pub(crate) enum BufferMode {
Block,
}
impl Default for BufferMode {
fn default() -> BufferMode {
BufferMode::Auto
}
}
/// Indicates the case mode for how to interpret all patterns given to ripgrep.
///
/// The default is `Sensitive`.
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) enum CaseMode {
/// Patterns are matched case sensitively. i.e., `a` does not match `A`.
#[default]
Sensitive,
/// Patterns are matched case insensitively. i.e., `a` does match `A`.
Insensitive,
@@ -308,21 +302,16 @@ pub(crate) enum CaseMode {
Smart,
}
impl Default for CaseMode {
fn default() -> CaseMode {
CaseMode::Sensitive
}
}
/// Indicates whether ripgrep should include color/hyperlinks in its output.
///
/// The default is `Auto`.
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) enum ColorChoice {
/// Color and hyperlinks will never be used.
Never,
/// Color and hyperlinks will be used only when stdout is connected to a
/// tty.
#[default]
Auto,
/// Color will always be used.
Always,
@@ -335,12 +324,6 @@ pub(crate) enum ColorChoice {
Ansi,
}
impl Default for ColorChoice {
fn default() -> ColorChoice {
ColorChoice::Auto
}
}
impl ColorChoice {
/// Convert this color choice to the corresponding termcolor type.
pub(crate) fn to_termcolor(&self) -> termcolor::ColorChoice {
@@ -529,9 +512,10 @@ impl ContextSeparator {
/// The encoding mode the searcher will use.
///
/// The default is `Auto`.
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) enum EncodingMode {
/// Use only BOM sniffing to auto-detect an encoding.
#[default]
Auto,
/// Use an explicit encoding forcefully, but let BOM sniffing override it.
Some(grep::searcher::Encoding),
@@ -541,21 +525,16 @@ pub(crate) enum EncodingMode {
Disabled,
}
impl Default for EncodingMode {
fn default() -> EncodingMode {
EncodingMode::Auto
}
}
/// The regex engine to use.
///
/// The default is `Default`.
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) enum EngineChoice {
/// Uses the default regex engine: Rust's `regex` crate.
///
/// (Well, technically it uses `regex-automata`, but `regex-automata` is
/// the implementation of the `regex` crate.)
#[default]
Default,
/// Dynamically select the right engine to use.
///
@@ -566,12 +545,6 @@ pub(crate) enum EngineChoice {
PCRE2,
}
impl Default for EngineChoice {
fn default() -> EngineChoice {
EngineChoice::Default
}
}
/// The field context separator to use to between metadata for each contextual
/// line.
///
@@ -651,10 +624,11 @@ pub(crate) enum LoggingMode {
/// Indicates when to use memory maps.
///
/// The default is `Auto`.
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) enum MmapMode {
/// This instructs ripgrep to use heuristics for selecting when to and not
/// to use memory maps for searching.
#[default]
Auto,
/// This instructs ripgrep to always try memory maps when possible. (Memory
/// maps are not possible to use in all circumstances, for example, for
@@ -666,12 +640,6 @@ pub(crate) enum MmapMode {
Never,
}
impl Default for MmapMode {
fn default() -> MmapMode {
MmapMode::Auto
}
}
/// Represents a source of patterns that ripgrep should search for.
///
/// The reason to unify these is so that we can retain the order of `-f/--flag`

View File

@@ -36,7 +36,7 @@ pub(crate) use crate::flags::{
},
hiargs::HiArgs,
lowargs::{GenerateMode, Mode, SearchMode, SpecialMode},
parse::{parse, ParseResult},
parse::{ParseResult, parse},
};
mod complete;
@@ -70,7 +70,7 @@ mod parse;
/// value. Flags that accept multiple values are an unsupported abberation.
trait Flag: Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static {
/// Returns true if this flag is a switch. When a flag is a switch, the
/// CLI parser will look for a value after the flag is seen.
/// CLI parser will not look for a value after the flag is seen.
fn is_switch(&self) -> bool;
/// A short single byte name for this flag. This returns `None` by default,
@@ -150,6 +150,10 @@ trait Flag: Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static {
&[]
}
fn completion_type(&self) -> CompletionType {
CompletionType::Other
}
/// Given the parsed value (which might just be a switch), this should
/// update the state in `args` based on the value given for this flag.
///
@@ -228,6 +232,21 @@ impl Category {
}
}
/// The kind of argument a flag accepts, to be used for shell completions.
#[derive(Clone, Copy, Debug)]
enum CompletionType {
/// No special category. is_switch() and doc_choices() may apply.
Other,
/// A path to a file.
Filename,
/// A command in $PATH.
Executable,
/// The name of a file type, as used by e.g. --type.
Filetype,
/// The name of an encoding_rs encoding, as used by --encoding.
Encoding,
}
/// Represents a value parsed from the command line.
///
/// This doesn't include the corresponding flag, but values come in one of

View File

@@ -7,10 +7,10 @@ use std::{borrow::Cow, collections::BTreeSet, ffi::OsString};
use anyhow::Context;
use crate::flags::{
Flag, FlagValue,
defs::FLAGS,
hiargs::HiArgs,
lowargs::{LoggingMode, LowArgs, SpecialMode},
Flag, FlagValue,
};
/// The result of parsing CLI arguments.
@@ -323,7 +323,7 @@ enum FlagLookup<'a> {
UnrecognizedLong(String),
}
/// The info about a flag associated with a flag's ID in the the flag map.
/// The info about a flag associated with a flag's ID in the flag map.
#[derive(Debug)]
struct FlagInfo {
/// The flag object and its associated metadata.

View File

@@ -6,7 +6,7 @@ print to stderr. We therefore avoid bringing in extra dependencies just for
this functionality.
*/
use log::{self, Log};
use log::Log;
/// The simplest possible logger that logs to stderr.
///

View File

@@ -37,7 +37,7 @@ mod search;
// i686.
#[cfg(all(target_env = "musl", target_pointer_width = "64"))]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
/// Then, as it was, then again it will be.
fn main() -> ExitCode {
@@ -468,7 +468,7 @@ fn print_stats<W: Write>(
{bytes_printed} bytes printed
{bytes_searched} bytes searched
{search_time:0.6} seconds spent searching
{process_time:0.6} seconds
{process_time:0.6} seconds total
",
matches = stats.matches(),
lines = stats.matched_lines(),

View File

@@ -28,7 +28,7 @@ static ERRORED: AtomicBool = AtomicBool::new(false);
///
/// This locks stdout, not stderr, even though this prints to stderr. This
/// avoids the appearance of interleaving output when stdout and stderr both
/// correspond to a tty.)
/// correspond to a tty.
#[macro_export]
macro_rules! eprintln_locked {
($($tt:tt)*) => {{
@@ -39,21 +39,29 @@ macro_rules! eprintln_locked {
// lock stdout before printing to stderr. This avoids interleaving
// lines within ripgrep because `search_parallel` uses `termcolor`,
// which accesses the same stdout lock when writing lines.
let stdout = std::io::stdout();
let _handle = stdout.lock();
let stdout = std::io::stdout().lock();
let mut stderr = std::io::stderr().lock();
// We specifically ignore any errors here. One plausible error we
// can get in some cases is a broken pipe error. And when that
// occurs, we should exit gracefully. Otherwise, just abort with
// an error code because there isn't much else we can do.
//
// See: https://github.com/BurntSushi/ripgrep/issues/1966
if let Err(err) = writeln!(std::io::stderr(), $($tt)*) {
if let Err(err) = write!(stderr, "rg: ") {
if err.kind() == std::io::ErrorKind::BrokenPipe {
std::process::exit(0);
} else {
std::process::exit(2);
}
}
if let Err(err) = writeln!(stderr, $($tt)*) {
if err.kind() == std::io::ErrorKind::BrokenPipe {
std::process::exit(0);
} else {
std::process::exit(2);
}
}
drop(stdout);
}
}}
}
@@ -91,19 +99,19 @@ macro_rules! ignore_message {
/// Returns true if and only if messages should be shown.
pub(crate) fn messages() -> bool {
MESSAGES.load(Ordering::SeqCst)
MESSAGES.load(Ordering::Relaxed)
}
/// Set whether messages should be shown or not.
///
/// By default, they are not shown.
pub(crate) fn set_messages(yes: bool) {
MESSAGES.store(yes, Ordering::SeqCst)
MESSAGES.store(yes, Ordering::Relaxed)
}
/// Returns true if and only if "ignore" related messages should be shown.
pub(crate) fn ignore_messages() -> bool {
IGNORE_MESSAGES.load(Ordering::SeqCst)
IGNORE_MESSAGES.load(Ordering::Relaxed)
}
/// Set whether "ignore" related messages should be shown or not.
@@ -114,12 +122,12 @@ pub(crate) fn ignore_messages() -> bool {
/// `messages` is disabled, then "ignore" messages are never shown, regardless
/// of this setting.
pub(crate) fn set_ignore_messages(yes: bool) {
IGNORE_MESSAGES.store(yes, Ordering::SeqCst)
IGNORE_MESSAGES.store(yes, Ordering::Relaxed)
}
/// Returns true if and only if ripgrep came across a non-fatal error.
pub(crate) fn errored() -> bool {
ERRORED.load(Ordering::SeqCst)
ERRORED.load(Ordering::Relaxed)
}
/// Indicate that ripgrep has come across a non-fatal error.
@@ -127,5 +135,5 @@ pub(crate) fn errored() -> bool {
/// Callers should not use this directly. Instead, it is called automatically
/// via the `err_message` macro.
pub(crate) fn set_errored() {
ERRORED.store(true, Ordering::SeqCst);
ERRORED.store(true, Ordering::Relaxed);
}

View File

@@ -41,7 +41,6 @@ impl Default for Config {
pub(crate) struct SearchWorkerBuilder {
config: Config,
command_builder: grep::cli::CommandReaderBuilder,
decomp_builder: grep::cli::DecompressionReaderBuilder,
}
impl Default for SearchWorkerBuilder {
@@ -53,17 +52,10 @@ impl Default for SearchWorkerBuilder {
impl SearchWorkerBuilder {
/// Create a new builder for configuring and constructing a search worker.
pub(crate) fn new() -> SearchWorkerBuilder {
let mut cmd_builder = grep::cli::CommandReaderBuilder::new();
cmd_builder.async_stderr(true);
let mut command_builder = grep::cli::CommandReaderBuilder::new();
command_builder.async_stderr(true);
let mut decomp_builder = grep::cli::DecompressionReaderBuilder::new();
decomp_builder.async_stderr(true);
SearchWorkerBuilder {
config: Config::default(),
command_builder: cmd_builder,
decomp_builder,
}
SearchWorkerBuilder { config: Config::default(), command_builder }
}
/// Create a new search worker using the given searcher, matcher and
@@ -76,7 +68,12 @@ impl SearchWorkerBuilder {
) -> SearchWorker<W> {
let config = self.config.clone();
let command_builder = self.command_builder.clone();
let decomp_builder = self.decomp_builder.clone();
let decomp_builder = config.search_zip.then(|| {
let mut decomp_builder =
grep::cli::DecompressionReaderBuilder::new();
decomp_builder.async_stderr(true);
decomp_builder
});
SearchWorker {
config,
command_builder,
@@ -233,7 +230,11 @@ impl<W: WriteColor> Printer<W> {
pub(crate) struct SearchWorker<W> {
config: Config,
command_builder: grep::cli::CommandReaderBuilder,
decomp_builder: grep::cli::DecompressionReaderBuilder,
/// This is `None` when `search_zip` is not enabled, since in this case it
/// can never be used. We do this because building the reader can sometimes
/// do non-trivial work (like resolving the paths of decompression binaries
/// on Windows).
decomp_builder: Option<grep::cli::DecompressionReaderBuilder>,
matcher: PatternMatcher,
searcher: grep::searcher::Searcher,
printer: Printer<W>,
@@ -273,10 +274,9 @@ impl<W: WriteColor> SearchWorker<W> {
/// Returns true if and only if the given file path should be
/// decompressed before searching.
fn should_decompress(&self, path: &Path) -> bool {
if !self.config.search_zip {
return false;
}
self.decomp_builder.get_matcher().has_command(path)
self.decomp_builder.as_ref().is_some_and(|decomp_builder| {
decomp_builder.get_matcher().has_command(path)
})
}
/// Returns true if and only if the given file path should be run through
@@ -307,15 +307,14 @@ impl<W: WriteColor> SearchWorker<W> {
io::Error::new(
io::ErrorKind::Other,
format!(
"preprocessor command could not start: '{:?}': {}",
cmd, err,
"preprocessor command could not start: '{cmd:?}': {err}",
),
)
})?;
let result = self.search_reader(path, &mut rdr).map_err(|err| {
io::Error::new(
io::ErrorKind::Other,
format!("preprocessor command failed: '{:?}': {}", cmd, err),
format!("preprocessor command failed: '{cmd:?}': {err}"),
)
});
let close_result = rdr.close();
@@ -328,7 +327,10 @@ impl<W: WriteColor> SearchWorker<W> {
/// result. If the given file path isn't recognized as a compressed file,
/// then search it without doing any decompression.
fn search_decompress(&mut self, path: &Path) -> io::Result<SearchResult> {
let mut rdr = self.decomp_builder.build(path)?;
let Some(ref decomp_builder) = self.decomp_builder else {
return self.search_path(path);
};
let mut rdr = decomp_builder.build(path)?;
let result = self.search_reader(path, &mut rdr);
let close_result = rdr.close();
let search_result = result?;

View File

@@ -1,6 +1,6 @@
[package]
name = "globset"
version = "0.4.14" #:version
version = "0.4.18" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Cross platform single glob and glob set matching. Glob set matching is the
@@ -13,7 +13,7 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/globset"
readme = "README.md"
keywords = ["regex", "glob", "multiple", "set", "pattern"]
license = "Unlicense OR MIT"
edition = "2021"
edition = "2024"
[lib]
name = "globset"
@@ -21,6 +21,7 @@ bench = false
[dependencies]
aho-corasick = "1.1.1"
arbitrary = { version = "1.3.2", optional = true, features = ["derive"] }
bstr = { version = "1.6.2", default-features = false, features = ["std"] }
log = { version = "0.4.20", optional = true }
serde = { version = "1.0.188", optional = true }
@@ -41,6 +42,7 @@ serde_json = "1.0.107"
[features]
default = ["log"]
arbitrary = ["dep:arbitrary"]
# DEPRECATED. It is a no-op. SIMD is done automatically through runtime
# dispatch.
simd-accel = []

View File

@@ -1,8 +1,9 @@
use std::path::{is_separator, Path};
use std::fmt::Write;
use std::path::{Path, is_separator};
use regex_automata::meta::Regex;
use crate::{new_regex, Candidate, Error, ErrorKind};
use crate::{Candidate, Error, ErrorKind, new_regex};
/// Describes a matching strategy for a particular pattern.
///
@@ -70,7 +71,8 @@ impl MatchStrategy {
///
/// It cannot be used directly to match file paths, but it can be converted
/// to a regular expression string or a matcher.
#[derive(Clone, Debug, Eq)]
#[derive(Clone, Eq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Glob {
glob: String,
re: String,
@@ -78,6 +80,12 @@ pub struct Glob {
tokens: Tokens,
}
impl AsRef<Glob> for Glob {
fn as_ref(&self) -> &Glob {
self
}
}
impl PartialEq for Glob {
fn eq(&self, other: &Glob) -> bool {
self.glob == other.glob && self.opts == other.opts
@@ -91,6 +99,21 @@ impl std::hash::Hash for Glob {
}
}
impl std::fmt::Debug for Glob {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if f.alternate() {
f.debug_struct("Glob")
.field("glob", &self.glob)
.field("re", &self.re)
.field("opts", &self.opts)
.field("tokens", &self.tokens)
.finish()
} else {
f.debug_tuple("Glob").field(&self.glob).finish()
}
}
}
impl std::fmt::Display for Glob {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.glob.fmt(f)
@@ -193,6 +216,7 @@ pub struct GlobBuilder<'a> {
}
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
struct GlobOptions {
/// Whether to match case insensitively.
case_insensitive: bool,
@@ -205,6 +229,11 @@ struct GlobOptions {
/// Whether or not an empty case in an alternate will be removed.
/// e.g., when enabled, `{,a}` will match "" and "a".
empty_alternates: bool,
/// Whether or not an unclosed character class is allowed. When an unclosed
/// character class is found, the opening `[` is treated as a literal `[`.
/// When this isn't enabled, an opening `[` without a corresponding `]` is
/// treated as an error.
allow_unclosed_class: bool,
}
impl GlobOptions {
@@ -214,11 +243,13 @@ impl GlobOptions {
literal_separator: false,
backslash_escape: !is_separator('\\'),
empty_alternates: false,
allow_unclosed_class: false,
}
}
}
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
struct Tokens(Vec<Token>);
impl std::ops::Deref for Tokens {
@@ -235,6 +266,7 @@ impl std::ops::DerefMut for Tokens {
}
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
enum Token {
Literal(char),
Any,
@@ -308,11 +340,7 @@ impl Glob {
let Token::Literal(c) = *t else { return None };
lit.push(c);
}
if lit.is_empty() {
None
} else {
Some(lit)
}
if lit.is_empty() { None } else { Some(lit) }
}
/// Returns an extension if this pattern matches a file path if and only
@@ -353,11 +381,7 @@ impl Glob {
_ => return None,
}
}
if lit.is_empty() {
None
} else {
Some(lit)
}
if lit.is_empty() { None } else { Some(lit) }
}
/// This is like `ext`, but returns an extension even if it isn't sufficient
@@ -420,11 +444,7 @@ impl Glob {
if need_sep {
lit.push('/');
}
if lit.is_empty() {
None
} else {
Some(lit)
}
if lit.is_empty() { None } else { Some(lit) }
}
/// Returns a literal suffix of this pattern if the entire pattern matches
@@ -473,11 +493,7 @@ impl Glob {
let Token::Literal(c) = *t else { return None };
lit.push(c);
}
if lit.is_empty() || lit == "/" {
None
} else {
Some((lit, entire))
}
if lit.is_empty() || lit == "/" { None } else { Some((lit, entire)) }
}
/// If this pattern only needs to inspect the basename of a file path,
@@ -563,25 +579,27 @@ impl<'a> GlobBuilder<'a> {
pub fn build(&self) -> Result<Glob, Error> {
let mut p = Parser {
glob: &self.glob,
stack: vec![Tokens::default()],
alternates_stack: Vec::new(),
branches: vec![Tokens::default()],
chars: self.glob.chars().peekable(),
prev: None,
cur: None,
found_unclosed_class: false,
opts: &self.opts,
};
p.parse()?;
if p.stack.is_empty() {
Err(Error {
glob: Some(self.glob.to_string()),
kind: ErrorKind::UnopenedAlternates,
})
} else if p.stack.len() > 1 {
if p.branches.is_empty() {
// OK because of how the branches/alternates_stack are managed.
// If we end up here, then there *must* be a bug in the parser
// somewhere.
unreachable!()
} else if p.branches.len() > 1 {
Err(Error {
glob: Some(self.glob.to_string()),
kind: ErrorKind::UnclosedAlternates,
})
} else {
let tokens = p.stack.pop().unwrap();
let tokens = p.branches.pop().unwrap();
Ok(Glob {
glob: self.glob.to_string(),
re: tokens.to_regex_with(&self.opts),
@@ -630,6 +648,22 @@ impl<'a> GlobBuilder<'a> {
self.opts.empty_alternates = yes;
self
}
/// Toggle whether unclosed character classes are allowed. When allowed,
/// a `[` without a matching `]` is treated literally instead of resulting
/// in a parse error.
///
/// For example, if this is set then the glob `[abc` will be treated as the
/// literal string `[abc` instead of returning an error.
///
/// By default, this is false. Generally speaking, enabling this leads to
/// worse failure modes since the glob parser becomes more permissive. You
/// might want to enable this when compatibility (e.g., with POSIX glob
/// implementations) is more important than good error messages.
pub fn allow_unclosed_class(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
self.opts.allow_unclosed_class = yes;
self
}
}
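A minimal usage sketch of the new option, mirroring the allow_unclosed_class_* toregex test cases added below; GlobBuilder::new and compile_matcher are the existing globset APIs:

use globset::GlobBuilder;

// Hypothetical: with the option enabled, the unclosed class is treated as
// a literal '[' instead of producing ErrorKind::UnclosedClass.
let glob = GlobBuilder::new("[abc")
    .allow_unclosed_class(true)
    .build()
    .unwrap();
let m = glob.compile_matcher();
assert!(m.is_match("[abc"));
assert!(!m.is_match("abc"));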
impl Tokens {
@@ -732,7 +766,9 @@ impl Tokens {
/// Convert a Unicode scalar value to an escaped string suitable for use as
/// a literal in a non-Unicode regex.
fn char_to_escaped_literal(c: char) -> String {
bytes_to_escaped_literal(&c.to_string().into_bytes())
let mut buf = [0; 4];
let bytes = c.encode_utf8(&mut buf).as_bytes();
bytes_to_escaped_literal(bytes)
}
/// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
@@ -741,22 +777,41 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String {
let mut s = String::with_capacity(bs.len());
for &b in bs {
if b <= 0x7F {
s.push_str(&regex_syntax::escape(
regex_syntax::escape_into(
char::from(b).encode_utf8(&mut [0; 4]),
));
&mut s,
);
} else {
s.push_str(&format!("\\x{:02x}", b));
write!(&mut s, "\\x{:02x}", b).unwrap();
}
}
s
}
struct Parser<'a> {
/// The glob to parse.
glob: &'a str,
stack: Vec<Tokens>,
/// Marks the index in `branches` where each open alternation started.
alternates_stack: Vec<usize>,
/// The set of active alternation branches being parsed.
/// Tokens are added to the end of the last one.
branches: Vec<Tokens>,
/// A character iterator over the glob pattern to parse.
chars: std::iter::Peekable<std::str::Chars<'a>>,
/// The previous character seen.
prev: Option<char>,
/// The current character.
cur: Option<char>,
/// Whether we failed to find a closing `]` for a character
/// class. This can only be true when `GlobOptions::allow_unclosed_class`
/// is enabled. Once this is set, it is impossible to ever parse another
/// character class with this glob. That's because classes cannot be
/// nested *and* the only way this happens is when there is never a `]`.
///
/// We track this state so that we don't end up spending quadratic time
/// trying to parse something like `[[[[[[[[[[[[[[[[[[[[[[[...`.
found_unclosed_class: bool,
/// Glob options, which may influence parsing.
opts: &'a GlobOptions,
}
@@ -770,7 +825,7 @@ impl<'a> Parser<'a> {
match c {
'?' => self.push_token(Token::Any)?,
'*' => self.parse_star()?,
'[' => self.parse_class()?,
'[' if !self.found_unclosed_class => self.parse_class()?,
'{' => self.push_alternate()?,
'}' => self.pop_alternate()?,
',' => self.parse_comma()?,
@@ -782,36 +837,37 @@ impl<'a> Parser<'a> {
}
fn push_alternate(&mut self) -> Result<(), Error> {
if self.stack.len() > 1 {
return Err(self.error(ErrorKind::NestedAlternates));
}
Ok(self.stack.push(Tokens::default()))
self.alternates_stack.push(self.branches.len());
self.branches.push(Tokens::default());
Ok(())
}
fn pop_alternate(&mut self) -> Result<(), Error> {
let mut alts = vec![];
while self.stack.len() >= 2 {
alts.push(self.stack.pop().unwrap());
}
self.push_token(Token::Alternates(alts))
let Some(start) = self.alternates_stack.pop() else {
return Err(self.error(ErrorKind::UnopenedAlternates));
};
assert!(start <= self.branches.len());
let alts = Token::Alternates(self.branches.drain(start..).collect());
self.push_token(alts)?;
Ok(())
}
fn push_token(&mut self, tok: Token) -> Result<(), Error> {
if let Some(ref mut pat) = self.stack.last_mut() {
if let Some(ref mut pat) = self.branches.last_mut() {
return Ok(pat.push(tok));
}
Err(self.error(ErrorKind::UnopenedAlternates))
}
fn pop_token(&mut self) -> Result<Token, Error> {
if let Some(ref mut pat) = self.stack.last_mut() {
if let Some(ref mut pat) = self.branches.last_mut() {
return Ok(pat.pop().unwrap());
}
Err(self.error(ErrorKind::UnopenedAlternates))
}
fn have_tokens(&self) -> Result<bool, Error> {
match self.stack.last() {
match self.branches.last() {
None => Err(self.error(ErrorKind::UnopenedAlternates)),
Some(ref pat) => Ok(!pat.is_empty()),
}
@@ -820,11 +876,11 @@ impl<'a> Parser<'a> {
fn parse_comma(&mut self) -> Result<(), Error> {
// If we aren't inside a group alternation, then don't
// treat commas specially. Otherwise, we need to start
// a new alternate.
if self.stack.len() <= 1 {
// a new alternate branch.
if self.alternates_stack.is_empty() {
self.push_token(Token::Literal(','))
} else {
Ok(self.stack.push(Tokens::default()))
Ok(self.branches.push(Tokens::default()))
}
}
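Since alternations may now nest (the NestedAlternates error is deprecated elsewhere in this diff), a brief sketch of the resulting matching behavior, mirroring the matchalt17-19 and re36/re37 test cases added further down in this file:

use globset::Glob;

let m = Glob::new("{a,b{c,d}}").unwrap().compile_matcher();
assert!(m.is_match("a"));
assert!(m.is_match("bc"));
assert!(m.is_match("bd"));
assert!(!m.is_match("b")); // the inner alternation still requires c or d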
@@ -861,7 +917,7 @@ impl<'a> Parser<'a> {
}
if !prev.map(is_separator).unwrap_or(false) {
if self.stack.len() <= 1
if self.branches.len() <= 1
|| (prev != Some(',') && prev != Some('{'))
{
self.push_token(Token::ZeroOrMore)?;
@@ -874,7 +930,7 @@ impl<'a> Parser<'a> {
assert!(self.bump().is_none());
true
}
Some(',') | Some('}') if self.stack.len() >= 2 => true,
Some(',') | Some('}') if self.branches.len() >= 2 => true,
Some(c) if is_separator(c) => {
assert!(self.bump().map(is_separator).unwrap_or(false));
false
@@ -904,6 +960,11 @@ impl<'a> Parser<'a> {
}
fn parse_class(&mut self) -> Result<(), Error> {
// Save parser state for potential rollback to literal '[' parsing.
let saved_chars = self.chars.clone();
let saved_prev = self.prev;
let saved_cur = self.cur;
fn add_to_last_range(
glob: &str,
r: &mut (char, char),
@@ -931,11 +992,17 @@ impl<'a> Parser<'a> {
let mut first = true;
let mut in_range = false;
loop {
let c = match self.bump() {
Some(c) => c,
// The only way to successfully break this loop is to observe
// a ']'.
None => return Err(self.error(ErrorKind::UnclosedClass)),
let Some(c) = self.bump() else {
return if self.opts.allow_unclosed_class {
self.chars = saved_chars;
self.cur = saved_cur;
self.prev = saved_prev;
self.found_unclosed_class = true;
self.push_token(Token::Literal('['))
} else {
Err(self.error(ErrorKind::UnclosedClass))
};
};
match c {
']' => {
@@ -1020,6 +1087,7 @@ mod tests {
litsep: Option<bool>,
bsesc: Option<bool>,
ealtre: Option<bool>,
unccls: Option<bool>,
}
macro_rules! syntax {
@@ -1062,6 +1130,10 @@ mod tests {
if let Some(ealtre) = $options.ealtre {
builder.empty_alternates(ealtre);
}
if let Some(unccls) = $options.unccls {
builder.allow_unclosed_class(unccls);
}
let pat = builder.build().unwrap();
assert_eq!(format!("(?-u){}", $re), pat.regex());
}
@@ -1202,25 +1274,80 @@ mod tests {
syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
syntaxerr!(err_alt1, "{a,b", ErrorKind::UnclosedAlternates);
syntaxerr!(err_alt2, "{a,{b,c}", ErrorKind::UnclosedAlternates);
syntaxerr!(err_alt3, "a,b}", ErrorKind::UnopenedAlternates);
syntaxerr!(err_alt4, "{a,b}}", ErrorKind::UnopenedAlternates);
const CASEI: Options =
Options { casei: Some(true), litsep: None, bsesc: None, ealtre: None };
const SLASHLIT: Options =
Options { casei: None, litsep: Some(true), bsesc: None, ealtre: None };
const CASEI: Options = Options {
casei: Some(true),
litsep: None,
bsesc: None,
ealtre: None,
unccls: None,
};
const SLASHLIT: Options = Options {
casei: None,
litsep: Some(true),
bsesc: None,
ealtre: None,
unccls: None,
};
const NOBSESC: Options = Options {
casei: None,
litsep: None,
bsesc: Some(false),
ealtre: None,
unccls: None,
};
const BSESC: Options = Options {
casei: None,
litsep: None,
bsesc: Some(true),
ealtre: None,
unccls: None,
};
const BSESC: Options =
Options { casei: None, litsep: None, bsesc: Some(true), ealtre: None };
const EALTRE: Options = Options {
casei: None,
litsep: None,
bsesc: Some(true),
ealtre: Some(true),
unccls: None,
};
const UNCCLS: Options = Options {
casei: None,
litsep: None,
bsesc: None,
ealtre: None,
unccls: Some(true),
};
toregex!(allow_unclosed_class_single, r"[", r"^\[$", &UNCCLS);
toregex!(allow_unclosed_class_many, r"[abc", r"^\[abc$", &UNCCLS);
toregex!(allow_unclosed_class_empty1, r"[]", r"^\[\]$", &UNCCLS);
toregex!(allow_unclosed_class_empty2, r"[][", r"^\[\]\[$", &UNCCLS);
toregex!(allow_unclosed_class_negated_unclosed, r"[!", r"^\[!$", &UNCCLS);
toregex!(allow_unclosed_class_negated_empty, r"[!]", r"^\[!\]$", &UNCCLS);
toregex!(
allow_unclosed_class_brace1,
r"{[abc,xyz}",
r"^(?:\[abc|xyz)$",
&UNCCLS
);
toregex!(
allow_unclosed_class_brace2,
r"{[abc,[xyz}",
r"^(?:\[abc|\[xyz)$",
&UNCCLS
);
toregex!(
allow_unclosed_class_brace3,
r"{[abc],[xyz}",
r"^(?:[abc]|\[xyz)$",
&UNCCLS
);
toregex!(re_empty, "", "^$");
toregex!(re_casei, "a", "(?i)^a$", &CASEI);
@@ -1261,7 +1388,9 @@ mod tests {
toregex!(re32, "/a**", r"^/a.*.*$");
toregex!(re33, "/**a", r"^/.*.*a$");
toregex!(re34, "/a**b", r"^/a.*.*b$");
toregex!(re35, "{a,b}", r"^(?:b|a)$");
toregex!(re35, "{a,b}", r"^(?:a|b)$");
toregex!(re36, "{a,{b,c}}", r"^(?:a|(?:b|c))$");
toregex!(re37, "{{a,b},{c,d}}", r"^(?:(?:a|b)|(?:c|d))$");
matches!(match1, "a", "a");
matches!(match2, "a*b", "a_b");
@@ -1349,6 +1478,9 @@ mod tests {
matches!(matchalt14, "foo{,.txt}", "foo.txt");
nmatches!(matchalt15, "foo{,.txt}", "foo");
matches!(matchalt16, "foo{,.txt}", "foo", EALTRE);
matches!(matchalt17, "{a,b{c,d}}", "bc");
matches!(matchalt18, "{a,b{c,d}}", "bd");
matches!(matchalt19, "{a,b{c,d}}", "a");
matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
#[cfg(unix)]

View File

@@ -94,6 +94,19 @@ Standard Unix-style glob syntax is supported:
A `GlobBuilder` can be used to prevent wildcards from matching path separators,
or to enable case insensitive matching.
# Crate Features
This crate includes optional features that can be enabled if necessary.
These features are not required but may be useful depending on the use case.
The following features are available:
* **arbitrary** -
Enabling this feature introduces a public dependency on the
[`arbitrary`](https://crates.io/crates/arbitrary)
crate. Namely, it implements the `Arbitrary` trait from that crate for the
[`Glob`] type. This feature is disabled by default.
*/
#![deny(missing_docs)]
@@ -107,11 +120,11 @@ use std::{
use {
aho_corasick::AhoCorasick,
bstr::{ByteSlice, ByteVec, B},
bstr::{B, ByteSlice, ByteVec},
regex_automata::{
PatternSet,
meta::Regex,
util::pool::{Pool, PoolGuard},
PatternSet,
},
};
@@ -150,6 +163,7 @@ pub struct Error {
/// The kind of error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum ErrorKind {
/// **DEPRECATED**.
///
@@ -169,20 +183,16 @@ pub enum ErrorKind {
UnopenedAlternates,
/// Occurs when a `{` is found without a matching `}`.
UnclosedAlternates,
/// Occurs when an alternating group is nested inside another alternating
/// group, e.g., `{{a,b},{c,d}}`.
/// **DEPRECATED**.
///
/// This error used to occur when an alternating group was nested inside
/// another alternating group, e.g., `{{a,b},{c,d}}`. However, this is now
/// supported and as such this error cannot occur.
NestedAlternates,
/// Occurs when an unescaped '\' is found at the end of a glob.
DanglingEscape,
/// An error associated with parsing or compiling a regex.
Regex(String),
/// Hints that destructuring should not be exhaustive.
///
/// This enum may grow additional variants, so this makes sure clients
/// don't count on exhaustive matching. (Otherwise, adding a new variant
/// could break existing code.)
#[doc(hidden)]
__Nonexhaustive,
}
impl std::error::Error for Error {
@@ -226,7 +236,6 @@ impl ErrorKind {
}
ErrorKind::DanglingEscape => "dangling '\\'",
ErrorKind::Regex(ref err) => err,
ErrorKind::__Nonexhaustive => unreachable!(),
}
}
}
@@ -255,7 +264,6 @@ impl std::fmt::Display for ErrorKind {
ErrorKind::InvalidRange(s, e) => {
write!(f, "invalid range; '{}' > '{}'", s, e)
}
ErrorKind::__Nonexhaustive => unreachable!(),
}
}
}
@@ -314,7 +322,7 @@ impl GlobSet {
/// Create an empty `GlobSet`. An empty set matches nothing.
#[inline]
pub fn empty() -> GlobSet {
pub const fn empty() -> GlobSet {
GlobSet { len: 0, strats: vec![] }
}
@@ -351,6 +359,43 @@ impl GlobSet {
false
}
/// Returns true if all globs in this set match the path given.
///
/// This will return true if the set of globs is empty, as in that case all
/// `0` of the globs will match.
///
/// ```
/// use globset::{Glob, GlobSetBuilder};
///
/// let mut builder = GlobSetBuilder::new();
/// builder.add(Glob::new("src/*").unwrap());
/// builder.add(Glob::new("**/*.rs").unwrap());
/// let set = builder.build().unwrap();
///
/// assert!(set.matches_all("src/foo.rs"));
/// assert!(!set.matches_all("src/bar.c"));
/// assert!(!set.matches_all("test.rs"));
/// ```
pub fn matches_all<P: AsRef<Path>>(&self, path: P) -> bool {
self.matches_all_candidate(&Candidate::new(path.as_ref()))
}
/// Returns true if all globs in this set match the path given.
/// This takes a Candidate as input, which can be used to amortize the cost
/// of preparing a path for matching.
/// of peparing a path for matching.
///
/// This will return true if the set of globs is empty, as in that case all
/// `0` of the globs will match.
pub fn matches_all_candidate(&self, path: &Candidate<'_>) -> bool {
for strat in &self.strats {
if !strat.is_match(path) {
return false;
}
}
true
}
/// Returns the sequence number of every glob pattern that matches the
/// given path.
pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
@@ -410,10 +455,20 @@ impl GlobSet {
into.dedup();
}
fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
if pats.is_empty() {
return Ok(GlobSet { len: 0, strats: vec![] });
/// Builds a new matcher from a collection of Glob patterns.
///
/// Once a matcher is built, no new patterns can be added to it.
pub fn new<I, G>(globs: I) -> Result<GlobSet, Error>
where
I: IntoIterator<Item = G>,
G: AsRef<Glob>,
{
let mut it = globs.into_iter().peekable();
if it.peek().is_none() {
return Ok(GlobSet::empty());
}
let mut len = 0;
let mut lits = LiteralStrategy::new();
let mut base_lits = BasenameLiteralStrategy::new();
let mut exts = ExtensionStrategy::new();
@@ -421,7 +476,10 @@ impl GlobSet {
let mut suffixes = MultiStrategyBuilder::new();
let mut required_exts = RequiredExtensionStrategyBuilder::new();
let mut regexes = MultiStrategyBuilder::new();
for (i, p) in pats.iter().enumerate() {
for (i, p) in it.enumerate() {
len += 1;
let p = p.as_ref();
match MatchStrategy::new(p) {
MatchStrategy::Literal(lit) => {
lits.add(i, lit);
@@ -445,7 +503,11 @@ impl GlobSet {
required_exts.add(i, ext, p.regex().to_owned());
}
MatchStrategy::Regex => {
debug!("glob converted to regex: {:?}", p);
debug!(
"glob `{:?}` converted to regex: `{:?}`",
p,
p.regex()
);
regexes.add(i, p.regex().to_owned());
}
}
@@ -461,20 +523,33 @@ impl GlobSet {
required_exts.0.len(),
regexes.literals.len()
);
Ok(GlobSet {
len: pats.len(),
strats: vec![
GlobSetMatchStrategy::Extension(exts),
GlobSetMatchStrategy::BasenameLiteral(base_lits),
GlobSetMatchStrategy::Literal(lits),
GlobSetMatchStrategy::Suffix(suffixes.suffix()),
GlobSetMatchStrategy::Prefix(prefixes.prefix()),
GlobSetMatchStrategy::RequiredExtension(
required_exts.build()?,
),
GlobSetMatchStrategy::Regex(regexes.regex_set()?),
],
})
let mut strats = Vec::with_capacity(7);
// Only add strategies that are populated
if !exts.0.is_empty() {
strats.push(GlobSetMatchStrategy::Extension(exts));
}
if !base_lits.0.is_empty() {
strats.push(GlobSetMatchStrategy::BasenameLiteral(base_lits));
}
if !lits.0.is_empty() {
strats.push(GlobSetMatchStrategy::Literal(lits));
}
if !suffixes.is_empty() {
strats.push(GlobSetMatchStrategy::Suffix(suffixes.suffix()));
}
if !prefixes.is_empty() {
strats.push(GlobSetMatchStrategy::Prefix(prefixes.prefix()));
}
if !required_exts.0.is_empty() {
strats.push(GlobSetMatchStrategy::RequiredExtension(
required_exts.build()?,
));
}
if !regexes.is_empty() {
strats.push(GlobSetMatchStrategy::Regex(regexes.regex_set()?));
}
Ok(GlobSet { len, strats })
}
}
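A short sketch of the now-public GlobSet::new, which accepts any iterator of items implementing AsRef<Glob> (the AsRef impl is added earlier in this diff, and GlobSetBuilder::build below delegates to it); the patterns here are made up:

use globset::{Glob, GlobSet};

let set = GlobSet::new([
    Glob::new("*.rs").unwrap(),
    Glob::new("src/**").unwrap(),
])
.unwrap();
assert!(set.is_match("src/main.rs"));
assert!(!set.is_match("README.md"));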
@@ -504,7 +579,7 @@ impl GlobSetBuilder {
///
/// Once a matcher is built, no new patterns can be added to it.
pub fn build(&self) -> Result<GlobSet, Error> {
GlobSet::new(&self.pats)
GlobSet::new(self.pats.iter())
}
/// Add a new pattern to this set.
@@ -540,18 +615,30 @@ impl<'a> std::fmt::Debug for Candidate<'a> {
impl<'a> Candidate<'a> {
/// Create a new candidate for matching from the given path.
pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
Self::from_cow(Vec::from_path_lossy(path.as_ref()))
}
/// Create a new candidate for matching from the given path as a sequence
/// of bytes.
///
/// Generally speaking, this routine expects the bytes to be
/// _conventionally_ UTF-8. It is legal for the byte sequence to contain
/// invalid UTF-8. However, if the bytes are in some other encoding that
/// isn't ASCII compatible (for example, UTF-16), then the results of
/// matching are unspecified.
pub fn from_bytes<P: AsRef<[u8]> + ?Sized>(path: &'a P) -> Candidate<'a> {
Self::from_cow(Cow::Borrowed(path.as_ref()))
}
fn from_cow(path: Cow<'a, [u8]>) -> Candidate<'a> {
let path = normalize_path(path);
let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
Candidate { path, basename, ext }
}
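A hedged sketch of the new from_bytes constructor combined with candidate-based matching; is_match_candidate is the existing GlobMatcher method from globset, and the path bytes are made up:

use globset::{Candidate, Glob};

let m = Glob::new("*.rs").unwrap().compile_matcher();
// The bytes need not be valid UTF-8, but should be ASCII-compatible.
let cand = Candidate::from_bytes(b"src/main.rs");
assert!(m.is_match_candidate(&cand));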
fn path_prefix(&self, max: usize) -> &[u8] {
if self.path.len() <= max {
&*self.path
} else {
&self.path[..max]
}
if self.path.len() <= max { &*self.path } else { &self.path[..max] }
}
fn path_suffix(&self, max: usize) -> &[u8] {
@@ -892,6 +979,10 @@ impl MultiStrategyBuilder {
patset: Arc::new(Pool::new(create)),
})
}
fn is_empty(&self) -> bool {
self.literals.is_empty()
}
}
#[derive(Clone, Debug)]
@@ -928,13 +1019,26 @@ impl RequiredExtensionStrategyBuilder {
///
/// The escaping works by surrounding meta-characters with brackets. For
/// example, `*` becomes `[*]`.
///
/// # Example
///
/// ```
/// use globset::escape;
///
/// assert_eq!(escape("foo*bar"), "foo[*]bar");
/// assert_eq!(escape("foo?bar"), "foo[?]bar");
/// assert_eq!(escape("foo[bar"), "foo[[]bar");
/// assert_eq!(escape("foo]bar"), "foo[]]bar");
/// assert_eq!(escape("foo{bar"), "foo[{]bar");
/// assert_eq!(escape("foo}bar"), "foo[}]bar");
/// ```
pub fn escape(s: &str) -> String {
let mut escaped = String::with_capacity(s.len());
for c in s.chars() {
match c {
// note that ! does not need escaping because it is only special
// inside brackets
'?' | '*' | '[' | ']' => {
'?' | '*' | '[' | ']' | '{' | '}' => {
escaped.push('[');
escaped.push(c);
escaped.push(']');
@@ -979,6 +1083,7 @@ mod tests {
let set = GlobSetBuilder::new().build().unwrap();
assert!(!set.is_match(""));
assert!(!set.is_match("a"));
assert!(set.matches_all("a"));
}
#[test]
@@ -1019,4 +1124,16 @@ mod tests {
let matches = set.matches("nada");
assert_eq!(0, matches.len());
}
#[test]
fn debug() {
let mut builder = GlobSetBuilder::new();
builder.add(Glob::new("*foo*").unwrap());
builder.add(Glob::new("*bar*").unwrap());
builder.add(Glob::new("*quux*").unwrap());
assert_eq!(
format!("{builder:?}"),
"GlobSetBuilder { pats: [Glob(\"*foo*\"), Glob(\"*bar*\"), Glob(\"*quux*\")] }",
);
}
}

View File

@@ -4,21 +4,25 @@ use bstr::{ByteSlice, ByteVec};
/// The final component of the path, if it is a normal file.
///
/// If the path terminates in `.`, `..`, or consists solely of a root of
/// prefix, file_name will return None.
/// If the path terminates in `..`, or consists solely of a root of prefix,
/// file_name will return `None`.
pub(crate) fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
if path.last_byte().map_or(true, |b| b == b'.') {
if path.is_empty() {
return None;
}
let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0);
Some(match *path {
let got = match *path {
Cow::Borrowed(path) => Cow::Borrowed(&path[last_slash..]),
Cow::Owned(ref path) => {
let mut path = path.clone();
path.drain_bytes(..last_slash);
Cow::Owned(path)
}
})
};
if got == &b".."[..] {
return None;
}
Some(got)
}
/// Return a file extension given a path's file name.
@@ -84,7 +88,7 @@ pub(crate) fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
mod tests {
use std::borrow::Cow;
use bstr::{ByteVec, B};
use bstr::{B, ByteVec};
use super::{file_name_ext, normalize_path};

View File

@@ -1,6 +1,6 @@
[package]
name = "grep"
version = "0.3.0" #:version
version = "0.4.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
@@ -11,23 +11,23 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/grep"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense OR MIT"
edition = "2021"
edition = "2024"
[dependencies]
grep-cli = { version = "0.1.10", path = "../cli" }
grep-matcher = { version = "0.1.7", path = "../matcher" }
grep-pcre2 = { version = "0.1.7", path = "../pcre2", optional = true }
grep-printer = { version = "0.2.0", path = "../printer" }
grep-regex = { version = "0.1.12", path = "../regex" }
grep-searcher = { version = "0.1.12", path = "../searcher" }
grep-cli = { version = "0.1.12", path = "../cli" }
grep-matcher = { version = "0.1.8", path = "../matcher" }
grep-pcre2 = { version = "0.1.9", path = "../pcre2", optional = true }
grep-printer = { version = "0.3.1", path = "../printer" }
grep-regex = { version = "0.1.14", path = "../regex" }
grep-searcher = { version = "0.1.16", path = "../searcher" }
[dev-dependencies]
termcolor = "1.0.4"
walkdir = "2.2.7"
[features]
simd-accel = ["grep-searcher/simd-accel"]
pcre2 = ["grep-pcre2"]
# This feature is DEPRECATED. Runtime dispatch is used for SIMD now.
# These features are DEPRECATED. Runtime dispatch is used for SIMD now.
simd-accel = []
avx-accel = []

View File

@@ -1,6 +1,6 @@
[package]
name = "ignore"
version = "0.4.21" #:version
version = "0.4.25" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
A fast library for efficiently matching ignore files such as `.gitignore`
@@ -12,7 +12,7 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/ignore"
readme = "README.md"
keywords = ["glob", "ignore", "gitignore", "pattern", "file"]
license = "Unlicense OR MIT"
edition = "2021"
edition = "2024"
[lib]
name = "ignore"
@@ -20,7 +20,7 @@ bench = false
[dependencies]
crossbeam-deque = "0.8.3"
globset = { version = "0.4.14", path = "../globset" }
globset = { version = "0.4.18", path = "../globset" }
log = "0.4.20"
memchr = "2.6.3"
same-file = "1.0.6"
@@ -36,7 +36,7 @@ version = "0.1.2"
[dev-dependencies]
bstr = { version = "1.6.2", default-features = false, features = ["std"] }
crossbeam-channel = "0.5.8"
crossbeam-channel = "0.5.15"
[features]
# DEPRECATED. It is a no-op. SIMD is done automatically through runtime

View File

@@ -18,8 +18,8 @@ fn main() {
let stdout_thread = std::thread::spawn(move || {
let mut stdout = std::io::BufWriter::new(std::io::stdout());
for dent in rx {
stdout.write(&*Vec::from_path_lossy(dent.path())).unwrap();
stdout.write(b"\n").unwrap();
stdout.write_all(&Vec::from_path_lossy(dent.path())).unwrap();
stdout.write_all(b"\n").unwrap();
}
});

View File

@@ -27,9 +27,10 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["bat", "batch"], &["*.bat"]),
(&["bazel"], &[
"*.bazel", "*.bzl", "*.BUILD", "*.bazelrc", "BUILD", "MODULE.bazel",
"WORKSPACE", "WORKSPACE.bazel",
"WORKSPACE", "WORKSPACE.bazel", "WORKSPACE.bzlmod",
]),
(&["bitbake"], &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
(&["boxlang"], &["*.bx", "*.bxm", "*.bxs"]),
(&["brotli"], &["*.br"]),
(&["buildstream"], &["*.bst"]),
(&["bzip2"], &["*.bz2", "*.tbz2"]),
@@ -39,6 +40,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["carp"], &["*.carp"]),
(&["cbor"], &["*.cbor"]),
(&["ceylon"], &["*.ceylon"]),
(&["cfml"], &["*.cfc", "*.cfm"]),
(&["clojure"], &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
(&["cmake"], &["*.cmake", "CMakeLists.txt"]),
(&["cmd"], &["*.bat", "*.cmd"]),
@@ -62,7 +64,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["cython"], &["*.pyx", "*.pxi", "*.pxd"]),
(&["d"], &["*.d"]),
(&["dart"], &["*.dart"]),
(&["devicetree"], &["*.dts", "*.dtsi"]),
(&["devicetree"], &["*.dts", "*.dtsi", "*.dtso"]),
(&["dhall"], &["*.dhall"]),
(&["diff"], &["*.patch", "*.diff"]),
(&["dita"], &["*.dita", "*.ditamap", "*.ditaval"]),
@@ -88,6 +90,8 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["fsharp"], &["*.fs", "*.fsx", "*.fsi"]),
(&["fut"], &["*.fut"]),
(&["gap"], &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
(&["gdscript"], &["*.gd"]),
(&["gleam"], &["*.gleam"]),
(&["gn"], &["*.gn", "*.gni"]),
(&["go"], &["*.go"]),
(&["gprbuild"], &["*.gpr"]),
@@ -117,7 +121,9 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["julia"], &["*.jl"]),
(&["jupyter"], &["*.ipynb", "*.jpynb"]),
(&["k"], &["*.k"]),
(&["kconfig"], &["Kconfig", "Kconfig.*"]),
(&["kotlin"], &["*.kt", "*.kts"]),
(&["lean"], &["*.lean"]),
(&["less"], &["*.less"]),
(&["license"], &[
// General
@@ -148,6 +154,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
]),
(&["lilypond"], &["*.ly", "*.ily"]),
(&["lisp"], &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
(&["llvm"], &["*.ll"]),
(&["lock"], &["*.lock", "package-lock.json"]),
(&["log"], &["*.log"]),
(&["lua"], &["*.lua"]),
@@ -158,6 +165,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
"[Gg][Nn][Uu]makefile", "[Mm]akefile",
"[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am",
"[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in",
"Makefile.*",
"*.mk", "*.mak"
]),
(&["mako"], &["*.mako", "*.mao"]),
@@ -172,7 +180,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
"*.mdx",
]),
(&["matlab"], &["*.m"]),
(&["meson"], &["meson.build", "meson_options.txt"]),
(&["meson"], &["meson.build", "meson_options.txt", "meson.options"]),
(&["minified"], &["*.min.html", "*.min.css", "*.min.js"]),
(&["mint"], &["*.mint"]),
(&["mk"], &["mkfile"]),
@@ -180,7 +188,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["motoko"], &["*.mo"]),
(&["msbuild"], &[
"*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets",
"*.sln",
"*.sln", "*.slnf"
]),
(&["nim"], &["*.nim", "*.nimf", "*.nimble", "*.nims"]),
(&["nix"], &["*.nix"]),
@@ -209,7 +217,9 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["py", "python"], &["*.py", "*.pyi"]),
(&["qmake"], &["*.pro", "*.pri", "*.prf"]),
(&["qml"], &["*.qml"]),
(&["r"], &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
(&["qrc"], &["*.qrc"]),
(&["qui"], &["*.ui"]),
(&["r"], &["*.R", "*.r", "*.Rmd", "*.rmd", "*.Rnw", "*.rnw"]),
(&["racket"], &["*.rkt"]),
(&["raku"], &[
"*.raku", "*.rakumod", "*.rakudoc", "*.rakutest",
@@ -226,14 +236,16 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
// Idiomatic files
"config.ru", "Gemfile", ".irbrc", "Rakefile",
// Extensions
"*.gemspec", "*.rb", "*.rbw"
"*.gemspec", "*.rb", "*.rbw", "*.rake"
]),
(&["rust"], &["*.rs"]),
(&["sass"], &["*.sass", "*.scss"]),
(&["scala"], &["*.scala", "*.sbt"]),
(&["scdoc"], &["*.scd", "*.scdoc"]),
(&["seed7"], &["*.sd7", "*.s7i"]),
(&["sh"], &[
// Portable/misc. init files
".login", ".logout", ".profile", "profile",
".env", ".login", ".logout", ".profile", "profile",
// bash-specific init files
".bash_login", "bash_login",
".bash_logout", "bash_logout",
@@ -252,7 +264,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
".zprofile", "zprofile",
".zshrc", "zshrc",
// Extensions
"*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh",
"*.bash", "*.csh", "*.env", "*.ksh", "*.sh", "*.tcsh", "*.zsh",
]),
(&["slim"], &["*.skim", "*.slim", "*.slime"]),
(&["smarty"], &["*.tpl"]),
@@ -262,8 +274,10 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["spark"], &["*.spark"]),
(&["spec"], &["*.spec"]),
(&["sql"], &["*.sql", "*.psql"]),
(&["ssa"], &["*.ssa"]),
(&["stylus"], &["*.styl"]),
(&["sv"], &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
(&["svelte"], &["*.svelte", "*.svelte.ts"]),
(&["svg"], &["*.svg"]),
(&["swift"], &["*.swift"]),
(&["swig"], &["*.def", "*.i"]),
@@ -278,9 +292,8 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["texinfo"], &["*.texi"]),
(&["textile"], &["*.textile"]),
(&["tf"], &[
"*.tf", "*.auto.tfvars", "terraform.tfvars", "*.tf.json",
"*.auto.tfvars.json", "terraform.tfvars.json", "*.terraformrc",
"terraform.rc", "*.tfrc", "*.terraform.lock.hcl",
"*.tf", "*.tf.json", "*.tfvars", "*.tfvars.json",
"*.terraformrc", "terraform.rc", "*.tfrc", "*.terraform.lock.hcl",
]),
(&["thrift"], &["*.thrift"]),
(&["toml"], &["*.toml", "Cargo.lock"]),
@@ -288,6 +301,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["twig"], &["*.twig"]),
(&["txt"], &["*.txt"]),
(&["typoscript"], &["*.typoscript", "*.ts"]),
(&["typst"], &["*.typ"]),
(&["usd"], &["*.usd", "*.usda", "*.usdc"]),
(&["v"], &["*.v", "*.vsh"]),
(&["vala"], &["*.vala"]),
@@ -301,7 +315,9 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
(&["vimscript"], &[
"*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc",
]),
(&["vue"], &["*.vue"]),
(&["webidl"], &["*.idl", "*.webidl", "*.widl"]),
(&["wgsl"], &["*.wgsl"]),
(&["wiki"], &["*.mediawiki", "*.wiki"]),
(&["xml"], &[
"*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb",

View File

@@ -19,7 +19,7 @@ use std::{
fs::{File, FileType},
io::{self, BufRead},
path::{Path, PathBuf},
sync::{Arc, RwLock},
sync::{Arc, RwLock, Weak},
};
use crate::{
@@ -34,11 +34,13 @@ use crate::{
/// IgnoreMatch represents information about where a match came from when using
/// the `Ignore` matcher.
#[derive(Clone, Debug)]
#[allow(dead_code)]
pub(crate) struct IgnoreMatch<'a>(IgnoreMatchInner<'a>);
/// IgnoreMatchInner describes precisely where the match information came from.
/// This is private to allow expansion to more matchers in the future.
#[derive(Clone, Debug)]
#[allow(dead_code)]
enum IgnoreMatchInner<'a> {
Override(overrides::Glob<'a>),
Gitignore(&'a gitignore::Glob),
@@ -99,7 +101,7 @@ struct IgnoreInner {
/// Note that this is never used during matching, only when adding new
/// parent directory matchers. This avoids needing to rebuild glob sets for
/// parent directories if many paths are being searched.
compiled: Arc<RwLock<HashMap<OsString, Ignore>>>,
compiled: Arc<RwLock<HashMap<OsString, Weak<IgnoreInner>>>>,
/// The path to the directory that this matcher was built from.
dir: PathBuf,
/// An override matcher (default is empty).
@@ -116,6 +118,18 @@ struct IgnoreInner {
/// The absolute base path of this matcher. Populated only if parent
/// directories are added.
absolute_base: Option<Arc<PathBuf>>,
/// The directory that gitignores should be interpreted relative to.
///
/// Usually this is the directory containing the gitignore file. But in
/// some cases, like for global gitignores or for gitignores specified
/// explicitly, this should generally be set to the current working
/// directory. This is only used for global gitignores or "explicit"
/// gitignores.
///
/// When `None`, this means the CWD could not be determined or is unknown.
/// In this case, global gitignore files are ignored because they otherwise
/// cannot be matched correctly.
global_gitignores_relative_to: Option<PathBuf>,
/// Explicit global ignore matchers specified by the caller.
explicit_ignores: Arc<Vec<Gitignore>>,
/// Ignore files used in addition to `.ignore`
@@ -198,9 +212,11 @@ impl Ignore {
let mut ig = self.clone();
for parent in parents.into_iter().rev() {
let mut compiled = self.0.compiled.write().unwrap();
if let Some(prebuilt) = compiled.get(parent.as_os_str()) {
ig = prebuilt.clone();
continue;
if let Some(weak) = compiled.get(parent.as_os_str()) {
if let Some(prebuilt) = weak.upgrade() {
ig = Ignore(prebuilt);
continue;
}
}
let (mut igtmp, err) = ig.add_child_path(parent);
errs.maybe_push(err);
@@ -208,12 +224,16 @@ impl Ignore {
igtmp.absolute_base = Some(absolute_base.clone());
igtmp.has_git =
if self.0.opts.require_git && self.0.opts.git_ignore {
parent.join(".git").exists()
parent.join(".git").exists() || parent.join(".jj").exists()
} else {
false
};
ig = Ignore(Arc::new(igtmp));
compiled.insert(parent.as_os_str().to_os_string(), ig.clone());
let ig_arc = Arc::new(igtmp);
ig = Ignore(ig_arc.clone());
compiled.insert(
parent.as_os_str().to_os_string(),
Arc::downgrade(&ig_arc),
);
}
(ig, errs.into_error_option())
}
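The cache now stores `Weak<IgnoreInner>` handles, so parent matchers built for previously visited directories can be dropped once no walker holds them, while still being reused while they are alive. A minimal sketch of the same upgrade-or-rebuild pattern, using hypothetical `Inner`/`Cache` types rather than the crate's real ones:

use std::collections::HashMap;
use std::sync::{Arc, RwLock, Weak};

struct Inner {
    dir: String,
}

// Entries are weak: a cached value is freed once every strong handle
// to it goes away, and lookups then fall back to rebuilding it.
struct Cache {
    compiled: RwLock<HashMap<String, Weak<Inner>>>,
}

impl Cache {
    fn get_or_build(&self, dir: &str) -> Arc<Inner> {
        let mut compiled = self.compiled.write().unwrap();
        if let Some(weak) = compiled.get(dir) {
            if let Some(prebuilt) = weak.upgrade() {
                return prebuilt; // still alive, reuse it
            }
        }
        let built = Arc::new(Inner { dir: dir.to_string() });
        compiled.insert(dir.to_string(), Arc::downgrade(&built));
        built
    }
}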
@@ -236,14 +256,15 @@ impl Ignore {
/// Like add_child, but takes a full path and returns an IgnoreInner.
fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option<Error>) {
let git_type = if self.0.opts.require_git
&& (self.0.opts.git_ignore || self.0.opts.git_exclude)
{
let check_vcs_dir = self.0.opts.require_git
&& (self.0.opts.git_ignore || self.0.opts.git_exclude);
let git_type = if check_vcs_dir {
dir.join(".git").metadata().ok().map(|md| md.file_type())
} else {
None
};
let has_git = git_type.map(|_| true).unwrap_or(false);
let has_git =
check_vcs_dir && (git_type.is_some() || dir.join(".jj").exists());
let mut errs = PartialErrorBuilder::default();
let custom_ig_matcher = if self.0.custom_ignore_filenames.is_empty() {
@@ -282,6 +303,7 @@ impl Ignore {
errs.maybe_push(err);
m
};
let gi_exclude_matcher = if !self.0.opts.git_exclude {
Gitignore::empty()
} else {
@@ -310,6 +332,10 @@ impl Ignore {
parent: Some(self.clone()),
is_absolute_parent: false,
absolute_base: self.0.absolute_base.clone(),
global_gitignores_relative_to: self
.0
.global_gitignores_relative_to
.clone(),
explicit_ignores: self.0.explicit_ignores.clone(),
custom_ignore_filenames: self.0.custom_ignore_filenames.clone(),
custom_ignore_matcher: custom_ig_matcher,
@@ -453,21 +479,27 @@ impl Ignore {
// off of `path`. Overall, this seems a little ham-fisted, but
// it does fix a nasty bug. It should do fine until we overhaul
// this crate.
let dirpath = self.0.dir.as_path();
let path_prefix = match strip_prefix("./", dirpath) {
None => dirpath,
Some(stripped_dot_slash) => stripped_dot_slash,
};
let path = match strip_prefix(path_prefix, path) {
None => abs_parent_path.join(path),
Some(p) => {
let p = match strip_prefix("/", p) {
None => p,
Some(p) => p,
};
abs_parent_path.join(p)
}
};
let path = abs_parent_path.join(
self.parents()
.take_while(|ig| !ig.0.is_absolute_parent)
.last()
.map_or(path, |ig| {
// This is a weird special case when ripgrep users
// search with just a `.`, as some tools do
// automatically (like consult). In this case, if
// we don't bail out now, the code below will strip
// a leading `.` from `path`, which might mangle
// a hidden file name!
if ig.0.dir.as_path() == Path::new(".") {
return path;
}
let without_dot_slash =
strip_if_is_prefix("./", ig.0.dir.as_path());
let relative_base =
strip_if_is_prefix(without_dot_slash, path);
strip_if_is_prefix("/", relative_base)
}),
);
for ig in
self.parents().skip_while(|ig| !ig.0.is_absolute_parent)
@@ -567,6 +599,16 @@ pub(crate) struct IgnoreBuilder {
explicit_ignores: Vec<Gitignore>,
/// Ignore files in addition to .ignore.
custom_ignore_filenames: Vec<OsString>,
/// The directory that gitignores should be interpreted relative to.
///
/// Usually this is the directory containing the gitignore file. But in
/// some cases, like for global gitignores or for gitignores specified
/// explicitly, this should generally be set to the current working
/// directory. This is only used for global gitignores or "explicit"
/// gitignores.
///
/// When `None`, global gitignores are ignored.
global_gitignores_relative_to: Option<PathBuf>,
/// Ignore config.
opts: IgnoreOptions,
}
@@ -574,8 +616,9 @@ pub(crate) struct IgnoreBuilder {
impl IgnoreBuilder {
/// Create a new builder for an `Ignore` matcher.
///
/// All relative file paths are resolved with respect to the current
/// working directory.
/// Outside of tests, it is likely a bug to use this without also calling
/// `current_dir()`. This isn't made mandatory because this is an internal
/// abstraction and updating all of the tests would be annoying.
pub(crate) fn new() -> IgnoreBuilder {
IgnoreBuilder {
dir: Path::new("").to_path_buf(),
@@ -583,6 +626,7 @@ impl IgnoreBuilder {
types: Arc::new(Types::empty()),
explicit_ignores: vec![],
custom_ignore_filenames: vec![],
global_gitignores_relative_to: None,
opts: IgnoreOptions {
hidden: true,
ignore: true,
@@ -601,10 +645,20 @@ impl IgnoreBuilder {
/// The matcher returned won't match anything until ignore rules from
/// directories are added to it.
pub(crate) fn build(&self) -> Ignore {
self.build_with_cwd(None)
}
/// Builds a new `Ignore` matcher using the given CWD directory.
///
/// The matcher returned won't match anything until ignore rules from
/// directories are added to it.
pub(crate) fn build_with_cwd(&self, cwd: Option<PathBuf>) -> Ignore {
let global_gitignores_relative_to =
cwd.or_else(|| self.global_gitignores_relative_to.clone());
let git_global_matcher = if !self.opts.git_global {
Gitignore::empty()
} else {
let mut builder = GitignoreBuilder::new("");
} else if let Some(ref cwd) = global_gitignores_relative_to {
let mut builder = GitignoreBuilder::new(cwd);
builder
.case_insensitive(self.opts.ignore_case_insensitive)
.unwrap();
@@ -613,6 +667,11 @@ impl IgnoreBuilder {
log::debug!("{}", err);
}
gi
} else {
log::debug!(
"ignoring global gitignore file because CWD is not known"
);
Gitignore::empty()
};
Ignore(Arc::new(IgnoreInner {
@@ -623,6 +682,7 @@ impl IgnoreBuilder {
parent: None,
is_absolute_parent: true,
absolute_base: None,
global_gitignores_relative_to,
explicit_ignores: Arc::new(self.explicit_ignores.clone()),
custom_ignore_filenames: Arc::new(
self.custom_ignore_filenames.clone(),
@@ -637,6 +697,15 @@ impl IgnoreBuilder {
}))
}
/// Set the current directory used for matching global gitignores.
pub(crate) fn current_dir(
&mut self,
cwd: impl Into<PathBuf>,
) -> &mut IgnoreBuilder {
self.global_gitignores_relative_to = Some(cwd.into());
self
}
/// Add an override matcher.
///
/// By default, no override matcher is used.
@@ -866,12 +935,21 @@ fn resolve_git_commondir(
Ok(commondir_abs)
}
/// Strips `prefix` from `path` if it's a prefix, otherwise returns `path`
/// unchanged.
fn strip_if_is_prefix<'a, P: AsRef<Path> + ?Sized>(
prefix: &'a P,
path: &'a Path,
) -> &'a Path {
strip_prefix(prefix, path).map_or(path, |p| p)
}
#[cfg(test)]
mod tests {
use std::{io::Write, path::Path};
use crate::{
dir::IgnoreBuilder, gitignore::Gitignore, tests::TempDir, Error,
Error, dir::IgnoreBuilder, gitignore::Gitignore, tests::TempDir,
};
fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
@@ -935,6 +1013,19 @@ mod tests {
assert!(ig.matched("baz", false).is_none());
}
#[test]
fn gitignore_with_jj() {
let td = tmpdir();
mkdirp(td.path().join(".jj"));
wfile(td.path().join(".gitignore"), "foo\n!bar");
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
assert!(err.is_none());
assert!(ig.matched("foo", false).is_ignore());
assert!(ig.matched("bar", false).is_whitelist());
assert!(ig.matched("baz", false).is_none());
}
#[test]
fn gitignore_no_git() {
let td = tmpdir();

View File

@@ -20,8 +20,8 @@ use {
};
use crate::{
pathutil::{is_file_name, strip_prefix},
Error, Match, PartialErrorBuilder,
pathutil::{is_file_name, strip_prefix},
};
/// Glob represents a single glob in a gitignore file.
@@ -128,7 +128,10 @@ impl Gitignore {
/// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
/// set or is empty, then `$HOME/.config/git/ignore` is used instead.
pub fn global() -> (Gitignore, Option<Error>) {
GitignoreBuilder::new("").build_global()
match std::env::current_dir() {
Ok(cwd) => GitignoreBuilder::new(cwd).build_global(),
Err(err) => (Gitignore::empty(), Some(err.into())),
}
}
/// Creates a new empty gitignore matcher that never matches anything.
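With this change, `Gitignore::global()` resolves the matcher relative to the process's current working directory and surfaces an error when the CWD cannot be determined. A minimal usage sketch; the path passed to `matched` is purely illustrative:

use ignore::gitignore::Gitignore;

let (gi, err) = Gitignore::global();
if let Some(err) = err {
    eprintln!("could not load global gitignore: {err}");
}
// `false` means we are asking about a file, not a directory.
if gi.matched("scratch/notes.tmp", false).is_ignore() {
    println!("ignored by the global gitignore");
}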
@@ -308,6 +311,7 @@ pub struct GitignoreBuilder {
root: PathBuf,
globs: Vec<Glob>,
case_insensitive: bool,
allow_unclosed_class: bool,
}
impl GitignoreBuilder {
@@ -324,6 +328,7 @@ impl GitignoreBuilder {
root: strip_prefix("./", root).unwrap_or(root).to_path_buf(),
globs: vec![],
case_insensitive: false,
allow_unclosed_class: true,
}
}
@@ -390,6 +395,7 @@ impl GitignoreBuilder {
Err(err) => return Some(Error::Io(err).with_path(path)),
Ok(file) => file,
};
log::debug!("opened gitignore file: {}", path.display());
let rdr = BufReader::new(file);
let mut errs = PartialErrorBuilder::default();
for (i, line) in rdr.lines().enumerate() {
@@ -401,6 +407,12 @@ impl GitignoreBuilder {
break;
}
};
// Match Git's handling of .gitignore files that begin with the Unicode BOM
const UTF8_BOM: &str = "\u{feff}";
let line =
if i == 0 { line.trim_start_matches(UTF8_BOM) } else { &line };
if let Err(err) = self.add_line(Some(path.to_path_buf()), &line) {
errs.push(err.tagged(path, lineno));
}
@@ -504,6 +516,7 @@ impl GitignoreBuilder {
.literal_separator(true)
.case_insensitive(self.case_insensitive)
.backslash_escape(true)
.allow_unclosed_class(self.allow_unclosed_class)
.build()
.map_err(|err| Error::Glob {
glob: Some(glob.original.clone()),
@@ -529,6 +542,26 @@ impl GitignoreBuilder {
self.case_insensitive = yes;
Ok(self)
}
/// Toggle whether unclosed character classes are allowed. When allowed,
/// a `[` without a matching `]` is treated literally instead of resulting
/// in a parse error.
///
/// For example, if this is set then the glob `[abc` will be treated as the
/// literal string `[abc` instead of returning an error.
///
/// By default, this is enabled in order to match established `gitignore`
/// semantics. Generally speaking, allowing unclosed classes leads to worse
/// failure modes since the glob parser becomes more permissive. Keep it
/// enabled when compatibility (e.g., with POSIX glob implementations) is
/// more important than good error messages, and disable it otherwise.
pub fn allow_unclosed_class(
&mut self,
yes: bool,
) -> &mut GitignoreBuilder {
self.allow_unclosed_class = yes;
self
}
}
/// Return the file path of the current environment's global gitignore file.
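A short sketch of the new `allow_unclosed_class` knob on `GitignoreBuilder`, assuming the API added above; the `/repo` root and the patterns are made up for illustration:

use ignore::gitignore::GitignoreBuilder;

let mut builder = GitignoreBuilder::new("/repo");
// Default (true): `[abc` has no closing `]` and is treated literally.
builder.add_line(None, "[abc").unwrap();
// Opting out makes the same kind of pattern a hard parse error.
builder.allow_unclosed_class(false);
assert!(builder.add_line(None, "[def").is_err());
// Globs are compiled as they are added, so the earlier literal glob
// is unaffected by toggling the option afterwards.
let gitignore = builder.build().unwrap();
assert!(gitignore.matched("[abc", false).is_ignore());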

View File

@@ -477,11 +477,7 @@ impl<T> Match<T> {
/// Return the match if it is not none. Otherwise, return other.
pub fn or(self, other: Self) -> Self {
if self.is_none() {
other
} else {
self
}
if self.is_none() { other } else { self }
}
}
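A tiny illustration of `Match::or`, which keeps the receiver unless it is `None`:

use ignore::Match;

let first: Match<&str> = Match::None;
let second = Match::Ignore("rule from .gitignore");
// `first` is `None`, so `or` falls through to `second`.
assert!(first.or(second).is_ignore());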
@@ -527,7 +523,7 @@ mod tests {
let tmpdir = env::temp_dir();
for _ in 0..TRIES {
let count = COUNTER.fetch_add(1, Ordering::SeqCst);
let count = COUNTER.fetch_add(1, Ordering::Relaxed);
let path = tmpdir.join("rust-ignore").join(count.to_string());
if path.is_dir() {
continue;

View File

@@ -1,5 +1,6 @@
/*!
The overrides module provides a way to specify a set of override globs.
This provides functionality similar to `--include` or `--exclude` in command
line tools.
*/
@@ -7,8 +8,8 @@ line tools.
use std::path::Path;
use crate::{
gitignore::{self, Gitignore, GitignoreBuilder},
Error, Match,
gitignore::{self, Gitignore, GitignoreBuilder},
};
/// Glob represents a single glob in an override matcher.
@@ -23,9 +24,11 @@ use crate::{
/// The lifetime `'a` refers to the lifetime of the matcher that produced
/// this glob.
#[derive(Clone, Debug)]
#[allow(dead_code)]
pub struct Glob<'a>(GlobInner<'a>);
#[derive(Clone, Debug)]
#[allow(dead_code)]
enum GlobInner<'a> {
/// No glob matched, but the file path should still be ignored.
UnmatchedIgnore,
@@ -118,7 +121,9 @@ impl OverrideBuilder {
///
/// Matching is done relative to the directory path provided.
pub fn new<P: AsRef<Path>>(path: P) -> OverrideBuilder {
OverrideBuilder { builder: GitignoreBuilder::new(path) }
let mut builder = GitignoreBuilder::new(path);
builder.allow_unclosed_class(false);
OverrideBuilder { builder }
}
/// Builds a new override matcher from the globs added so far.
@@ -141,7 +146,8 @@ impl OverrideBuilder {
/// Toggle whether the globs should be matched case insensitively or not.
///
/// When this option is changed, only globs added after the change will be affected.
/// When this option is changed, only globs added after the change will be
/// affected.
///
/// This is disabled by default.
pub fn case_insensitive(
@@ -153,6 +159,28 @@ impl OverrideBuilder {
self.builder.case_insensitive(yes)?;
Ok(self)
}
/// Toggle whether unclosed character classes are allowed. When allowed,
/// a `[` without a matching `]` is treated literally instead of resulting
/// in a parse error.
///
/// For example, if this is set then the glob `[abc` will be treated as the
/// literal string `[abc` instead of returning an error.
///
/// By default, this is false. Generally speaking, enabling this leads to
/// worse failure modes since the glob parser becomes more permissive. You
/// might want to enable this when compatibility (e.g., with POSIX glob
/// implementations) is more important than good error messages.
///
/// This default is different from the default for [`Gitignore`]. Namely,
/// [`Gitignore`] is intended to match git's behavior as-is. But this
/// abstraction for "override" globs does not necessarily conform to any
/// other known specification and instead prioritizes better error
/// messages.
pub fn allow_unclosed_class(&mut self, yes: bool) -> &mut OverrideBuilder {
self.builder.allow_unclosed_class(yes);
self
}
}
#[cfg(test)]
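The override side flips the default: unclosed classes are rejected unless explicitly allowed. A sketch assuming the `OverrideBuilder` API shown above, with a made-up `/repo` root:

use ignore::overrides::OverrideBuilder;

let mut builder = OverrideBuilder::new("/repo");
// Overrides prioritize good error messages: `[abc` is rejected.
assert!(builder.add("[abc").is_err());
// Opting in restores the permissive, gitignore-like behavior.
builder.allow_unclosed_class(true);
builder.add("[abc").unwrap();
let _overrides = builder.build().unwrap();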

View File

@@ -91,7 +91,7 @@ use {
regex_automata::util::pool::Pool,
};
use crate::{default_types::DEFAULT_TYPES, pathutil::file_name, Error, Match};
use crate::{Error, Match, default_types::DEFAULT_TYPES, pathutil::file_name};
/// Glob represents a single glob in a set of file type definitions.
///

View File

@@ -5,21 +5,21 @@ use std::{
io,
path::{Path, PathBuf},
sync::atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering},
sync::Arc,
sync::{Arc, OnceLock},
};
use {
crossbeam_deque::{Stealer, Worker as Deque},
same_file::Handle,
walkdir::{self, WalkDir},
walkdir::WalkDir,
};
use crate::{
Error, PartialErrorBuilder,
dir::{Ignore, IgnoreBuilder},
gitignore::GitignoreBuilder,
overrides::Override,
types::Types,
Error, PartialErrorBuilder,
};
/// A directory entry with a possible error attached.
@@ -484,6 +484,7 @@ pub struct WalkBuilder {
paths: Vec<PathBuf>,
ig_builder: IgnoreBuilder,
max_depth: Option<usize>,
min_depth: Option<usize>,
max_filesize: Option<u64>,
follow_links: bool,
same_file_system: bool,
@@ -491,6 +492,18 @@ pub struct WalkBuilder {
threads: usize,
skip: Option<Arc<Handle>>,
filter: Option<Filter>,
/// The directory that gitignores should be interpreted relative to.
///
/// Usually this is the directory containing the gitignore file. But in
/// some cases, like for global gitignores or for gitignores specified
/// explicitly, this should generally be set to the current working
/// directory. This is only used for global gitignores or "explicit"
/// gitignores.
///
/// When `None`, the CWD is fetched from `std::env::current_dir()`. If
/// that fails, then global gitignores are ignored (an error is logged).
global_gitignores_relative_to:
OnceLock<Result<PathBuf, Arc<std::io::Error>>>,
}
#[derive(Clone)]
@@ -508,10 +521,18 @@ impl std::fmt::Debug for WalkBuilder {
.field("paths", &self.paths)
.field("ig_builder", &self.ig_builder)
.field("max_depth", &self.max_depth)
.field("min_depth", &self.min_depth)
.field("max_filesize", &self.max_filesize)
.field("follow_links", &self.follow_links)
.field("same_file_system", &self.same_file_system)
.field("sorter", &"<...>")
.field("threads", &self.threads)
.field("skip", &self.skip)
.field("filter", &"<...>")
.field(
"global_gitignores_relative_to",
&self.global_gitignores_relative_to,
)
.finish()
}
}
@@ -528,6 +549,7 @@ impl WalkBuilder {
paths: vec![path.as_ref().to_path_buf()],
ig_builder: IgnoreBuilder::new(),
max_depth: None,
min_depth: None,
max_filesize: None,
follow_links: false,
same_file_system: false,
@@ -535,6 +557,7 @@ impl WalkBuilder {
threads: 0,
skip: None,
filter: None,
global_gitignores_relative_to: OnceLock::new(),
}
}
@@ -542,6 +565,7 @@ impl WalkBuilder {
pub fn build(&self) -> Walk {
let follow_links = self.follow_links;
let max_depth = self.max_depth;
let min_depth = self.min_depth;
let sorter = self.sorter.clone();
let its = self
.paths
@@ -556,6 +580,9 @@ impl WalkBuilder {
if let Some(max_depth) = max_depth {
wd = wd.max_depth(max_depth);
}
if let Some(min_depth) = min_depth {
wd = wd.min_depth(min_depth);
}
if let Some(ref sorter) = sorter {
match sorter.clone() {
Sorter::ByName(cmp) => {
@@ -575,7 +602,10 @@ impl WalkBuilder {
})
.collect::<Vec<_>>()
.into_iter();
let ig_root = self.ig_builder.build();
let ig_root = self
.get_or_set_current_dir()
.map(|cwd| self.ig_builder.build_with_cwd(Some(cwd.to_path_buf())))
.unwrap_or_else(|| self.ig_builder.build());
Walk {
its,
it: None,
@@ -591,12 +621,17 @@ impl WalkBuilder {
///
/// Note that this *doesn't* return something that implements `Iterator`.
/// Instead, the returned value must be run with a closure. e.g.,
/// `builder.build_parallel().run(|| |path| println!("{:?}", path))`.
/// `builder.build_parallel().run(|| |path| { println!("{path:?}"); WalkState::Continue })`.
pub fn build_parallel(&self) -> WalkParallel {
let ig_root = self
.get_or_set_current_dir()
.map(|cwd| self.ig_builder.build_with_cwd(Some(cwd.to_path_buf())))
.unwrap_or_else(|| self.ig_builder.build());
WalkParallel {
paths: self.paths.clone().into_iter(),
ig_root: self.ig_builder.build(),
ig_root,
max_depth: self.max_depth,
min_depth: self.min_depth,
max_filesize: self.max_filesize,
follow_links: self.follow_links,
same_file_system: self.same_file_system,
@@ -621,6 +656,26 @@ impl WalkBuilder {
/// The default, `None`, imposes no depth restriction.
pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
self.max_depth = depth;
if self.min_depth.is_some()
&& self.max_depth.is_some()
&& self.max_depth < self.min_depth
{
self.max_depth = self.min_depth;
}
self
}
/// The minimum depth to recurse.
///
/// The default, `None`, imposes no minimum depth restriction.
pub fn min_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
self.min_depth = depth;
if self.max_depth.is_some()
&& self.min_depth.is_some()
&& self.min_depth > self.max_depth
{
self.min_depth = self.max_depth;
}
self
}
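A sketch of the new `min_depth` option alongside the existing `max_depth`; per the clamping above, an inconsistent pair is silently reconciled by clamping the later setting to the earlier one. The `./src` path is illustrative:

use ignore::WalkBuilder;

let mut builder = WalkBuilder::new("./src");
// Yield only entries at least two levels below ./src, and never
// descend more than four levels deep.
builder.min_depth(Some(2)).max_depth(Some(4));
for result in builder.build() {
    match result {
        Ok(entry) => println!("{}", entry.path().display()),
        Err(err) => eprintln!("walk error: {err}"),
    }
}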
@@ -651,12 +706,25 @@ impl WalkBuilder {
///
/// This has lower precedence than all other sources of ignore rules.
///
/// # Errors
///
/// If there was a problem adding the ignore file, then an error is
/// returned. Note that the error may indicate *partial* failure. For
/// example, if an ignore file contains an invalid glob, all other globs
/// are still applied.
///
/// An error will also occur if this walker could not get the current
/// working directory (and `WalkBuilder::current_dir` isn't set).
pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
let mut builder = GitignoreBuilder::new("");
let path = path.as_ref();
let Some(cwd) = self.get_or_set_current_dir() else {
let err = std::io::Error::other(format!(
"CWD is not known, ignoring global gitignore {}",
path.display()
));
return Some(err.into());
};
let mut builder = GitignoreBuilder::new(cwd);
let mut errs = PartialErrorBuilder::default();
errs.maybe_push(builder.add(path));
match builder.build() {
@@ -798,6 +866,10 @@ impl WalkBuilder {
///
/// When disabled, git-related ignore rules are applied even when searching
/// outside a git repository.
///
/// In particular, if this is `false` then `.gitignore` files will be read
/// from parent directories above the git root directory containing `.git`,
/// which is different from the git behavior.
pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder {
self.ig_builder.require_git(yes);
self
@@ -894,6 +966,10 @@ impl WalkBuilder {
///
/// Note that the errors for reading entries that may not satisfy the
/// predicate will still be yielded.
///
/// Note also that only one filter predicate can be applied to a
/// `WalkBuilder`. Subsequent calls override any previously set filter
/// predicate.
pub fn filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder
where
P: Fn(&DirEntry) -> bool + Send + Sync + 'static,
@@ -901,6 +977,55 @@ impl WalkBuilder {
self.filter = Some(Filter(Arc::new(filter)));
self
}
/// Set the current working directory used for matching global gitignores.
///
/// If this is not set, then this walker will attempt to discover the
/// correct path from the environment's current working directory. If
/// that fails, then global gitignore files will be ignored.
///
/// Global gitignore files come from things like a user's git configuration
/// or from gitignore files added via [`WalkBuilder::add_ignore`].
pub fn current_dir(
&mut self,
cwd: impl Into<PathBuf>,
) -> &mut WalkBuilder {
let cwd = cwd.into();
self.ig_builder.current_dir(cwd.clone());
if let Err(cwd) = self.global_gitignores_relative_to.set(Ok(cwd)) {
// OK because `Err` from `set` implies a value exists.
*self.global_gitignores_relative_to.get_mut().unwrap() = cwd;
}
self
}
/// Gets the currently configured CWD on this walk builder.
///
/// This is "lazy." That is, we only ask for the CWD from the environment
/// if `WalkBuilder::current_dir` hasn't been called yet. And we ensure
/// that we only do it once.
fn get_or_set_current_dir(&self) -> Option<&Path> {
let result = self.global_gitignores_relative_to.get_or_init(|| {
let result = std::env::current_dir().map_err(Arc::new);
match result {
Ok(ref path) => {
log::trace!(
"automatically discovered CWD: {}",
path.display()
);
}
Err(ref err) => {
log::debug!(
"failed to find CWD \
(global gitignores will be ignored): \
{err}"
);
}
}
result
});
result.as_ref().ok().map(|path| &**path)
}
}
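Putting `current_dir` together with the parallel walker, as a minimal sketch assuming the new API above; `/repo` and `/repo/src` are placeholders:

use ignore::{WalkBuilder, WalkState};

let mut builder = WalkBuilder::new("/repo/src");
// Pin the directory that global gitignores are interpreted against,
// instead of lazily asking std::env::current_dir() at build time.
builder.current_dir("/repo");
builder.build_parallel().run(|| {
    Box::new(|result| {
        if let Ok(entry) = result {
            println!("{}", entry.path().display());
        }
        WalkState::Continue
    })
});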
/// Walk is a recursive directory iterator over file paths in one or more
@@ -1191,6 +1316,7 @@ pub struct WalkParallel {
ig_root: Ignore,
max_filesize: Option<u64>,
max_depth: Option<usize>,
min_depth: Option<usize>,
follow_links: bool,
same_file_system: bool,
threads: usize,
@@ -1290,6 +1416,7 @@ impl WalkParallel {
quit_now: quit_now.clone(),
active_workers: active_workers.clone(),
max_depth: self.max_depth,
min_depth: self.min_depth,
max_filesize: self.max_filesize,
follow_links: self.follow_links,
skip: self.skip.clone(),
@@ -1305,7 +1432,7 @@ impl WalkParallel {
fn threads(&self) -> usize {
if self.threads == 0 {
2
std::thread::available_parallelism().map_or(1, |n| n.get()).min(12)
} else {
self.threads
}
@@ -1420,8 +1547,11 @@ impl Stack {
stealers: stealers.clone(),
})
.collect();
// Distribute the initial messages.
// Distribute the initial messages. Reverse the order to cancel out
// the other reversal caused by the inherent LIFO processing of the
// per-thread stacks, which are filled here.
init.into_iter()
.rev()
.zip(stacks.iter().cycle())
.for_each(|(m, s)| s.push(m));
stacks
@@ -1476,6 +1606,8 @@ struct Worker<'s> {
/// The maximum depth of directories to descend. A value of `0` means no
/// descension at all.
max_depth: Option<usize>,
/// The minimum depth of directories to descend.
min_depth: Option<usize>,
/// The maximum size a searched file can be (in bytes). If a file exceeds
/// this size it will be skipped.
max_filesize: Option<u64>,
@@ -1504,10 +1636,19 @@ impl<'s> Worker<'s> {
}
fn run_one(&mut self, mut work: Work) -> WalkState {
let should_visit = self
.min_depth
.map(|min_depth| work.dent.depth() >= min_depth)
.unwrap_or(true);
// If the work is not a directory, then we can just execute the
// caller's callback immediately and move on.
if work.is_symlink() || !work.is_dir() {
return self.visitor.visit(Ok(work.dent));
return if should_visit {
self.visitor.visit(Ok(work.dent))
} else {
WalkState::Continue
};
}
if let Some(err) = work.add_parents() {
let state = self.visitor.visit(Err(err));
@@ -1540,9 +1681,11 @@ impl<'s> Worker<'s> {
// entry before passing the error value.
let readdir = work.read_dir();
let depth = work.dent.depth();
let state = self.visitor.visit(Ok(work.dent));
if !state.is_continue() {
return state;
if should_visit {
let state = self.visitor.visit(Ok(work.dent));
if !state.is_continue() {
return state;
}
}
if !descend {
return WalkState::Skip;
@@ -1887,7 +2030,7 @@ fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
#[cfg(windows)]
fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
use winapi_util::{file, Handle};
use winapi_util::{Handle, file};
let h = Handle::from_path_any(path)?;
file::information(h).map(|info| info.volume_serial_number())
@@ -1933,11 +2076,7 @@ mod tests {
}
fn normal_path(unix: &str) -> String {
if cfg!(windows) {
unix.replace("\\", "/")
} else {
unix.to_string()
}
if cfg!(windows) { unix.replace("\\", "/") } else { unix.to_string() }
}
fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
@@ -2149,6 +2288,51 @@ mod tests {
);
}
#[test]
fn min_depth() {
let td = tmpdir();
mkdirp(td.path().join("a/b/c"));
wfile(td.path().join("foo"), "");
wfile(td.path().join("a/foo"), "");
wfile(td.path().join("a/b/foo"), "");
wfile(td.path().join("a/b/c/foo"), "");
let builder = WalkBuilder::new(td.path());
assert_paths(
td.path(),
&builder,
&["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
);
let mut builder = WalkBuilder::new(td.path());
assert_paths(
td.path(),
&builder.min_depth(Some(0)),
&["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
);
assert_paths(
td.path(),
&builder.min_depth(Some(1)),
&["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
);
assert_paths(
td.path(),
builder.min_depth(Some(2)),
&["a/b", "a/b/c", "a/b/c/foo", "a/b/foo", "a/foo"],
);
assert_paths(
td.path(),
builder.min_depth(Some(3)),
&["a/b/c", "a/b/c/foo", "a/b/foo"],
);
assert_paths(td.path(), builder.min_depth(Some(10)), &[]);
assert_paths(
td.path(),
builder.min_depth(Some(2)).max_depth(Some(1)),
&["a/b", "a/foo"],
);
}
#[test]
fn max_filesize() {
let td = tmpdir();

View File

@@ -200,13 +200,17 @@ fn test_dirs_in_deep() {
assert!(m("ROOT/parent_dir/dir_deep_00", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_00/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_00/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_00/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_00/child_dir/file", false).is_ignore()
);
// 01
assert!(m("ROOT/parent_dir/dir_deep_01", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_01/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_01/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_01/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_01/child_dir/file", false).is_ignore()
);
// 02
assert!(m("ROOT/parent_dir/dir_deep_02", true).is_none());
@@ -248,51 +252,67 @@ fn test_dirs_in_deep() {
assert!(m("ROOT/parent_dir/dir_deep_20", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_20/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_20/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_20/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_20/child_dir/file", false).is_ignore()
);
// 21
assert!(m("ROOT/parent_dir/dir_deep_21", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_21/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_21/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_21/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_21/child_dir/file", false).is_ignore()
);
// 22
// dir itself doesn't match
assert!(m("ROOT/parent_dir/dir_deep_22", true).is_none());
assert!(m("ROOT/parent_dir/dir_deep_22/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_22/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_22/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_22/child_dir/file", false).is_ignore()
);
// 23
// dir itself doesn't match
assert!(m("ROOT/parent_dir/dir_deep_23", true).is_none());
assert!(m("ROOT/parent_dir/dir_deep_23/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_23/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_23/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_23/child_dir/file", false).is_ignore()
);
// 30
assert!(m("ROOT/parent_dir/dir_deep_30", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_30/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_30/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_30/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_30/child_dir/file", false).is_ignore()
);
// 31
assert!(m("ROOT/parent_dir/dir_deep_31", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_31/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_31/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_31/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_31/child_dir/file", false).is_ignore()
);
// 32
// dir itself doesn't match
assert!(m("ROOT/parent_dir/dir_deep_32", true).is_none());
assert!(m("ROOT/parent_dir/dir_deep_32/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_32/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_32/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_32/child_dir/file", false).is_ignore()
);
// 33
// dir itself doesn't match
assert!(m("ROOT/parent_dir/dir_deep_33", true).is_none());
assert!(m("ROOT/parent_dir/dir_deep_33/file", false).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_33/child_dir", true).is_ignore());
assert!(m("ROOT/parent_dir/dir_deep_33/child_dir/file", false).is_ignore());
assert!(
m("ROOT/parent_dir/dir_deep_33/child_dir/file", false).is_ignore()
);
}

View File

@@ -0,0 +1,2 @@
ignore/this/path
# This file begins with a BOM (U+FEFF)

View File

@@ -0,0 +1,17 @@
use ignore::gitignore::GitignoreBuilder;
const IGNORE_FILE: &'static str = "tests/gitignore_skip_bom.gitignore";
/// Skip a Byte-Order Mark (BOM) at the beginning of the file, matching Git's
/// behavior.
///
/// Ref: <https://github.com/BurntSushi/ripgrep/issues/2177>
#[test]
fn gitignore_skip_bom() {
let mut builder = GitignoreBuilder::new("ROOT");
let error = builder.add(IGNORE_FILE);
assert!(error.is_none(), "failed to open gitignore file");
let g = builder.build().unwrap();
assert!(g.matched("ignore/this/path", false).is_ignore());
}

View File

@@ -1,6 +1,6 @@
[package]
name = "grep-matcher"
version = "0.1.7" #:version
version = "0.1.8" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
A trait for regular expressions, with a focus on line oriented search.
@@ -12,7 +12,7 @@ readme = "README.md"
keywords = ["regex", "pattern", "trait"]
license = "Unlicense OR MIT"
autotests = false
edition = "2021"
edition = "2024"
[dependencies]
memchr = "2.6.3"

View File

@@ -144,7 +144,7 @@ fn is_valid_cap_letter(b: &u8) -> bool {
#[cfg(test)]
mod tests {
use super::{find_cap_ref, interpolate, CaptureRef};
use super::{CaptureRef, find_cap_ref, interpolate};
macro_rules! find {
($name:ident, $text:expr) => {

View File

@@ -389,6 +389,15 @@ pub trait Captures {
/// for the overall match.
fn get(&self, i: usize) -> Option<Match>;
/// Return the overall match for the capture.
///
/// This returns the match for index `0`. That is, it is equivalent to
/// `get(0).unwrap()`.
#[inline]
fn as_match(&self) -> Match {
self.get(0).unwrap()
}
/// Returns true if and only if these captures are empty. This occurs
/// when `len` is `0`.
///

View File

@@ -1,6 +1,6 @@
[package]
name = "grep-pcre2"
version = "0.1.7" #:version
version = "0.1.9" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Use PCRE2 with the 'grep' crate.
@@ -11,9 +11,9 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/pcre2"
readme = "README.md"
keywords = ["regex", "grep", "pcre", "backreference", "look"]
license = "Unlicense OR MIT"
edition = "2018"
edition = "2024"
[dependencies]
grep-matcher = { version = "0.1.7", path = "../matcher" }
grep-matcher = { version = "0.1.8", path = "../matcher" }
log = "0.4.20"
pcre2 = "0.2.6"

View File

@@ -55,7 +55,12 @@ impl RegexMatcherBuilder {
format!("(?:{})", p.as_ref())
});
}
let mut singlepat = pats.join("|");
let mut singlepat = if patterns.is_empty() {
// A way to spell a pattern that can never match anything.
r"[^\S\s]".to_string()
} else {
pats.join("|")
};
if self.case_smart && !has_uppercase_literal(&singlepat) {
builder.caseless(true);
}
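The never-matching fallback is worth a quick sanity check: `[^\S\s]` demands a character that is neither non-whitespace nor whitespace, which nothing can satisfy. A sketch using the `pcre2` crate's bytes API (the engine this builder wraps):

use pcre2::bytes::Regex;

// The negated class of `\S` and `\s` together is empty, so this
// pattern can never match any input.
let re = Regex::new(r"[^\S\s]").unwrap();
assert!(!re.is_match(b"any haystack at all").unwrap());
assert!(!re.is_match(b"").unwrap());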
@@ -428,7 +433,7 @@ fn has_uppercase_literal(pattern: &str) -> bool {
#[cfg(test)]
mod tests {
use grep_matcher::{LineMatchKind, Matcher};
use grep_matcher::LineMatchKind;
use super::*;

View File

@@ -1,6 +1,6 @@
[package]
name = "grep-printer"
version = "0.2.0" #:version
version = "0.3.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
An implementation of the grep crate's Sink trait that provides standard
@@ -12,7 +12,7 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/printer"
readme = "README.md"
keywords = ["grep", "pattern", "print", "printer", "sink"]
license = "Unlicense OR MIT"
edition = "2021"
edition = "2024"
[features]
default = ["serde"]
@@ -20,22 +20,22 @@ serde = ["dep:serde", "dep:serde_json"]
[dependencies]
bstr = "1.6.2"
grep-matcher = { version = "0.1.7", path = "../matcher" }
grep-searcher = { version = "0.1.12", path = "../searcher" }
grep-matcher = { version = "0.1.8", path = "../matcher" }
grep-searcher = { version = "0.1.16", path = "../searcher" }
log = "0.4.5"
termcolor = "1.3.0"
serde = { version = "1.0.193", optional = true }
serde_json = { version = "1.0.107", optional = true }
[dev-dependencies]
grep-regex = { version = "0.1.12", path = "../regex" }
grep-regex = { version = "0.1.14", path = "../regex" }
[package.metadata.docs.rs]
# We want to document all features.
all-features = true
# This opts into a nightly unstable option to show the features that need to be
# enabled for public API items. To do that, we set 'docsrs', and when that's
# enabled, we enable the 'doc_auto_cfg' feature.
# enabled, we enable the 'doc_cfg' feature.
#
# To test this locally, run:
#

View File

@@ -51,13 +51,13 @@ impl std::fmt::Display for ColorError {
ColorError::UnrecognizedOutType(ref name) => write!(
f,
"unrecognized output type '{}'. Choose from: \
path, line, column, match.",
path, line, column, match, highlight.",
name,
),
ColorError::UnrecognizedSpecType(ref name) => write!(
f,
"unrecognized spec type '{}'. Choose from: \
fg, bg, style, none.",
fg, bg, style, none.",
name,
),
ColorError::UnrecognizedColor(_, ref msg) => write!(f, "{}", msg),
@@ -65,13 +65,13 @@ impl std::fmt::Display for ColorError {
f,
"unrecognized style attribute '{}'. Choose from: \
nobold, bold, nointense, intense, nounderline, \
underline.",
underline, noitalic, italic.",
name,
),
ColorError::InvalidFormat(ref original) => write!(
f,
"invalid color spec format: '{}'. Valid format \
is '(path|line|column|match):(fg|bg|style):(value)'.",
"invalid color spec format: '{}'. Valid format is \
'(path|line|column|match|highlight):(fg|bg|style):(value)'.",
original,
),
}
@@ -90,6 +90,7 @@ pub struct ColorSpecs {
line: ColorSpec,
column: ColorSpec,
matched: ColorSpec,
highlight: ColorSpec,
}
/// A single color specification provided by the user.
@@ -99,7 +100,7 @@ pub struct ColorSpecs {
/// The format of a `Spec` is a triple: `{type}:{attribute}:{value}`. Each
/// component is defined as follows:
///
/// * `{type}` can be one of `path`, `line`, `column` or `match`.
/// * `{type}` can be one of `path`, `line`, `column`, `match` or `highlight`.
/// * `{attribute}` can be one of `fg`, `bg` or `style`. `{attribute}` may also
/// be the special value `none`, in which case, `{value}` can be omitted.
/// * `{value}` is either a color name (for `fg`/`bg`) or a style instruction.
@@ -121,7 +122,7 @@ pub struct ColorSpecs {
/// `0x`.
///
/// Valid style instructions are `nobold`, `bold`, `intense`, `nointense`,
/// `underline`, `nounderline`.
/// `underline`, `nounderline`, `italic`, `noitalic`.
///
/// ## Example
///
@@ -181,6 +182,7 @@ enum OutType {
Line,
Column,
Match,
Highlight,
}
/// The specification type.
@@ -201,6 +203,8 @@ enum Style {
NoIntense,
Underline,
NoUnderline,
Italic,
NoItalic,
}
impl ColorSpecs {
@@ -214,6 +218,7 @@ impl ColorSpecs {
OutType::Line => spec.merge_into(&mut merged.line),
OutType::Column => spec.merge_into(&mut merged.column),
OutType::Match => spec.merge_into(&mut merged.matched),
OutType::Highlight => spec.merge_into(&mut merged.highlight),
}
}
merged
@@ -247,6 +252,12 @@ impl ColorSpecs {
pub fn matched(&self) -> &ColorSpec {
&self.matched
}
/// Return the color specification for coloring the entire line when it
/// contains matched text.
pub fn highlight(&self) -> &ColorSpec {
&self.highlight
}
}
impl UserColorSpec {
@@ -286,6 +297,12 @@ impl SpecValue {
Style::NoUnderline => {
cspec.set_underline(false);
}
Style::Italic => {
cspec.set_italic(true);
}
Style::NoItalic => {
cspec.set_italic(false);
}
},
}
}
@@ -340,6 +357,7 @@ impl std::str::FromStr for OutType {
"line" => Ok(OutType::Line),
"column" => Ok(OutType::Column),
"match" => Ok(OutType::Match),
"highlight" => Ok(OutType::Highlight),
_ => Err(ColorError::UnrecognizedOutType(s.to_string())),
}
}
@@ -370,6 +388,8 @@ impl std::str::FromStr for Style {
"nointense" => Ok(Style::NoIntense),
"underline" => Ok(Style::Underline),
"nounderline" => Ok(Style::NoUnderline),
"italic" => Ok(Style::Italic),
"noitalic" => Ok(Style::NoItalic),
_ => Err(ColorError::UnrecognizedStyle(s.to_string())),
}
}
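A sketch of parsing user color specs with the new `highlight` output type and `italic` style attribute, assuming the patched `grep-printer` above (the stock crate rejects both tokens):

use grep_printer::{ColorSpecs, UserColorSpec};

let user_specs: Vec<UserColorSpec> = [
    "match:fg:magenta",
    "match:style:italic",
    "highlight:bg:yellow",
]
.iter()
.map(|spec| spec.parse().unwrap())
.collect();
let specs = ColorSpecs::new(&user_specs);
// The merged spec for matched text carries the fg color and italics.
assert!(!specs.matched().is_none());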

View File

@@ -0,0 +1,92 @@
use crate::hyperlink::HyperlinkAlias;
/// Aliases to well-known hyperlink schemes.
///
/// These need to be sorted by name.
pub(super) const HYPERLINK_PATTERN_ALIASES: &[HyperlinkAlias] = &[
alias(
"cursor",
"Cursor scheme (cursor://)",
"cursor://file{path}:{line}:{column}",
),
prioritized_alias(
0,
"default",
"RFC 8089 scheme (file://) (platform-aware)",
{
#[cfg(not(windows))]
{
"file://{host}{path}"
}
#[cfg(windows)]
{
"file://{path}"
}
},
),
alias(
"file",
"RFC 8089 scheme (file://) with host",
"file://{host}{path}",
),
// https://github.com/misaki-web/grepp
alias("grep+", "grep+ scheme (grep+://)", "grep+://{path}:{line}"),
alias(
"kitty",
"kitty-style RFC 8089 scheme (file://) with line number",
"file://{host}{path}#{line}",
),
// https://macvim.org/docs/gui_mac.txt.html#mvim%3A%2F%2F
alias(
"macvim",
"MacVim scheme (mvim://)",
"mvim://open?url=file://{path}&line={line}&column={column}",
),
prioritized_alias(1, "none", "disable hyperlinks", ""),
// https://macromates.com/blog/2007/the-textmate-url-scheme/
alias(
"textmate",
"TextMate scheme (txmt://)",
"txmt://open?url=file://{path}&line={line}&column={column}",
),
// https://code.visualstudio.com/docs/editor/command-line#_opening-vs-code-with-urls
alias(
"vscode",
"VS Code scheme (vscode://)",
"vscode://file{path}:{line}:{column}",
),
alias(
"vscode-insiders",
"VS Code Insiders scheme (vscode-insiders://)",
"vscode-insiders://file{path}:{line}:{column}",
),
alias(
"vscodium",
"VSCodium scheme (vscodium://)",
"vscodium://file{path}:{line}:{column}",
),
];
/// Creates a [`HyperlinkAlias`].
const fn alias(
name: &'static str,
description: &'static str,
format: &'static str,
) -> HyperlinkAlias {
HyperlinkAlias { name, description, format, display_priority: None }
}
/// Creates a [`HyperlinkAlias`] with a display priority.
const fn prioritized_alias(
priority: i16,
name: &'static str,
description: &'static str,
format: &'static str,
) -> HyperlinkAlias {
HyperlinkAlias {
name,
description,
format,
display_priority: Some(priority),
}
}

View File

@@ -5,7 +5,11 @@ use {
termcolor::{HyperlinkSpec, WriteColor},
};
use crate::{hyperlink_aliases, util::DecimalFormatter};
use crate::util::DecimalFormatter;
use self::aliases::HYPERLINK_PATTERN_ALIASES;
mod aliases;
/// Hyperlink configuration.
///
@@ -107,8 +111,8 @@ impl std::str::FromStr for HyperlinkFormat {
}
let mut builder = FormatBuilder::new();
let input = match hyperlink_aliases::find(s) {
Some(format) => format,
let input = match HyperlinkAlias::find(s) {
Some(alias) => alias.format(),
None => s,
};
let mut name = String::new();
@@ -179,6 +183,63 @@ impl std::fmt::Display for HyperlinkFormat {
}
}
/// An alias for a hyperlink format.
///
/// Hyperlink aliases are built-in formats, so they hold static values.
/// Some of their features are usable in const blocks.
#[derive(Clone, Debug)]
pub struct HyperlinkAlias {
name: &'static str,
description: &'static str,
format: &'static str,
display_priority: Option<i16>,
}
impl HyperlinkAlias {
/// Returns the name of the alias.
pub const fn name(&self) -> &str {
self.name
}
/// Returns a very short description of this hyperlink alias.
pub const fn description(&self) -> &str {
self.description
}
/// Returns the display priority of this alias.
///
/// If no priority is set, then `None` is returned.
///
/// The display priority is meant to reflect some special status associated
/// with an alias. For example, the `default` and `none` aliases have a
/// display priority. This is meant to encourage listing them first in
/// documentation.
///
/// A lower display priority implies the alias should be shown before
/// aliases with a higher (or absent) display priority.
///
/// Callers cannot rely on any specific display priority value to remain
/// stable across semver compatible releases of this crate.
pub const fn display_priority(&self) -> Option<i16> {
self.display_priority
}
/// Returns the format string of the alias.
const fn format(&self) -> &'static str {
self.format
}
/// Looks for the hyperlink alias defined by the given name.
///
/// If one does not exist, `None` is returned.
fn find(name: &str) -> Option<&HyperlinkAlias> {
HYPERLINK_PATTERN_ALIASES
.binary_search_by_key(&name, |alias| alias.name())
.map(|i| &HYPERLINK_PATTERN_ALIASES[i])
.ok()
}
}
/// A static environment for hyperlink interpolation.
///
/// This environment permits setting the values of variables used in hyperlink
@@ -255,15 +316,18 @@ impl std::fmt::Display for HyperlinkFormatError {
match self.kind {
NoVariables => {
let aliases = hyperlink_aliases::iter()
.map(|(name, _)| name)
.collect::<Vec<&str>>()
.join(", ");
let mut aliases = hyperlink_aliases();
aliases.sort_by_key(|alias| {
alias.display_priority().unwrap_or(i16::MAX)
});
let names: Vec<&str> =
aliases.iter().map(|alias| alias.name()).collect();
write!(
f,
"at least a {{path}} variable is required in a \
hyperlink format, or otherwise use a valid alias: {}",
aliases,
hyperlink format, or otherwise use a valid alias: \
{aliases}",
aliases = names.join(", "),
)
}
NoPathVariable => {
@@ -418,7 +482,7 @@ impl FormatBuilder {
let err_invalid_scheme = HyperlinkFormatError {
kind: HyperlinkFormatErrorKind::InvalidScheme,
};
let Some(Part::Text(ref part)) = self.parts.first() else {
let Some(Part::Text(part)) = self.parts.first() else {
return Err(err_invalid_scheme);
};
let Some(colon) = part.find_byte(b':') else {
@@ -474,7 +538,7 @@ impl Part {
values: &Values,
dest: &mut Vec<u8>,
) {
match self {
match *self {
Part::Text(ref text) => dest.extend_from_slice(text),
Part::Host => dest.extend_from_slice(
env.host.as_ref().map(|s| s.as_bytes()).unwrap_or(b""),
@@ -702,16 +766,20 @@ impl HyperlinkPath {
/// Returns a hyperlink path from an OS path.
#[cfg(windows)]
pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
// On Windows, Path::canonicalize returns the result of
// GetFinalPathNameByHandleW with VOLUME_NAME_DOS,
// which produces paths such as the following:
// On Windows, we use `std::path::absolute` instead of `Path::canonicalize`
// as it can be much faster since it does not touch the file system.
// It wraps the [`GetFullPathNameW`][1] API, except for verbatim paths
// (those which start with `\\?\`, see [the documentation][2] for details).
//
// Here, we strip any verbatim path prefixes since we cannot use them
// in hyperlinks anyway. This can only happen if the user explicitly
// supplies a verbatim path as input, which already needs to be absolute:
//
// \\?\C:\dir\file.txt (local path)
// \\?\UNC\server\dir\file.txt (network share)
//
// The \\?\ prefix comes from VOLUME_NAME_DOS and is constant.
// It is followed either by the drive letter, or by UNC\
// (universal naming convention), which denotes a network share.
// The `\\?\` prefix is constant for verbatim paths, and can be followed
// by `UNC\` (universal naming convention), which denotes a network share.
//
// Given that the default URL format on Windows is file://{path}
// we need to return the following from this function:
@@ -750,18 +818,19 @@ impl HyperlinkPath {
//
// It doesn't parse any other number of slashes in "file//server" as a
// network path.
//
// [1]: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew
// [2]: https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
const WIN32_NAMESPACE_PREFIX: &str = r"\\?\";
const UNC_PREFIX: &str = r"UNC\";
// As for Unix, we canonicalize the path to make sure we have an
// absolute path.
let path = match original_path.canonicalize() {
let path = match std::path::absolute(original_path) {
Ok(path) => path,
Err(err) => {
log::debug!(
"hyperlink creation for {:?} failed, error occurred \
during path canonicalization: {}",
during conversion to absolute path: {}",
original_path,
err,
);
@@ -784,24 +853,20 @@ impl HyperlinkPath {
return None;
}
};
// As the comment above says, we expect all canonicalized paths to
// begin with a \\?\. If it doesn't, then something weird is happening
// and we should just give up.
if !string.starts_with(WIN32_NAMESPACE_PREFIX) {
log::debug!(
"hyperlink creation for {:?} failed, canonicalization \
returned {:?}, which does not start with \\\\?\\",
original_path,
path,
);
return None;
}
string = &string[WIN32_NAMESPACE_PREFIX.len()..];
// And as above, drop the UNC prefix too, but keep the leading slash.
if string.starts_with(UNC_PREFIX) {
string = &string[(UNC_PREFIX.len() - 1)..];
// Strip verbatim path prefixes (see the comment above for details).
if string.starts_with(WIN32_NAMESPACE_PREFIX) {
string = &string[WIN32_NAMESPACE_PREFIX.len()..];
// Drop the UNC prefix if there is one, but keep the leading slash.
if string.starts_with(UNC_PREFIX) {
string = &string[(UNC_PREFIX.len() - 1)..];
}
} else if string.starts_with(r"\\") || string.starts_with(r"//") {
// Drop one of the two leading slashes of network paths; it will be added back.
string = &string[1..];
}
// Finally, add a leading slash. In the local file case, this turns
// C:\foo\bar into /C:\foo\bar (and then percent encoding turns it into
// /C:/foo/bar). In the network share case, this turns \share\foo\bar
@@ -811,6 +876,13 @@ impl HyperlinkPath {
Some(HyperlinkPath::encode(with_slash.as_bytes()))
}
/// For other platforms (not windows, not unix), return None and log a debug message.
#[cfg(not(any(windows, unix)))]
pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
log::debug!("hyperlinks are not supported on this platform");
None
}
/// Percent-encodes a path.
///
/// The alphanumeric ASCII characters and "-", ".", "_", "~" are unreserved
@@ -855,6 +927,26 @@ impl HyperlinkPath {
}
}
/// Returns the set of hyperlink aliases supported by this crate.
///
/// Aliases are supported by the `FromStr` trait implementation of a
/// [`HyperlinkFormat`]. That is, if an alias is seen, then it is automatically
/// replaced with the corresponding format. For example, the `vscode` alias
/// maps to `vscode://file{path}:{line}:{column}`.
///
/// This is exposed to allow callers to include hyperlink aliases in
/// documentation in a way that is guaranteed to match what is actually
/// supported.
///
/// The list returned is guaranteed to be sorted lexicographically
/// by the alias name. Callers may want to re-sort the list using
/// [`HyperlinkAlias::display_priority`] via a stable sort when showing the
/// list to users. This will cause special aliases like `none` and `default` to
/// appear first.
pub fn hyperlink_aliases() -> Vec<HyperlinkAlias> {
HYPERLINK_PATTERN_ALIASES.iter().cloned().collect()
}
#[cfg(test)]
mod tests {
use std::str::FromStr;
@@ -999,4 +1091,75 @@ mod tests {
err(InvalidVariable("bar{{".to_string())),
);
}
#[test]
#[cfg(windows)]
fn convert_to_hyperlink_path() {
let convert = |path| {
String::from_utf8(
HyperlinkPath::from_path(Path::new(path)).unwrap().0,
)
.unwrap()
};
assert_eq!(convert(r"C:\dir\file.txt"), "/C:/dir/file.txt");
assert_eq!(
convert(r"C:\foo\bar\..\other\baz.txt"),
"/C:/foo/other/baz.txt"
);
assert_eq!(convert(r"\\server\dir\file.txt"), "//server/dir/file.txt");
assert_eq!(
convert(r"\\server\dir\foo\..\other\file.txt"),
"//server/dir/other/file.txt"
);
assert_eq!(convert(r"\\?\C:\dir\file.txt"), "/C:/dir/file.txt");
assert_eq!(
convert(r"\\?\UNC\server\dir\file.txt"),
"//server/dir/file.txt"
);
}
#[test]
fn aliases_are_sorted() {
let aliases = hyperlink_aliases();
let mut prev =
aliases.first().expect("aliases should be non-empty").name();
for alias in aliases.iter().skip(1) {
let name = alias.name();
assert!(
name > prev,
"'{prev}' should come before '{name}' in \
HYPERLINK_PATTERN_ALIASES",
);
prev = name;
}
}
#[test]
fn alias_names_are_reasonable() {
for alias in hyperlink_aliases() {
// There's no hard rule here, but if we want to define an alias
// with a name that doesn't pass this assert, then we should
// probably flag it as worthy of consideration. For example, we
// really do not want to define an alias that contains `{` or `}`,
// which might confuse it for a variable.
assert!(alias.name().chars().all(|c| c.is_alphanumeric()
|| c == '+'
|| c == '-'
|| c == '.'));
}
}
#[test]
fn aliases_are_valid_formats() {
for alias in hyperlink_aliases() {
let (name, format) = (alias.name(), alias.format());
assert!(
format.parse::<HyperlinkFormat>().is_ok(),
"invalid hyperlink alias '{name}': {format}",
);
}
}
}
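A sketch of consuming the new `hyperlink_aliases()` listing, assuming it is re-exported from `grep_printer` like the other hyperlink items; it re-sorts by display priority as the docs above recommend, and shows that an alias name still parses directly as a `HyperlinkFormat`:

use grep_printer::{hyperlink_aliases, HyperlinkFormat};

let mut aliases = hyperlink_aliases();
// Stable sort: `default` and `none` float to the front, everything
// else keeps its lexicographic order.
aliases.sort_by_key(|alias| alias.display_priority().unwrap_or(i16::MAX));
for alias in &aliases {
    println!("{:16} {}", alias.name(), alias.description());
}
// Alias names are accepted anywhere a format string is.
let format: HyperlinkFormat = "vscode".parse().unwrap();
println!("expanded: {format}");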

View File

@@ -1,85 +0,0 @@
/// Aliases to well-known hyperlink schemes.
///
/// These need to be sorted by name.
const HYPERLINK_PATTERN_ALIASES: &[(&str, &str)] = &[
#[cfg(not(windows))]
("default", "file://{host}{path}"),
#[cfg(windows)]
("default", "file://{path}"),
("file", "file://{host}{path}"),
// https://github.com/misaki-web/grepp
("grep+", "grep+://{path}:{line}"),
("kitty", "file://{host}{path}#{line}"),
// https://macvim.org/docs/gui_mac.txt.html#mvim%3A%2F%2F
("macvim", "mvim://open?url=file://{path}&line={line}&column={column}"),
("none", ""),
// https://macromates.com/blog/2007/the-textmate-url-scheme/
("textmate", "txmt://open?url=file://{path}&line={line}&column={column}"),
// https://code.visualstudio.com/docs/editor/command-line#_opening-vs-code-with-urls
("vscode", "vscode://file{path}:{line}:{column}"),
("vscode-insiders", "vscode-insiders://file{path}:{line}:{column}"),
("vscodium", "vscodium://file{path}:{line}:{column}"),
];
/// Look for the hyperlink format defined by the given alias name.
///
/// If one does not exist, `None` is returned.
pub(crate) fn find(name: &str) -> Option<&str> {
HYPERLINK_PATTERN_ALIASES
.binary_search_by_key(&name, |&(name, _)| name)
.map(|i| HYPERLINK_PATTERN_ALIASES[i].1)
.ok()
}
/// Return an iterator over all available alias names and their definitions.
pub(crate) fn iter() -> impl Iterator<Item = (&'static str, &'static str)> {
HYPERLINK_PATTERN_ALIASES.iter().copied()
}
#[cfg(test)]
mod tests {
use crate::HyperlinkFormat;
use super::*;
#[test]
fn is_sorted() {
let mut prev = HYPERLINK_PATTERN_ALIASES
.get(0)
.expect("aliases should be non-empty")
.0;
for &(name, _) in HYPERLINK_PATTERN_ALIASES.iter().skip(1) {
assert!(
name > prev,
"'{prev}' should come before '{name}' in \
HYPERLINK_PATTERN_ALIASES",
);
prev = name;
}
}
#[test]
fn alias_names_are_reasonable() {
for &(name, _) in HYPERLINK_PATTERN_ALIASES.iter() {
// There's no hard rule here, but if we want to define an alias
// with a name that doesn't pass this assert, then we should
// probably flag it as worthy of consideration. For example, we
// really do not want to define an alias that contains `{` or `}`,
// which might confuse it for a variable.
assert!(name.chars().all(|c| c.is_alphanumeric()
|| c == '+'
|| c == '-'
|| c == '.'));
}
}
#[test]
fn aliases_are_valid_formats() {
for (name, definition) in HYPERLINK_PATTERN_ALIASES {
assert!(
definition.parse::<HyperlinkFormat>().is_ok(),
"invalid hyperlink alias '{name}': {definition}",
);
}
}
}


@@ -1,19 +1,19 @@
use std::{
io::{self, Write},
path::Path,
sync::Arc,
time::Instant,
};
use {
grep_matcher::{Match, Matcher},
grep_searcher::{
Searcher, Sink, SinkContext, SinkContextKind, SinkFinish, SinkMatch,
},
grep_searcher::{Searcher, Sink, SinkContext, SinkFinish, SinkMatch},
serde_json as json,
};
use crate::{
counter::CounterWriter, jsont, stats::Stats, util::find_iter_at_in_context,
counter::CounterWriter, jsont, stats::Stats, util::Replacer,
util::find_iter_at_in_context,
};
/// The configuration for the JSON printer.
@@ -24,13 +24,17 @@ use crate::{
#[derive(Debug, Clone)]
struct Config {
pretty: bool,
max_matches: Option<u64>,
always_begin_end: bool,
replacement: Arc<Option<Vec<u8>>>,
}
impl Default for Config {
fn default() -> Config {
Config { pretty: false, max_matches: None, always_begin_end: false }
Config {
pretty: false,
always_begin_end: false,
replacement: Arc::new(None),
}
}
}
@@ -77,16 +81,6 @@ impl JSONBuilder {
self
}
/// Set the maximum amount of matches that are printed.
///
/// If multi line search is enabled and a match spans multiple lines, then
/// that match is counted exactly once for the purposes of enforcing this
/// limit, regardless of how many lines it spans.
pub fn max_matches(&mut self, limit: Option<u64>) -> &mut JSONBuilder {
self.config.max_matches = limit;
self
}
/// When enabled, the `begin` and `end` messages are always emitted, even
/// when no match is found.
///
@@ -98,6 +92,24 @@ impl JSONBuilder {
self.config.always_begin_end = yes;
self
}
/// Set the bytes that will be used to replace each occurrence of a match
/// found.
///
/// The replacement bytes given may include references to capturing groups,
/// which may either be in index form (e.g., `$2`) or can reference named
/// capturing groups if present in the original pattern (e.g., `$foo`).
///
/// For documentation on the full format, please see the `Captures` trait's
/// `interpolate` method in the
/// [grep-matcher](https://docs.rs/grep-matcher) crate.
pub fn replacement(
&mut self,
replacement: Option<Vec<u8>>,
) -> &mut JSONBuilder {
self.config.replacement = Arc::new(replacement);
self
}
}
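
For context, a minimal end-to-end sketch of the new replacement option, pieced together from the builder and test code in this diff; `JSON::into_inner` is assumed to behave as in the upstream grep-printer crate, and the pattern and replacement text are illustrative only.

use grep_printer::JSONBuilder;
use grep_regex::RegexMatcher;
use grep_searcher::SearcherBuilder;

fn json_with_replacement(haystack: &[u8]) -> std::io::Result<String> {
    let matcher = RegexMatcher::new(r"Watson").expect("valid regex");
    // With a replacement configured, every reported submatch also carries
    // a `replacement` field holding the interpolated text.
    let mut printer = JSONBuilder::new()
        .replacement(Some(b"Moriarity".to_vec()))
        .build(vec![]);
    SearcherBuilder::new().build().search_reader(
        &matcher,
        haystack,
        printer.sink(&matcher),
    )?;
    Ok(String::from_utf8(printer.into_inner()).expect("output is UTF-8"))
}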
/// The JSON printer, which emits results in a JSON lines format.
@@ -256,7 +268,8 @@ impl JSONBuilder {
/// encoded, then the byte offsets correspond to the data after base64
/// decoding.) The `submatch` objects are guaranteed to be sorted by their
/// starting offsets. Note that it is possible for this array to be empty,
/// for example, when searching reports inverted matches.
/// for example, when searching reports inverted matches. If the configuration
/// specifies a replacement, the resulting replacement text is also present.
///
/// #### Message: **context**
///
@@ -286,7 +299,9 @@ impl JSONBuilder {
/// decoding.) The `submatch` objects are guaranteed to be sorted by
/// their starting offsets. Note that it is possible for this array to be
/// non-empty, for example, when searching reports inverted matches such that
/// the original matcher could match things in the contextual lines.
/// the original matcher could match things in the contextual lines. If the
/// configuration specifies a replacement, the resulting replacement text
/// is also present.
///
/// #### Object: **submatch**
///
@@ -308,6 +323,10 @@ impl JSONBuilder {
/// the `lines` field in the
/// [`match`](#message-match) or [`context`](#message-context)
/// messages.
/// * **replacement** (optional) - An
/// [arbitrary data object](#object-arbitrary-data) corresponding to the
/// replacement text for this submatch, if the configuration specifies
/// a replacement.
///
/// #### Object: **stats**
///
@@ -447,6 +466,23 @@ impl JSONBuilder {
/// }
/// }
/// ```
/// and here's what a match type item would look like if a replacement text
/// of 'Moriarity' was given as a parameter:
/// ```json
/// {
/// "type": "match",
/// "data": {
/// "path": {"text": "/home/andrew/sherlock"},
/// "lines": {"text": "For the Doctor Watsons of this world, as opposed to the Sherlock\n"},
/// "line_number": 1,
/// "absolute_offset": 0,
/// "submatches": [
/// {"match": {"text": "Watson"}, "replacement": {"text": "Moriarity"}, "start": 15, "end": 21}
/// ]
/// }
/// }
/// ```
#[derive(Clone, Debug)]
pub struct JSON<W> {
config: Config,
@@ -471,11 +507,11 @@ impl<W: io::Write> JSON<W> {
) -> JSONSink<'static, 's, M, W> {
JSONSink {
matcher,
replacer: Replacer::new(),
json: self,
path: None,
start_time: Instant::now(),
match_count: 0,
after_context_remaining: 0,
binary_byte_offset: None,
begin_printed: false,
stats: Stats::new(),
@@ -497,11 +533,11 @@ impl<W: io::Write> JSON<W> {
{
JSONSink {
matcher,
replacer: Replacer::new(),
json: self,
path: Some(path.as_ref()),
start_time: Instant::now(),
match_count: 0,
after_context_remaining: 0,
binary_byte_offset: None,
begin_printed: false,
stats: Stats::new(),
@@ -519,7 +555,7 @@ impl<W: io::Write> JSON<W> {
} else {
json::to_writer(&mut self.wtr, message)?;
}
self.wtr.write(&[b'\n'])?;
let _ = self.wtr.write(b"\n")?; // This will always be Ok(1) when successful.
Ok(())
}
}
@@ -559,11 +595,11 @@ impl<W> JSON<W> {
#[derive(Debug)]
pub struct JSONSink<'p, 's, M: Matcher, W> {
matcher: M,
replacer: Replacer<M>,
json: &'s mut JSON<W>,
path: Option<&'p Path>,
start_time: Instant,
match_count: u64,
after_context_remaining: u64,
binary_byte_offset: Option<u64>,
begin_printed: bool,
stats: Stats,
@@ -643,30 +679,29 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
Ok(())
}
/// Returns true if this printer should quit.
/// If the configuration specifies a replacement, then this executes the
/// replacement, lazily allocating memory if necessary.
///
/// This implements the logic for handling quitting after seeing a certain
/// amount of matches. In most cases, the logic is simple, but we must
/// permit all "after" contextual lines to print after reaching the limit.
fn should_quit(&self) -> bool {
let limit = match self.json.config.max_matches {
None => return false,
Some(limit) => limit,
};
if self.match_count < limit {
return false;
/// To access the result of a replacement, use `replacer.replacement()`.
fn replace(
&mut self,
searcher: &Searcher,
bytes: &[u8],
range: std::ops::Range<usize>,
) -> io::Result<()> {
self.replacer.clear();
if self.json.config.replacement.is_some() {
let replacement =
(*self.json.config.replacement).as_ref().map(|r| &*r).unwrap();
self.replacer.replace_all(
searcher,
&self.matcher,
bytes,
range,
replacement,
)?;
}
self.after_context_remaining == 0
}
/// Returns whether the current match count exceeds the configured limit.
/// If there is no limit, then this always returns false.
fn match_more_than_limit(&self) -> bool {
let limit = match self.json.config.max_matches {
None => return false,
Some(limit) => limit,
};
self.match_count > limit
Ok(())
}
/// Write the "begin" message.
@@ -689,32 +724,23 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
searcher: &Searcher,
mat: &SinkMatch<'_>,
) -> Result<bool, io::Error> {
self.write_begin_message()?;
self.match_count += 1;
// When we've exceeded our match count, then the remaining context
// lines should not be reset, but instead, decremented. This avoids a
// bug where we display more matches than a configured limit. The main
// idea here is that 'matched' might be called again while printing
// an after-context line. In that case, we should treat this as a
// contextual line rather than a matching line for the purposes of
// termination.
if self.match_more_than_limit() {
self.after_context_remaining =
self.after_context_remaining.saturating_sub(1);
} else {
self.after_context_remaining = searcher.after_context() as u64;
}
self.write_begin_message()?;
self.record_matches(
searcher,
mat.buffer(),
mat.bytes_range_in_buffer(),
)?;
self.replace(searcher, mat.buffer(), mat.bytes_range_in_buffer())?;
self.stats.add_matches(self.json.matches.len() as u64);
self.stats.add_matched_lines(mat.lines().count() as u64);
let submatches = SubMatches::new(mat.bytes(), &self.json.matches);
let submatches = SubMatches::new(
mat.bytes(),
&self.json.matches,
self.replacer.replacement(),
);
let msg = jsont::Message::Match(jsont::Match {
path: self.path,
lines: mat.bytes(),
@@ -723,7 +749,7 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
submatches: submatches.as_slice(),
});
self.json.write_message(&msg)?;
Ok(!self.should_quit())
Ok(true)
}
fn context(
@@ -734,13 +760,14 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
self.write_begin_message()?;
self.json.matches.clear();
if ctx.kind() == &SinkContextKind::After {
self.after_context_remaining =
self.after_context_remaining.saturating_sub(1);
}
let submatches = if searcher.invert_match() {
self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
SubMatches::new(ctx.bytes(), &self.json.matches)
self.replace(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
SubMatches::new(
ctx.bytes(),
&self.json.matches,
self.replacer.replacement(),
)
} else {
SubMatches::empty()
};
@@ -752,7 +779,7 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
submatches: submatches.as_slice(),
});
self.json.write_message(&msg)?;
Ok(!self.should_quit())
Ok(true)
}
fn binary_data(
@@ -776,11 +803,7 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
self.json.wtr.reset_count();
self.start_time = Instant::now();
self.match_count = 0;
self.after_context_remaining = 0;
self.binary_byte_offset = None;
if self.json.config.max_matches == Some(0) {
return Ok(false);
}
if !self.json.config.always_begin_end {
return Ok(true);
@@ -794,10 +817,6 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
_searcher: &Searcher,
finish: &SinkFinish,
) -> Result<(), io::Error> {
if !self.begin_printed {
return Ok(());
}
self.binary_byte_offset = finish.binary_byte_offset();
self.stats.add_elapsed(self.start_time.elapsed());
self.stats.add_searches(1);
@@ -807,6 +826,9 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
self.stats.add_bytes_searched(finish.byte_count());
self.stats.add_bytes_printed(self.json.wtr.count());
if !self.begin_printed {
return Ok(());
}
let msg = jsont::Message::End(jsont::End {
path: self.path,
binary_offset: finish.binary_byte_offset(),
@@ -831,19 +853,27 @@ enum SubMatches<'a> {
impl<'a> SubMatches<'a> {
/// Create a new set of match ranges from a set of matches and the
/// corresponding bytes that those matches apply to.
fn new(bytes: &'a [u8], matches: &[Match]) -> SubMatches<'a> {
fn new(
bytes: &'a [u8],
matches: &[Match],
replacement: Option<(&'a [u8], &'a [Match])>,
) -> SubMatches<'a> {
if matches.len() == 1 {
let mat = matches[0];
SubMatches::Small([jsont::SubMatch {
m: &bytes[mat],
replacement: replacement
.map(|(rbuf, rmatches)| &rbuf[rmatches[0]]),
start: mat.start(),
end: mat.end(),
}])
} else {
let mut match_ranges = vec![];
for &mat in matches {
for (i, &mat) in matches.iter().enumerate() {
match_ranges.push(jsont::SubMatch {
m: &bytes[mat],
replacement: replacement
.map(|(rbuf, rmatches)| &rbuf[rmatches[i]]),
start: mat.start(),
end: mat.end(),
});
@@ -873,7 +903,7 @@ mod tests {
use grep_regex::{RegexMatcher, RegexMatcherBuilder};
use grep_searcher::SearcherBuilder;
use super::{JSONBuilder, JSON};
use super::{JSON, JSONBuilder};
const SHERLOCK: &'static [u8] = b"\
For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -919,9 +949,9 @@ and exhibited clearly, with a label attached.\
#[test]
fn max_matches() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let mut printer =
JSONBuilder::new().max_matches(Some(1)).build(vec![]);
let mut printer = JSONBuilder::new().build(vec![]);
SearcherBuilder::new()
.max_matches(Some(1))
.build()
.search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
.unwrap();
@@ -946,10 +976,10 @@ d
e
";
let matcher = RegexMatcher::new(r"d").unwrap();
let mut printer =
JSONBuilder::new().max_matches(Some(1)).build(vec![]);
let mut printer = JSONBuilder::new().build(vec![]);
SearcherBuilder::new()
.after_context(2)
.max_matches(Some(1))
.build()
.search_reader(
&matcher,


@@ -135,6 +135,7 @@ impl<'a> serde::Serialize for Context<'a> {
pub(crate) struct SubMatch<'a> {
pub(crate) m: &'a [u8],
pub(crate) replacement: Option<&'a [u8]>,
pub(crate) start: usize,
pub(crate) end: usize,
}
@@ -148,6 +149,9 @@ impl<'a> serde::Serialize for SubMatch<'a> {
let mut state = s.serialize_struct("SubMatch", 3)?;
state.serialize_field("match", &Data::from_bytes(self.m))?;
if let Some(r) = self.replacement {
state.serialize_field("replacement", &Data::from_bytes(r))?;
}
state.serialize_field("start", &self.start)?;
state.serialize_field("end", &self.end)?;
state.end()
@@ -186,7 +190,7 @@ impl<'a> Data<'a> {
}
#[cfg(not(unix))]
fn from_path(path: &Path) -> Data {
fn from_path(path: &Path) -> Data<'_> {
// Using lossy conversion means some paths won't round trip precisely,
// but it's not clear what we should actually do. Serde rejects
// non-UTF-8 paths, and OsStr's are serialized as a sequence of UTF-16


@@ -58,22 +58,22 @@ assert_eq!(output, expected);
*/
#![deny(missing_docs)]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![cfg_attr(docsrs, feature(doc_cfg))]
pub use crate::{
color::{default_color_specs, ColorError, ColorSpecs, UserColorSpec},
color::{ColorError, ColorSpecs, UserColorSpec, default_color_specs},
hyperlink::{
HyperlinkConfig, HyperlinkEnvironment, HyperlinkFormat,
HyperlinkFormatError,
HyperlinkAlias, HyperlinkConfig, HyperlinkEnvironment,
HyperlinkFormat, HyperlinkFormatError, hyperlink_aliases,
},
path::{PathPrinter, PathPrinterBuilder},
standard::{Standard, StandardBuilder, StandardSink},
standard::{SquashMode, Standard, StandardBuilder, StandardSink},
stats::Stats,
summary::{Summary, SummaryBuilder, SummaryKind, SummarySink},
};
#[cfg(feature = "serde")]
pub use crate::json::{JSONBuilder, JSONSink, JSON};
pub use crate::json::{JSON, JSONBuilder, JSONSink};
// The maximum number of bytes to execute a search to account for look-ahead.
//
@@ -92,7 +92,6 @@ mod macros;
mod color;
mod counter;
mod hyperlink;
mod hyperlink_aliases;
#[cfg(feature = "serde")]
mod json;
#[cfg(feature = "serde")]

File diff suppressed because it is too large


@@ -17,7 +17,7 @@ use crate::{
counter::CounterWriter,
hyperlink::{self, HyperlinkConfig},
stats::Stats,
util::{find_iter_at_in_context, PrinterPath},
util::{PrinterPath, find_iter_at_in_context},
};
/// The configuration for the summary printer.
@@ -32,7 +32,6 @@ struct Config {
hyperlink: HyperlinkConfig,
stats: bool,
path: bool,
max_matches: Option<u64>,
exclude_zero: bool,
separator_field: Arc<Vec<u8>>,
separator_path: Option<u8>,
@@ -47,7 +46,6 @@ impl Default for Config {
hyperlink: HyperlinkConfig::default(),
stats: false,
path: true,
max_matches: None,
exclude_zero: true,
separator_field: Arc::new(b":".to_vec()),
separator_path: None,
@@ -87,7 +85,13 @@ pub enum SummaryKind {
///
/// Note that if `stats` is enabled, then searching continues in order to
/// compute statistics.
Quiet,
QuietWithMatch,
/// Don't show any output and stop the search once a non-matching file
/// is found.
///
/// Note that if `stats` is enabled, then searching continues in order to
/// compute statistics.
QuietWithoutMatch,
}
impl SummaryKind {
@@ -101,7 +105,7 @@ impl SummaryKind {
match *self {
PathWithMatch | PathWithoutMatch => true,
Count | CountMatches | Quiet => false,
Count | CountMatches | QuietWithMatch | QuietWithoutMatch => false,
}
}
@@ -112,7 +116,8 @@ impl SummaryKind {
match *self {
CountMatches => true,
Count | PathWithMatch | PathWithoutMatch | Quiet => false,
Count | PathWithMatch | PathWithoutMatch | QuietWithMatch
| QuietWithoutMatch => false,
}
}
@@ -122,8 +127,10 @@ impl SummaryKind {
use self::SummaryKind::*;
match *self {
PathWithMatch | Quiet => true,
Count | CountMatches | PathWithoutMatch => false,
PathWithMatch | QuietWithMatch => true,
Count | CountMatches | PathWithoutMatch | QuietWithoutMatch => {
false
}
}
}
}
@@ -246,9 +253,9 @@ impl SummaryBuilder {
///
/// When this is enabled, this printer may need to do extra work in order
/// to compute certain statistics, which could cause the search to take
/// longer. For example, in `Quiet` mode, a search can quit after finding
/// the first match, but if `stats` is enabled, then the search will
/// continue after the first match in order to compute statistics.
/// longer. For example, in `QuietWithMatch` mode, a search can quit after
/// finding the first match, but if `stats` is enabled, then the search
/// will continue after the first match in order to compute statistics.
///
/// For a complete description of available statistics, see [`Stats`].
///
@@ -273,18 +280,6 @@ impl SummaryBuilder {
self
}
/// Set the maximum amount of matches that are printed.
///
/// If multi line search is enabled and a match spans multiple lines, then
/// that match is counted exactly once for the purposes of enforcing this
/// limit, regardless of how many lines it spans.
///
/// This is disabled by default.
pub fn max_matches(&mut self, limit: Option<u64>) -> &mut SummaryBuilder {
self.config.max_matches = limit;
self
}
/// Exclude count-related summary results with no matches.
///
/// When enabled and the mode is either `Count` or `CountMatches`, then
@@ -505,7 +500,9 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
/// search.
pub fn has_match(&self) -> bool {
match self.summary.config.kind {
SummaryKind::PathWithoutMatch => self.match_count == 0,
SummaryKind::PathWithoutMatch | SummaryKind::QuietWithoutMatch => {
self.match_count == 0
}
_ => self.match_count > 0,
}
}
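
A minimal sketch of driving the new QuietWithoutMatch kind, modeled on the quiet tests further down in this diff; note that has_match() is inverted for this kind, reporting true only when nothing matched.

use grep_printer::{SummaryBuilder, SummaryKind};
use grep_regex::RegexMatcher;
use grep_searcher::SearcherBuilder;

fn file_has_no_match(haystack: &[u8]) -> bool {
    let matcher = RegexMatcher::new(r"Watson").expect("valid regex");
    let mut printer = SummaryBuilder::new()
        .kind(SummaryKind::QuietWithoutMatch)
        .build_no_color(vec![]);
    let mut sink = printer.sink_with_path(&matcher, "sherlock");
    SearcherBuilder::new()
        .build()
        .search_reader(&matcher, haystack, &mut sink)
        .expect("search should not fail");
    // For QuietWithoutMatch (as for PathWithoutMatch), has_match() is true
    // exactly when the match count stayed at zero.
    sink.has_match()
}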
@@ -544,19 +541,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
searcher.multi_line_with_matcher(&self.matcher)
}
/// Returns true if this printer should quit.
///
/// This implements the logic for handling quitting after seeing a certain
/// amount of matches. In most cases, the logic is simple, but we must
/// permit all "after" contextual lines to print after reaching the limit.
fn should_quit(&self) -> bool {
let limit = match self.summary.config.max_matches {
None => return false,
Some(limit) => limit,
};
self.match_count >= limit
}
/// If this printer has a file path associated with it, then this will
/// write that path to the underlying writer followed by a line terminator.
/// (If a path terminator is set, then that is used instead of the line
@@ -672,7 +656,11 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
true
},
)?;
count
// Because of `find_iter_at_in_context` being a giant
// kludge internally, it's possible that it won't find
// *any* matches even though we clearly know that there is
// at least one. So make sure we record at least one here.
count.max(1)
};
if is_multi_line {
self.match_count += sink_match_count;
@@ -685,7 +673,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
} else if self.summary.config.kind.quit_early() {
return Ok(false);
}
Ok(!self.should_quit())
Ok(true)
}
fn binary_data(
@@ -716,10 +704,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
self.start_time = Instant::now();
self.match_count = 0;
self.binary_byte_offset = None;
if self.summary.config.max_matches == Some(0) {
return Ok(false);
}
Ok(true)
}
@@ -749,14 +733,14 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
// don't quit and therefore search the entire contents of the file.
//
// There is an unfortunate inconsistency here. Namely, when using
// Quiet or PathWithMatch, then the printer can quit after the first
// match seen, which could be long before seeing binary data. This
// means that using PathWithMatch can print a path where as using
// QuietWithMatch or PathWithMatch, then the printer can quit after the
// first match seen, which could be long before seeing binary data.
// This means that using PathWithMatch can print a path whereas using
// Count might not print it at all because of binary data.
//
// It's not possible to fix this without also potentially significantly
// impacting the performance of Quiet or PathWithMatch, so we accept
// the bug.
// impacting the performance of QuietWithMatch or PathWithMatch, so we
// accept the bug.
if self.binary_byte_offset.is_some()
&& searcher.binary_detection().quit_byte().is_some()
{
@@ -798,7 +782,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
self.write_path_line(searcher)?;
}
}
SummaryKind::Quiet => {}
SummaryKind::QuietWithMatch | SummaryKind::QuietWithoutMatch => {}
}
Ok(())
}
@@ -1012,9 +996,9 @@ and exhibited clearly, with a label attached.
let matcher = RegexMatcher::new(r"Watson").unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.max_matches(Some(1))
.build_no_color(vec![]);
SearcherBuilder::new()
.max_matches(Some(1))
.build()
.search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
.unwrap();
@@ -1122,7 +1106,7 @@ and exhibited clearly, with a label attached.
fn quiet() {
let matcher = RegexMatcher::new(r"Watson|Sherlock").unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Quiet)
.kind(SummaryKind::QuietWithMatch)
.build_no_color(vec![]);
let match_count = {
let mut sink = printer.sink_with_path(&matcher, "sherlock");
@@ -1144,7 +1128,7 @@ and exhibited clearly, with a label attached.
fn quiet_with_stats() {
let matcher = RegexMatcher::new(r"Watson|Sherlock").unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Quiet)
.kind(SummaryKind::QuietWithMatch)
.stats(true)
.build_no_color(vec![]);
let match_count = {


@@ -8,7 +8,7 @@ use {
},
};
use crate::{hyperlink::HyperlinkPath, MAX_LOOK_AHEAD};
use crate::{MAX_LOOK_AHEAD, hyperlink::HyperlinkPath};
/// A type for handling replacements while amortizing allocation.
pub(crate) struct Replacer<M: Matcher> {
@@ -59,19 +59,24 @@ impl<M: Matcher> Replacer<M> {
// See the giant comment in 'find_iter_at_in_context' below for why we
// do this dance.
let is_multi_line = searcher.multi_line_with_matcher(&matcher);
if is_multi_line {
// Get the line_terminator that was removed (if any) so we can add it
// back.
let line_terminator = if is_multi_line {
if haystack[range.end..].len() >= MAX_LOOK_AHEAD {
haystack = &haystack[..range.end + MAX_LOOK_AHEAD];
}
&[]
} else {
// When searching a single line, we should remove the line
// terminator. Otherwise, it's possible for the regex (via
// look-around) to observe the line terminator and not match
// because of it.
let mut m = Match::new(0, range.end);
trim_line_terminator(searcher, haystack, &mut m);
let line_terminator =
trim_line_terminator(searcher, haystack, &mut m);
haystack = &haystack[..m.end()];
}
line_terminator
};
{
let &mut Space { ref mut dst, ref mut caps, ref mut matches } =
self.allocate(matcher)?;
@@ -81,6 +86,7 @@ impl<M: Matcher> Replacer<M> {
replace_with_captures_in_context(
matcher,
haystack,
line_terminator,
range.clone(),
caps,
dst,
@@ -508,6 +514,8 @@ where
// Otherwise, it's possible for the regex (via look-around) to observe
// the line terminator and not match because of it.
let mut m = Match::new(0, range.end);
// No need to remember the line terminator as we aren't doing a replace
// here.
trim_line_terminator(searcher, bytes, &mut m);
bytes = &bytes[..m.end()];
}
@@ -523,19 +531,23 @@ where
/// Given a buf and some bounds, if there is a line terminator at the end of
/// the given bounds in buf, then the bounds are trimmed to remove the line
/// terminator.
pub(crate) fn trim_line_terminator(
/// terminator, returning the slice of the removed line terminator (if any).
pub(crate) fn trim_line_terminator<'b>(
searcher: &Searcher,
buf: &[u8],
buf: &'b [u8],
line: &mut Match,
) {
) -> &'b [u8] {
let lineterm = searcher.line_terminator();
if lineterm.is_suffix(&buf[*line]) {
let mut end = line.end() - 1;
if lineterm.is_crlf() && end > 0 && buf.get(end - 1) == Some(&b'\r') {
end -= 1;
}
let orig_end = line.end();
*line = line.with_end(end);
&buf[end..orig_end]
} else {
&[]
}
}
@@ -545,6 +557,7 @@ pub(crate) fn trim_line_terminator(
fn replace_with_captures_in_context<M, F>(
matcher: M,
bytes: &[u8],
line_terminator: &[u8],
range: std::ops::Range<usize>,
caps: &mut M::Captures,
dst: &mut Vec<u8>,
@@ -564,8 +577,14 @@ where
last_match = m.end();
append(caps, dst)
})?;
let end = std::cmp::min(bytes.len(), range.end);
let end = if last_match > range.end {
bytes.len()
} else {
std::cmp::min(bytes.len(), range.end)
};
dst.extend(&bytes[last_match..end]);
// Add back any line terminator.
dst.extend(line_terminator);
Ok(())
}


@@ -1,6 +1,6 @@
[package]
name = "grep-regex"
version = "0.1.12" #:version
version = "0.1.14" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Use Rust's regex library with the 'grep' crate.
@@ -11,11 +11,11 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/regex"
readme = "README.md"
keywords = ["regex", "grep", "search", "pattern", "line"]
license = "Unlicense OR MIT"
edition = "2021"
edition = "2024"
[dependencies]
bstr = "1.6.2"
grep-matcher = { version = "0.1.7", path = "../matcher" }
grep-matcher = { version = "0.1.8", path = "../matcher" }
log = "0.4.20"
regex-automata = { version = "0.4.0" }
regex-syntax = "0.8.0"


@@ -9,7 +9,7 @@ pub(crate) fn check(expr: &Hir, byte: u8) -> Result<(), Error> {
assert!(byte.is_ascii(), "ban byte must be ASCII");
let ch = char::from(byte);
let invalid = || Err(Error::new(ErrorKind::Banned(byte)));
match expr.kind() {
match *expr.kind() {
HirKind::Empty => {}
HirKind::Literal(hir::Literal(ref lit)) => {
if lit.iter().find(|&&b| b == byte).is_some() {


@@ -233,7 +233,7 @@ impl ConfiguredHIR {
&self.config
}
/// Return a reference to the underyling HIR.
/// Return a reference to the underlying HIR.
pub(crate) fn hir(&self) -> &Hir {
&self.hir
}
@@ -341,11 +341,7 @@ impl ConfiguredHIR {
/// Returns the "end line" anchor for this configuration.
fn line_anchor_end(&self) -> hir::Look {
if self.config.crlf {
hir::Look::EndCRLF
} else {
hir::Look::EndLF
}
if self.config.crlf { hir::Look::EndCRLF } else { hir::Look::EndLF }
}
}


@@ -1,9 +1,8 @@
use {
regex_automata::meta::Regex,
regex_syntax::hir::{
self,
self, Hir,
literal::{Literal, Seq},
Hir,
},
};
@@ -15,7 +14,7 @@ use crate::{config::ConfiguredHIR, error::Error};
/// that are in turn used to build a simpler regex that is more amenable to
/// optimization.
///
/// The main idea underyling the validity of this technique is the fact
/// The main idea underlying the validity of this technique is the fact
/// that ripgrep searches individuals lines and not across lines. (Unless
/// -U/--multiline is enabled.) Namely, we can pluck literals out of the regex,
/// search for them, find the bounds of the line in which that literal occurs
@@ -223,11 +222,7 @@ impl Extractor {
// extracting prefixes or suffixes.
seq = self.cross(seq, self.extract(hir));
}
if let Some(prev) = prev {
prev.choose(seq)
} else {
seq
}
if let Some(prev) = prev { prev.choose(seq) } else { seq }
}
/// Extract a sequence from the given alternation.
@@ -430,6 +425,7 @@ impl Extractor {
}
seq1.union(seq2);
assert!(seq1.len().map_or(true, |x| x <= self.limit_total));
seq1.prefix = seq1.prefix && seq2.prefix;
seq1
}
@@ -586,10 +582,15 @@ impl TSeq {
lits.iter().any(is_poisonous)
}
/// Compare the two sequences and return the one that is believed to be best
/// according to a hodge podge of heuristics.
/// Compare the two sequences and return the one that is believed to be
/// best according to a hodge podge of heuristics.
fn choose(self, other: TSeq) -> TSeq {
let (seq1, seq2) = (self, other);
let (mut seq1, mut seq2) = (self, other);
// Whichever one we pick, by virtue of picking one, we choose
// to not take the other. So we must consider the result inexact.
seq1.make_inexact();
seq2.make_inexact();
if !seq1.is_finite() {
return seq2;
} else if !seq2.is_finite() {
@@ -681,7 +682,7 @@ mod tests {
assert_eq!(e(r"foo"), seq([E("foo")]));
assert_eq!(e(r"[a-z]foo[a-z]"), seq([I("foo")]));
assert_eq!(e(r"[a-z](foo)(bar)[a-z]"), seq([I("foobar")]));
assert_eq!(e(r"[a-z]([a-z]foo)(bar[a-z])[a-z]"), seq([I("foobar")]));
assert_eq!(e(r"[a-z]([a-z]foo)(bar[a-z])[a-z]"), seq([I("foo")]));
assert_eq!(e(r"[a-z]([a-z]foo)([a-z]foo)[a-z]"), seq([I("foo")]));
assert_eq!(e(r"(\d{1,3}\.){3}\d{1,3}"), seq([I(".")]));
assert_eq!(e(r"[a-z]([a-z]foo){3}[a-z]"), seq([I("foo")]));
@@ -689,7 +690,7 @@ mod tests {
assert_eq!(e(r"[a-z]([a-z]foo[a-z]){3}[a-z]"), seq([I("foo")]));
assert_eq!(
e(r"[a-z]([a-z]foo){3}(bar[a-z]){3}[a-z]"),
seq([I("foobar")])
seq([I("foo")])
);
}
@@ -935,14 +936,14 @@ mod tests {
assert_eq!(Seq::infinite(), e(r"[A-Z]+"));
assert_eq!(seq([I("1")]), e(r"1[A-Z]"));
assert_eq!(seq([I("1")]), e(r"1[A-Z]2"));
assert_eq!(seq([E("123")]), e(r"[A-Z]+123"));
assert_eq!(seq([I("123")]), e(r"[A-Z]+123"));
assert_eq!(seq([I("123")]), e(r"[A-Z]+123[A-Z]+"));
assert_eq!(Seq::infinite(), e(r"1|[A-Z]|3"));
assert_eq!(seq([E("1"), I("2"), E("3")]), e(r"1|2[A-Z]|3"),);
assert_eq!(seq([E("1"), I("2"), E("3")]), e(r"1|[A-Z]2[A-Z]|3"),);
assert_eq!(seq([E("1"), E("2"), E("3")]), e(r"1|[A-Z]2|3"),);
assert_eq!(seq([E("1"), I("2"), E("3")]), e(r"1|[A-Z]2|3"),);
assert_eq!(seq([E("1"), I("2"), E("4")]), e(r"1|2[A-Z]3|4"),);
assert_eq!(seq([E("2")]), e(r"(?:|1)[A-Z]2"));
assert_eq!(seq([I("2")]), e(r"(?:|1)[A-Z]2"));
assert_eq!(inexact([I("a")]), e(r"a.z"));
}
@@ -1005,4 +1006,11 @@ mod tests {
let s = e(r"foobarfoo|foo| |foofoo");
assert_eq!(Seq::infinite(), s);
}
// Regression test for: https://github.com/BurntSushi/ripgrep/issues/2884
#[test]
fn case_insensitive_alternation() {
let s = e(r"(?i:e.x|ex)");
assert_eq!(s, seq([I("X"), I("x")]));
}
}


@@ -4,8 +4,8 @@ use {
NoError,
},
regex_automata::{
meta::Regex, util::captures::Captures as AutomataCaptures, Input,
PatternID,
Input, PatternID, meta::Regex,
util::captures::Captures as AutomataCaptures,
},
};
@@ -552,8 +552,6 @@ impl RegexCaptures {
#[cfg(test)]
mod tests {
use grep_matcher::{LineMatchKind, Matcher};
use super::*;
// Test that enabling word matches does the right thing and demonstrate
@@ -589,10 +587,12 @@ mod tests {
// and the regex could not be modified to remove a line terminator.
#[test]
fn line_terminator_error() {
assert!(RegexMatcherBuilder::new()
.line_terminator(Some(b'\n'))
.build(r"a\nz")
.is_err())
assert!(
RegexMatcherBuilder::new()
.line_terminator(Some(b'\n'))
.build(r"a\nz")
.is_err()
)
}
// Test that enabling CRLF permits `$` to match at the end of a line.


@@ -122,7 +122,7 @@ fn strip_from_match_ascii(expr: Hir, byte: u8) -> Result<Hir, Error> {
mod tests {
use regex_syntax::Parser;
use super::{strip_from_match, LineTerminator};
use super::{LineTerminator, strip_from_match};
use crate::error::Error;
fn roundtrip(pattern: &str, byte: u8) -> String {


@@ -1,6 +1,6 @@
[package]
name = "grep-searcher"
version = "0.1.13" #:version
version = "0.1.16" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
@@ -11,23 +11,22 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/searcher"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense OR MIT"
edition = "2021"
edition = "2024"
[dependencies]
bstr = { version = "1.6.2", default-features = false, features = ["std"] }
encoding_rs = "0.8.33"
encoding_rs_io = "0.1.7"
grep-matcher = { version = "0.1.7", path = "../matcher" }
grep-matcher = { version = "0.1.8", path = "../matcher" }
log = "0.4.20"
memchr = "2.6.3"
memmap = { package = "memmap2", version = "0.9.0" }
[dev-dependencies]
grep-regex = { version = "0.1.12", path = "../regex" }
grep-regex = { version = "0.1.14", path = "../regex" }
regex = "1.9.5"
[features]
simd-accel = ["encoding_rs/simd-accel"]
# This feature is DEPRECATED. Runtime dispatch is used for SIMD now.
# These features are DEPRECATED. Runtime dispatch is used for SIMD now.
simd-accel = []
avx-accel = []


@@ -4,8 +4,8 @@ use std::io;
use std::process;
use grep_regex::RegexMatcher;
use grep_searcher::sinks::UTF8;
use grep_searcher::Searcher;
use grep_searcher::sinks::UTF8;
fn main() {
if let Err(err) = example() {
@@ -18,7 +18,7 @@ fn example() -> Result<(), Box<dyn Error>> {
let pattern = match env::args().nth(1) {
Some(pattern) => pattern,
None => {
return Err(From::from(format!("Usage: search-stdin <pattern>")))
return Err(From::from(format!("Usage: search-stdin <pattern>")));
}
};
let matcher = RegexMatcher::new(&pattern)?;


@@ -90,8 +90,8 @@ pub use crate::{
SearcherBuilder,
},
sink::{
sinks, Sink, SinkContext, SinkContextKind, SinkError, SinkFinish,
SinkMatch,
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
sinks,
},
};


@@ -538,6 +538,11 @@ fn replace_bytes(
while let Some(i) = bytes.find_byte(src) {
bytes[i] = replacement;
bytes = &mut bytes[i + 1..];
// To search for adjacent `src` bytes we use a different strategy.
// Since binary data tends to have long runs of NUL terminators,
// it is faster to compare one-byte-at-a-time than to stop and start
// memchr (through `find_byte`) for every byte in a sequence.
while bytes.get(0) == Some(&src) {
bytes[0] = replacement;
bytes = &mut bytes[1..];
@@ -548,7 +553,7 @@ fn replace_bytes(
#[cfg(test)]
mod tests {
use bstr::{ByteSlice, ByteVec};
use bstr::ByteVec;
use super::*;
@@ -577,6 +582,9 @@ and exhibited clearly, with a label attached.\
#[test]
fn replace() {
assert_eq!(replace_str("", b'b', b'z'), (s(""), None));
assert_eq!(replace_str("a", b'a', b'a'), (s("a"), None));
assert_eq!(replace_str("a", b'b', b'z'), (s("a"), None));
assert_eq!(replace_str("abc", b'b', b'z'), (s("azc"), Some(1)));
assert_eq!(replace_str("abb", b'b', b'z'), (s("azz"), Some(1)));
assert_eq!(replace_str("aba", b'a', b'z'), (s("zbz"), Some(0)));


@@ -198,8 +198,6 @@ fn preceding_by_pos(
#[cfg(test)]
mod tests {
use grep_matcher::Match;
use super::*;
const SHERLOCK: &'static str = "\


@@ -33,6 +33,7 @@ pub(crate) struct Core<'s, M: 's, S> {
after_context_left: usize,
has_sunk: bool,
has_matched: bool,
count: u64,
}
impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
@@ -59,6 +60,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
after_context_left: 0,
has_sunk: false,
has_matched: false,
count: 0,
};
if !core.searcher.multi_line_with_matcher(&core.matcher) {
if core.is_line_by_line_fast() {
@@ -78,6 +80,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
self.pos = pos;
}
fn count(&self) -> u64 {
self.count
}
fn increment_count(&mut self) {
self.count += 1;
}
pub(crate) fn binary_byte_offset(&self) -> Option<u64> {
self.binary_byte_offset.map(|offset| offset as u64)
}
@@ -101,6 +111,47 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
self.sink.binary_data(&self.searcher, binary_byte_offset)
}
fn is_match(&self, line: &[u8]) -> Result<bool, S::Error> {
// We need to strip the line terminator here to match the
// semantics of line-by-line searching. Namely, regexes
// like `(?m)^$` can match at the final position beyond a
// line terminator, which is non-sensical in line oriented
// matching.
let line = lines::without_terminator(line, self.config.line_term);
self.matcher.is_match(line).map_err(S::Error::error_message)
}
pub(crate) fn find(
&mut self,
slice: &[u8],
) -> Result<Option<Range>, S::Error> {
if self.has_exceeded_match_limit() {
return Ok(None);
}
match self.matcher().find(slice) {
Err(err) => Err(S::Error::error_message(err)),
Ok(None) => Ok(None),
Ok(Some(m)) => {
self.increment_count();
Ok(Some(m))
}
}
}
fn shortest_match(
&mut self,
slice: &[u8],
) -> Result<Option<usize>, S::Error> {
if self.has_exceeded_match_limit() {
return Ok(None);
}
match self.matcher.shortest_match(slice) {
Err(err) => return Err(S::Error::error_message(err)),
Ok(None) => return Ok(None),
Ok(Some(m)) => Ok(Some(m)),
}
}
pub(crate) fn begin(&mut self) -> Result<bool, S::Error> {
self.sink.begin(&self.searcher)
}
@@ -140,10 +191,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
// separator (when before_context==0 and after_context>0), we
// need to know something about the position of the previous
// line visited, even if we're at the beginning of the buffer.
//
// ... however, we only need to find the N preceding lines based
// on before context. We can skip this (potentially costly, for
// large values of N) step when before_context==0.
let context_start = lines::preceding(
buf,
self.config.line_term.as_byte(),
self.config.max_context(),
self.config.before_context,
);
let consumed =
std::cmp::max(context_start, self.last_line_visited);
@@ -157,6 +212,18 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
consumed
}
pub(crate) fn advance_buffer(&mut self, buf: &[u8], consumed: usize) {
if consumed == 0 {
return;
}
self.count_lines(buf, consumed);
self.absolute_byte_offset += consumed as u64;
self.last_line_counted = 0;
self.last_line_visited =
self.last_line_visited.saturating_sub(consumed);
self.set_pos(self.pos().saturating_sub(consumed));
}
pub(crate) fn detect_binary(
&mut self,
buf: &[u8],
@@ -226,6 +293,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if self.after_context_left == 0 {
return Ok(true);
}
let exceeded_match_limit = self.has_exceeded_match_limit();
let range = Range::new(self.last_line_visited, upto);
let mut stepper = LineStep::new(
self.config.line_term.as_byte(),
@@ -233,7 +301,16 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
range.end(),
);
while let Some(line) = stepper.next_match(buf) {
if !self.sink_after_context(buf, &line)? {
if exceeded_match_limit
&& self.is_match(&buf[line])? != self.config.invert_match
{
let after_context_left = self.after_context_left;
self.set_pos(line.end());
if !self.sink_matched(buf, &line)? {
return Ok(false);
}
self.after_context_left = after_context_left - 1;
} else if !self.sink_after_context(buf, &line)? {
return Ok(false);
}
if self.after_context_left == 0 {
@@ -272,6 +349,12 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
range.end(),
);
while let Some(line) = stepper.next_match(buf) {
if self.has_exceeded_match_limit()
&& !self.config.passthru
&& self.after_context_left == 0
{
return Ok(false);
}
let matched = {
// Stripping the line terminator is necessary to prevent some
// classes of regexes from matching the empty position *after*
@@ -281,15 +364,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
&buf[line],
self.config.line_term,
);
match self.matcher.shortest_match(slice) {
Err(err) => return Err(S::Error::error_message(err)),
Ok(result) => result.is_some(),
}
self.shortest_match(slice)?.is_some()
};
self.set_pos(line.end());
let success = matched != self.config.invert_match;
if success {
self.has_matched = true;
self.increment_count();
if !self.before_context_by_line(buf, line.start())? {
return Ok(false);
}
@@ -325,10 +407,11 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
if self.config.invert_match {
if !self.match_by_line_fast_invert(buf)? {
return Ok(Stop);
break;
}
} else if let Some(line) = self.find_by_line_fast(buf)? {
self.has_matched = true;
self.increment_count();
if self.config.max_context() > 0 {
if !self.after_context_by_line(buf, line.start())? {
return Ok(Stop);
@@ -348,6 +431,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if !self.after_context_by_line(buf, buf.len())? {
return Ok(Stop);
}
if self.has_exceeded_match_limit() && self.after_context_left == 0 {
return Ok(Stop);
}
self.set_pos(buf.len());
Ok(Continue)
}
@@ -387,16 +473,20 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
invert_match.end(),
);
while let Some(line) = stepper.next_match(buf) {
self.increment_count();
if !self.sink_matched(buf, &line)? {
return Ok(false);
}
if self.has_exceeded_match_limit() {
return Ok(false);
}
}
Ok(true)
}
#[inline(always)]
fn find_by_line_fast(
&self,
&mut self,
buf: &[u8],
) -> Result<Option<Range>, S::Error> {
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
@@ -404,6 +494,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
let mut pos = self.pos();
while !buf[pos..].is_empty() {
if self.has_exceeded_match_limit() {
return Ok(None);
}
match self.matcher.find_candidate_line(&buf[pos..]) {
Err(err) => return Err(S::Error::error_message(err)),
Ok(None) => return Ok(None),
@@ -427,23 +520,10 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
self.config.line_term.as_byte(),
Range::zero(i).offset(pos),
);
// We need to strip the line terminator here to match the
// semantics of line-by-line searching. Namely, regexes
// like `(?m)^$` can match at the final position beyond a
// line terminator, which is non-sensical in line oriented
// matching.
let slice = lines::without_terminator(
&buf[line],
self.config.line_term,
);
match self.matcher.is_match(slice) {
Err(err) => return Err(S::Error::error_message(err)),
Ok(true) => return Ok(Some(line)),
Ok(false) => {
pos = line.end();
continue;
}
if self.is_match(&buf[line])? {
return Ok(Some(line));
}
pos = line.end();
}
}
}
@@ -638,4 +718,8 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
false
}
fn has_exceeded_match_limit(&self) -> bool {
self.config.max_matches.map_or(false, |limit| self.count() >= limit)
}
}
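
With the printer-level limits removed in this diff, capping output is now the searcher's responsibility. A minimal sketch using the SearcherBuilder::max_matches method exercised by the updated printer tests above; grep-searcher's sinks::UTF8 closure sink is used here purely for brevity.

use grep_regex::RegexMatcher;
use grep_searcher::{SearcherBuilder, sinks::UTF8};

fn first_matching_line(haystack: &[u8]) -> Option<String> {
    let matcher = RegexMatcher::new(r"Watson").expect("valid regex");
    let mut first = None;
    SearcherBuilder::new()
        // The core searcher stops reporting matches once this many have
        // been seen, while still flushing any configured after-context.
        .max_matches(Some(1))
        .build()
        .search_reader(
            &matcher,
            haystack,
            UTF8(|_line_number, line| {
                first = Some(line.to_string());
                Ok(true)
            }),
        )
        .expect("search should not fail");
    first
}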

Some files were not shown because too many files have changed in this diff