Compare commits
2 Commits
grep-searc
...
ignore-0.3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
71b5b9c22c | ||
|
|
3f505931bd |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -6,7 +6,6 @@ target
|
||||
/ignore/Cargo.lock
|
||||
/termcolor/Cargo.lock
|
||||
/wincolor/Cargo.lock
|
||||
/deployment
|
||||
|
||||
# Snapcraft files
|
||||
stage
|
||||
@@ -14,4 +13,4 @@ prime
|
||||
parts
|
||||
*.snap
|
||||
*.pyc
|
||||
ripgrep*_source.tar.bz2
|
||||
ripgrep*_source.tar.bz2
|
||||
109
.travis.yml
109
.travis.yml
@@ -1,31 +1,21 @@
|
||||
language: rust
|
||||
dist: xenial
|
||||
|
||||
env:
|
||||
global:
|
||||
- PROJECT_NAME: ripgrep
|
||||
- PROJECT_NAME=ripgrep
|
||||
- RUST_BACKTRACE: full
|
||||
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
# For generating man page.
|
||||
- libxslt1-dev
|
||||
- asciidoc
|
||||
- docbook-xsl
|
||||
- xsltproc
|
||||
- libxml2-utils
|
||||
# Needed for completion-function test.
|
||||
# Needed for completion-function test
|
||||
- zsh
|
||||
# Needed for testing decompression search.
|
||||
- xz-utils
|
||||
- liblz4-tool
|
||||
# For building MUSL static builds on Linux.
|
||||
- musl-tools
|
||||
|
||||
matrix:
|
||||
fast_finish: true
|
||||
include:
|
||||
# Nightly channel.
|
||||
# All *nix releases are done on the nightly channel to take advantage
|
||||
# of the regex library's multiple pattern SIMD search.
|
||||
# (All *nix releases are done on the nightly channel to take advantage
|
||||
# of the regex library's multiple pattern SIMD search.)
|
||||
- os: linux
|
||||
rust: nightly
|
||||
env: TARGET=i686-unknown-linux-musl
|
||||
@@ -34,77 +24,58 @@ matrix:
|
||||
env: TARGET=x86_64-unknown-linux-musl
|
||||
- os: osx
|
||||
rust: nightly
|
||||
# XML_CATALOG_FILES is apparently necessary for asciidoc on macOS.
|
||||
env: TARGET=x86_64-apple-darwin XML_CATALOG_FILES=/usr/local/etc/xml/catalog
|
||||
- os: linux
|
||||
rust: nightly
|
||||
env: TARGET=arm-unknown-linux-gnueabihf GCC_VERSION=4.8
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gcc-4.8-arm-linux-gnueabihf
|
||||
- binutils-arm-linux-gnueabihf
|
||||
- libc6-armhf-cross
|
||||
- libc6-dev-armhf-cross
|
||||
# For generating man page.
|
||||
- libxslt1-dev
|
||||
- asciidoc
|
||||
- docbook-xsl
|
||||
- xsltproc
|
||||
- libxml2-utils
|
||||
# Beta channel. We enable these to make sure there are no regressions in
|
||||
# Rust beta releases.
|
||||
env: TARGET=x86_64-apple-darwin
|
||||
# Beta channel.
|
||||
- os: linux
|
||||
rust: beta
|
||||
env: TARGET=x86_64-unknown-linux-musl
|
||||
- os: linux
|
||||
rust: beta
|
||||
env: TARGET=x86_64-unknown-linux-gnu
|
||||
# Minimum Rust supported channel. We enable these to make sure ripgrep
|
||||
# continues to work on the advertised minimum Rust version.
|
||||
# Minimum Rust supported channel.
|
||||
- os: linux
|
||||
rust: 1.34.0
|
||||
rust: 1.17.0
|
||||
env: TARGET=x86_64-unknown-linux-gnu
|
||||
- os: linux
|
||||
rust: 1.34.0
|
||||
rust: 1.17.0
|
||||
env: TARGET=x86_64-unknown-linux-musl
|
||||
- os: linux
|
||||
rust: 1.34.0
|
||||
env: TARGET=arm-unknown-linux-gnueabihf GCC_VERSION=4.8
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gcc-4.8-arm-linux-gnueabihf
|
||||
- binutils-arm-linux-gnueabihf
|
||||
- libc6-armhf-cross
|
||||
- libc6-dev-armhf-cross
|
||||
# For generating man page.
|
||||
- libxslt1-dev
|
||||
- asciidoc
|
||||
- docbook-xsl
|
||||
- xsltproc
|
||||
- libxml2-utils
|
||||
install: ci/install.sh
|
||||
script: ci/script.sh
|
||||
before_deploy: ci/before_deploy.sh
|
||||
|
||||
before_install:
|
||||
- export PATH="$PATH:$HOME/.cargo/bin"
|
||||
|
||||
install:
|
||||
- bash ci/install.sh
|
||||
|
||||
script:
|
||||
- bash ci/script.sh
|
||||
|
||||
before_deploy:
|
||||
- bash ci/before_deploy.sh
|
||||
|
||||
deploy:
|
||||
provider: releases
|
||||
file_glob: true
|
||||
file: deployment/${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}.tar.gz
|
||||
skip_cleanup: true
|
||||
on:
|
||||
condition: $TRAVIS_RUST_VERSION = nightly
|
||||
branch: master # i guess we do need this after all?
|
||||
tags: true
|
||||
api_key:
|
||||
secure: "IbSnsbGkxSydR/sozOf1/SRvHplzwRUHzcTjM7BKnr7GccL86gRPUrsrvD103KjQUGWIc1TnK1YTq5M0Onswg/ORDjqa1JEJPkPdPnVh9ipbF7M2De/7IlB4X4qXLKoApn8+bx2x/mfYXu4G+G1/2QdbaKK2yfXZKyjz0YFx+6CNrVCT2Nk8q7aHvOOzAL58vsG8iPDpupuhxlMDDn/UhyOWVInmPPQ0iJR1ZUJN8xJwXvKvBbfp3AhaBiAzkhXHNLgBR8QC5noWWMXnuVDMY3k4f3ic0V+p/qGUCN/nhptuceLxKFicMCYObSZeUzE5RAI0/OBW7l3z2iCoc+TbAnn+JrX/ObJCfzgAOXAU3tLaBFMiqQPGFKjKg1ltSYXomOFP/F7zALjpvFp4lYTBajRR+O3dqaxA9UQuRjw27vOeUpMcga4ZzL4VXFHzrxZKBHN//XIGjYAVhJ1NSSeGpeJV5/+jYzzWKfwSagRxQyVCzMooYFFXzn8Yxdm3PJlmp3GaAogNkdB9qKcrEvRINCelalzALPi0hD/HUDi8DD2PNTCLLMo6VSYtvc685Zbe+KgNzDV1YyTrRCUW6JotrS0r2ULLwnsh40hSB//nNv3XmwNmC/CmW5QAnIGj8cBMF4S2t6ohADIndojdAfNiptmaZOIT6owK7bWMgPMyopo="
|
||||
file_glob: true
|
||||
file: ${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}.*
|
||||
# don't delete the artifacts from previous phases
|
||||
skip_cleanup: true
|
||||
# deploy when a new tag is pushed
|
||||
on:
|
||||
# channel to use to produce the release artifacts
|
||||
# NOTE make sure you only release *once* per target
|
||||
# TODO you may want to pick a different channel
|
||||
condition: $TRAVIS_RUST_VERSION = nightly
|
||||
tags: true
|
||||
|
||||
branches:
|
||||
only:
|
||||
# Pushes and PR to the master branch
|
||||
- master
|
||||
# Ruby regex to match tags. Required, or travis won't trigger deploys when
|
||||
# a new tag is pushed.
|
||||
# IMPORTANT Ruby regex to match tags. Required, or travis won't trigger deploys when a new tag
|
||||
# is pushed. This regex matches semantic versions like v1.2.3-rc4+2016.02.22
|
||||
- /^\d+\.\d+\.\d+.*$/
|
||||
|
||||
notifications:
|
||||
email:
|
||||
on_success: never
|
||||
|
||||
511
CHANGELOG.md
511
CHANGELOG.md
@@ -1,514 +1,3 @@
|
||||
11.0.0 (TBD)
|
||||
============
|
||||
ripgrep 11 is a new major version release of ripgrep that contains many bug
|
||||
fixes, some performance improvements and a few feature enhancements. Notably,
|
||||
ripgrep's user experience for binary file filtering has been improved. See the
|
||||
[guide's new section on binary data](GUIDE.md#binary-data) for more details.
|
||||
|
||||
This release also marks a change in ripgrep's versioning. Whereas the previous
|
||||
version was `0.10.0`, this version is `11.0.0`. Moving forward, ripgrep's
|
||||
major version will be increased a few times per year. ripgrep will continue to
|
||||
be conservative with respect to backwards compatibility, but may occasionally
|
||||
introduce breaking changes, which will always be documented in this CHANGELOG.
|
||||
See [issue 1172](https://github.com/BurntSushi/ripgrep/issues/1172) for a bit
|
||||
more detail on why this versioning change was made.
|
||||
|
||||
This release increases the **minimum supported Rust version** from 1.28.0 to
|
||||
1.34.0.
|
||||
|
||||
**BREAKING CHANGES**:
|
||||
|
||||
* ripgrep has tweaked its exit status codes to be more like GNU grep's. Namely,
|
||||
if a non-fatal error occurs during a search, then ripgrep will now always
|
||||
emit a `2` exit status code, regardless of whether a match is found or not.
|
||||
Previously, ripgrep would only emit a `2` exit status code for a catastrophic
|
||||
error (e.g., regex syntax error). One exception to this is if ripgrep is run
|
||||
with `-q/--quiet`. In that case, if an error occurs and a match is found,
|
||||
then ripgrep will exit with a `0` exit status code.
|
||||
* Supplying the `-u/--unrestricted` flag three times is now equivalent to
|
||||
supplying `--no-ignore --hidden --binary`. Previously, `-uuu` was equivalent
|
||||
to `--no-ignore --hidden --text`. The difference is that `--binary` disables
|
||||
binary file filtering without potentially dumping binary data into your
|
||||
terminal. That is, `rg -uuu foo` should now be equivalent to `grep -r foo`.
|
||||
* The `avx-accel` feature of ripgrep has been removed since it is no longer
|
||||
necessary. All uses of AVX in ripgrep are now enabled automatically via
|
||||
runtime CPU feature detection. The `simd-accel` feature does remain
|
||||
available, however, it does increase compilation times substantially at the
|
||||
moment.
|
||||
|
||||
Performance improvements:
|
||||
|
||||
* [PERF #497](https://github.com/BurntSushi/ripgrep/issues/497),
|
||||
[PERF #838](https://github.com/BurntSushi/ripgrep/issues/838):
|
||||
Make `rg -F -f dictionary-of-literals` much faster.
|
||||
|
||||
Feature enhancements:
|
||||
|
||||
* Added or improved file type filtering for Apache Thrift, ASP, Bazel, Brotli,
|
||||
BuildStream, bzip2, C, C++, Cython, gzip, Java, Make, Postscript, QML, Tex,
|
||||
XML, xz, zig and zstd.
|
||||
* [FEATURE #855](https://github.com/BurntSushi/ripgrep/issues/855):
|
||||
Add `--binary` flag for disabling binary file filtering.
|
||||
* [FEATURE #1078](https://github.com/BurntSushi/ripgrep/pull/1078):
|
||||
Add `--max-columns-preview` flag for showing a preview of long lines.
|
||||
* [FEATURE #1099](https://github.com/BurntSushi/ripgrep/pull/1099):
|
||||
Add support for Brotli and Zstd to the `-z/--search-zip` flag.
|
||||
* [FEATURE #1138](https://github.com/BurntSushi/ripgrep/pull/1138):
|
||||
Add `--no-ignore-dot` flag for ignoring `.ignore` files.
|
||||
* [FEATURE #1155](https://github.com/BurntSushi/ripgrep/pull/1155):
|
||||
Add `--auto-hybrid-regex` flag for automatically falling back to PCRE2.
|
||||
* [FEATURE #1159](https://github.com/BurntSushi/ripgrep/pull/1159):
|
||||
ripgrep's exit status logic should now match GNU grep. See updated man page.
|
||||
* [FEATURE #1164](https://github.com/BurntSushi/ripgrep/pull/1164):
|
||||
Add `--ignore-file-case-insensitive` for case insensitive ignore globs.
|
||||
* [FEATURE #1185](https://github.com/BurntSushi/ripgrep/pull/1185):
|
||||
Add `-I` flag as a short option for the `--no-filename` flag.
|
||||
* [FEATURE #1207](https://github.com/BurntSushi/ripgrep/pull/1207):
|
||||
Add `none` value to `-E/--encoding` to forcefully disable all transcoding.
|
||||
* [FEATURE da9d7204](https://github.com/BurntSushi/ripgrep/commit/da9d7204):
|
||||
Add `--pcre2-version` for showing PCRE2 version information.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #306](https://github.com/BurntSushi/ripgrep/issues/306),
|
||||
[BUG #855](https://github.com/BurntSushi/ripgrep/issues/855):
|
||||
Improve the user experience for ripgrep's binary file filtering.
|
||||
* [BUG #373](https://github.com/BurntSushi/ripgrep/issues/373),
|
||||
[BUG #1098](https://github.com/BurntSushi/ripgrep/issues/1098):
|
||||
`**` is now accepted as valid syntax anywhere in a glob.
|
||||
* [BUG #916](https://github.com/BurntSushi/ripgrep/issues/916):
|
||||
ripgrep no longer hangs when searching `/proc` with a zombie process present.
|
||||
* [BUG #1052](https://github.com/BurntSushi/ripgrep/issues/1052):
|
||||
Fix bug where ripgrep could panic when transcoding UTF-16 files.
|
||||
* [BUG #1055](https://github.com/BurntSushi/ripgrep/issues/1055):
|
||||
Suggest `-U/--multiline` when a pattern contains a `\n`.
|
||||
* [BUG #1063](https://github.com/BurntSushi/ripgrep/issues/1063):
|
||||
Always strip a BOM if it's present, even for UTF-8.
|
||||
* [BUG #1064](https://github.com/BurntSushi/ripgrep/issues/1064):
|
||||
Fix inner literal detection that could lead to incorrect matches.
|
||||
* [BUG #1079](https://github.com/BurntSushi/ripgrep/issues/1079):
|
||||
Fixes a bug where the order of globs could result in missing a match.
|
||||
* [BUG #1089](https://github.com/BurntSushi/ripgrep/issues/1089):
|
||||
Fix another bug where ripgrep could panic when transcoding UTF-16 files.
|
||||
* [BUG #1091](https://github.com/BurntSushi/ripgrep/issues/1091):
|
||||
Add note about inverted flags to the man page.
|
||||
* [BUG #1093](https://github.com/BurntSushi/ripgrep/pull/1093):
|
||||
Fix handling of literal slashes in gitignore patterns.
|
||||
* [BUG #1095](https://github.com/BurntSushi/ripgrep/issues/1095):
|
||||
Fix corner cases involving the `--crlf` flag.
|
||||
* [BUG #1101](https://github.com/BurntSushi/ripgrep/issues/1101):
|
||||
Fix AsciiDoc escaping for man page output.
|
||||
* [BUG #1103](https://github.com/BurntSushi/ripgrep/issues/1103):
|
||||
Clarify what `--encoding auto` does.
|
||||
* [BUG #1106](https://github.com/BurntSushi/ripgrep/issues/1106):
|
||||
`--files-with-matches` and `--files-without-match` work with one file.
|
||||
* [BUG #1121](https://github.com/BurntSushi/ripgrep/issues/1121):
|
||||
Fix bug that was triggering Windows antimalware when using the `--files`
|
||||
flag.
|
||||
* [BUG #1125](https://github.com/BurntSushi/ripgrep/issues/1125),
|
||||
[BUG #1159](https://github.com/BurntSushi/ripgrep/issues/1159):
|
||||
ripgrep shouldn't panic for `rg -h | rg` and should emit correct exit status.
|
||||
* [BUG #1144](https://github.com/BurntSushi/ripgrep/issues/1144):
|
||||
Fixes a bug where line numbers could be wrong on big-endian machines.
|
||||
* [BUG #1154](https://github.com/BurntSushi/ripgrep/issues/1154):
|
||||
Windows files with "hidden" attribute are now treated as hidden.
|
||||
* [BUG #1173](https://github.com/BurntSushi/ripgrep/issues/1173):
|
||||
Fix handling of `**` patterns in gitignore files.
|
||||
* [BUG #1174](https://github.com/BurntSushi/ripgrep/issues/1174):
|
||||
Fix handling of repeated `**` patterns in gitignore files.
|
||||
* [BUG #1176](https://github.com/BurntSushi/ripgrep/issues/1176):
|
||||
Fix bug where `-F`/`-x` weren't applied to patterns given via `-f`.
|
||||
* [BUG #1189](https://github.com/BurntSushi/ripgrep/issues/1189):
|
||||
Document cases where ripgrep may use a lot of memory.
|
||||
* [BUG #1203](https://github.com/BurntSushi/ripgrep/issues/1203):
|
||||
Fix a matching bug related to the suffix literal optimization.
|
||||
* [BUG 8f14cb18](https://github.com/BurntSushi/ripgrep/commit/8f14cb18):
|
||||
Increase the default stack size for PCRE2's JIT.
|
||||
|
||||
|
||||
0.10.0 (2018-09-07)
|
||||
===================
|
||||
This is a new minor version release of ripgrep that contains some major new
|
||||
features, a huge number of bug fixes, and is the first release based on
|
||||
libripgrep. The entirety of ripgrep's core search and printing code has been
|
||||
rewritten and generalized so that anyone can make use of it.
|
||||
|
||||
Major new features include PCRE2 support, multi-line search and a JSON output
|
||||
format.
|
||||
|
||||
**BREAKING CHANGES**:
|
||||
|
||||
* The minimum Rust version required to compile ripgrep has now changed to track the
|
||||
latest stable version of Rust. Patch releases will continue to compile with
|
||||
the same version of Rust as the previous patch release, but new minor
|
||||
versions will use the current stable version of the Rust compiler as their
|
||||
minimum supported version.
|
||||
* The match semantics of `-w/--word-regexp` have changed slightly. They used
|
||||
to be `\b(?:<your pattern>)\b`, but now it's
|
||||
`(?:^|\W)(?:<your pattern>)(?:$|\W)`. This matches the behavior of GNU grep
|
||||
and is believed to be closer to the intended semantics of the flag. See
|
||||
[#389](https://github.com/BurntSushi/ripgrep/issues/389) for more details.
|
||||
|
||||
Feature enhancements:
|
||||
|
||||
* [FEATURE #162](https://github.com/BurntSushi/ripgrep/issues/162):
|
||||
libripgrep is now a thing. The primary crate is
|
||||
[`grep`](https://docs.rs/grep).
|
||||
* [FEATURE #176](https://github.com/BurntSushi/ripgrep/issues/176):
|
||||
Add `-U/--multiline` flag that permits matching over multiple lines.
|
||||
* [FEATURE #188](https://github.com/BurntSushi/ripgrep/issues/188):
|
||||
Add `-P/--pcre2` flag that gives support for look-around and backreferences.
|
||||
* [FEATURE #244](https://github.com/BurntSushi/ripgrep/issues/244):
|
||||
Add `--json` flag that prints results in a JSON Lines format.
|
||||
* [FEATURE #321](https://github.com/BurntSushi/ripgrep/issues/321):
|
||||
Add `--one-file-system` flag to skip directories on different file systems.
|
||||
* [FEATURE #404](https://github.com/BurntSushi/ripgrep/issues/404):
|
||||
Add `--sort` and `--sortr` flag for more sorting. Deprecate `--sort-files`.
|
||||
* [FEATURE #416](https://github.com/BurntSushi/ripgrep/issues/416):
|
||||
Add `--crlf` flag to permit `$` to work with carriage returns on Windows.
|
||||
* [FEATURE #917](https://github.com/BurntSushi/ripgrep/issues/917):
|
||||
The `--trim` flag strips prefix whitespace from all lines printed.
|
||||
* [FEATURE #993](https://github.com/BurntSushi/ripgrep/issues/993):
|
||||
Add `--null-data` flag, which makes ripgrep use NUL as a line terminator.
|
||||
* [FEATURE #997](https://github.com/BurntSushi/ripgrep/issues/997):
|
||||
The `--passthru` flag now works with the `--replace` flag.
|
||||
* [FEATURE #1038-1](https://github.com/BurntSushi/ripgrep/issues/1038):
|
||||
Add `--line-buffered` and `--block-buffered` for forcing a buffer strategy.
|
||||
* [FEATURE #1038-2](https://github.com/BurntSushi/ripgrep/issues/1038):
|
||||
Add `--pre-glob` for filtering files through the `--pre` flag.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #2](https://github.com/BurntSushi/ripgrep/issues/2):
|
||||
Searching with non-zero context can now use memory maps if appropriate.
|
||||
* [BUG #200](https://github.com/BurntSushi/ripgrep/issues/200):
|
||||
ripgrep will now stop correctly when its output pipe is closed.
|
||||
* [BUG #389](https://github.com/BurntSushi/ripgrep/issues/389):
|
||||
The `-w/--word-regexp` flag now works more intuitively.
|
||||
* [BUG #643](https://github.com/BurntSushi/ripgrep/issues/643):
|
||||
Detection of readable stdin has improved on Windows.
|
||||
* [BUG #441](https://github.com/BurntSushi/ripgrep/issues/441),
|
||||
[BUG #690](https://github.com/BurntSushi/ripgrep/issues/690),
|
||||
[BUG #980](https://github.com/BurntSushi/ripgrep/issues/980):
|
||||
Matching empty lines now works correctly in several corner cases.
|
||||
* [BUG #764](https://github.com/BurntSushi/ripgrep/issues/764):
|
||||
Color escape sequences now coalesce, which reduces output size.
|
||||
* [BUG #842](https://github.com/BurntSushi/ripgrep/issues/842):
|
||||
Add man page to binary Debian package.
|
||||
* [BUG #922](https://github.com/BurntSushi/ripgrep/issues/922):
|
||||
ripgrep is now more robust with respect to memory maps failing.
|
||||
* [BUG #937](https://github.com/BurntSushi/ripgrep/issues/937):
|
||||
Color escape sequences are no longer emitted for empty matches.
|
||||
* [BUG #940](https://github.com/BurntSushi/ripgrep/issues/940):
|
||||
Context from the `--passthru` flag should not impact process exit status.
|
||||
* [BUG #984](https://github.com/BurntSushi/ripgrep/issues/984):
|
||||
Fixes bug in `ignore` crate where first path was always treated as a symlink.
|
||||
* [BUG #990](https://github.com/BurntSushi/ripgrep/issues/990):
|
||||
Read stderr asynchronously when running a process.
|
||||
* [BUG #1013](https://github.com/BurntSushi/ripgrep/issues/1013):
|
||||
Add compile time and runtime CPU features to `--version` output.
|
||||
* [BUG #1028](https://github.com/BurntSushi/ripgrep/pull/1028):
|
||||
Don't complete bare pattern after `-f` in zsh.
|
||||
|
||||
|
||||
0.9.0 (2018-08-03)
|
||||
==================
|
||||
This is a new minor version release of ripgrep that contains some minor new
|
||||
features and a panoply of bug fixes.
|
||||
|
||||
Releases provided on Github for `x86_64` will now work on all target CPUs, and
|
||||
will also automatically take advantage of features found on modern CPUs (such
|
||||
as AVX2) for additional optimizations.
|
||||
|
||||
This release increases the **minimum supported Rust version** from 1.20.0 to
|
||||
1.23.0.
|
||||
|
||||
It is anticipated that the next release of ripgrep (0.10.0) will provide
|
||||
multi-line search support and a JSON output format.
|
||||
|
||||
**BREAKING CHANGES**:
|
||||
|
||||
* When `--count` and `--only-matching` are provided simultaneously, the
|
||||
behavior of ripgrep is as if the `--count-matches` flag was given. That is,
|
||||
the total number of matches is reported, where there may be multiple matches
|
||||
per line. Previously, the behavior of ripgrep was to report the total number
|
||||
of matching lines. (Note that this behavior diverges from the behavior of
|
||||
GNU grep.)
|
||||
* Octal syntax is no longer supported. ripgrep previously accepted expressions
|
||||
like `\1` as syntax for matching `U+0001`, but ripgrep will now report an
|
||||
error instead.
|
||||
* The `--line-number-width` flag has been removed. Its functionality was not
|
||||
carefully considered with all ripgrep output formats.
|
||||
See [#795](https://github.com/BurntSushi/ripgrep/issues/795) for more
|
||||
details.
|
||||
|
||||
Feature enhancements:
|
||||
|
||||
* Added or improved file type filtering for Android, Bazel, Fuchsia, Haskell,
|
||||
Java and Puppet.
|
||||
* [FEATURE #411](https://github.com/BurntSushi/ripgrep/issues/411):
|
||||
Add a `--stats` flag, which emits aggregate statistics after search results.
|
||||
* [FEATURE #646](https://github.com/BurntSushi/ripgrep/issues/646):
|
||||
Add a `--no-ignore-messages` flag, which suppresses parse errors from reading
|
||||
`.ignore` and `.gitignore` files.
|
||||
* [FEATURE #702](https://github.com/BurntSushi/ripgrep/issues/702):
|
||||
Support `\u{..}` Unicode escape sequences.
|
||||
* [FEATURE #812](https://github.com/BurntSushi/ripgrep/issues/812):
|
||||
Add `-b/--byte-offset` flag that shows the byte offset of each matching line.
|
||||
* [FEATURE #814](https://github.com/BurntSushi/ripgrep/issues/814):
|
||||
Add `--count-matches` flag, which is like `--count`, but for each match.
|
||||
* [FEATURE #880](https://github.com/BurntSushi/ripgrep/issues/880):
|
||||
Add a `--no-column` flag, which disables column numbers in the output.
|
||||
* [FEATURE #898](https://github.com/BurntSushi/ripgrep/issues/898):
|
||||
Add support for `lz4` when using the `-z/--search-zip` flag.
|
||||
* [FEATURE #924](https://github.com/BurntSushi/ripgrep/issues/924):
|
||||
`termcolor` has moved to its own repository:
|
||||
https://github.com/BurntSushi/termcolor
|
||||
* [FEATURE #934](https://github.com/BurntSushi/ripgrep/issues/934):
|
||||
Add a new flag, `--no-ignore-global`, that permits disabling global
|
||||
gitignores.
|
||||
* [FEATURE #967](https://github.com/BurntSushi/ripgrep/issues/967):
|
||||
Rename `--maxdepth` to `--max-depth` for consistency. Keep `--maxdepth` for
|
||||
backwards compatibility.
|
||||
* [FEATURE #978](https://github.com/BurntSushi/ripgrep/issues/978):
|
||||
Add a `--pre` option to filter inputs with an arbitrary program.
|
||||
* [FEATURE fca9709d](https://github.com/BurntSushi/ripgrep/commit/fca9709d):
|
||||
Improve zsh completion.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #135](https://github.com/BurntSushi/ripgrep/issues/135):
|
||||
Release portable binaries that conditionally use SSSE3, AVX2, etc., at
|
||||
runtime.
|
||||
* [BUG #268](https://github.com/BurntSushi/ripgrep/issues/268):
|
||||
Print descriptive error message when trying to use look-around or
|
||||
backreferences.
|
||||
* [BUG #395](https://github.com/BurntSushi/ripgrep/issues/395):
|
||||
Show comprehensible error messages for regexes like `\s*{`.
|
||||
* [BUG #526](https://github.com/BurntSushi/ripgrep/issues/526):
|
||||
Support backslash escapes in globs.
|
||||
* [BUG #795](https://github.com/BurntSushi/ripgrep/issues/795):
|
||||
Fix problems with `--line-number-width` by removing it.
|
||||
* [BUG #832](https://github.com/BurntSushi/ripgrep/issues/832):
|
||||
Clarify usage instructions for `-f/--file` flag.
|
||||
* [BUG #835](https://github.com/BurntSushi/ripgrep/issues/835):
|
||||
Fix small performance regression while crawling very large directory trees.
|
||||
* [BUG #851](https://github.com/BurntSushi/ripgrep/issues/851):
|
||||
Fix `-S/--smart-case` detection once and for all.
|
||||
* [BUG #852](https://github.com/BurntSushi/ripgrep/issues/852):
|
||||
Be robust with respect to `ENOMEM` errors returned by `mmap`.
|
||||
* [BUG #853](https://github.com/BurntSushi/ripgrep/issues/853):
|
||||
Upgrade `grep` crate to `regex-syntax 0.6.0`.
|
||||
* [BUG #893](https://github.com/BurntSushi/ripgrep/issues/893):
|
||||
Improve support for git submodules.
|
||||
* [BUG #900](https://github.com/BurntSushi/ripgrep/issues/900):
|
||||
When no patterns are given, ripgrep should never match anything.
|
||||
* [BUG #907](https://github.com/BurntSushi/ripgrep/issues/907):
|
||||
ripgrep will now stop traversing after the first file when `--quiet --files`
|
||||
is used.
|
||||
* [BUG #918](https://github.com/BurntSushi/ripgrep/issues/918):
|
||||
Don't skip tar archives when `-z/--search-zip` is used.
|
||||
* [BUG #934](https://github.com/BurntSushi/ripgrep/issues/934):
|
||||
Don't respect gitignore files when searching outside git repositories.
|
||||
* [BUG #948](https://github.com/BurntSushi/ripgrep/issues/948):
|
||||
Use exit code 2 to indicate error, and use exit code 1 to indicate no
|
||||
matches.
|
||||
* [BUG #951](https://github.com/BurntSushi/ripgrep/issues/951):
|
||||
Add stdin example to ripgrep usage documentation.
|
||||
* [BUG #955](https://github.com/BurntSushi/ripgrep/issues/955):
|
||||
Use buffered writing when not printing to a tty, which fixes a performance
|
||||
regression.
|
||||
* [BUG #957](https://github.com/BurntSushi/ripgrep/issues/957):
|
||||
Improve the error message shown for `--path-separator /` in some Windows
|
||||
shells.
|
||||
* [BUG #964](https://github.com/BurntSushi/ripgrep/issues/964):
|
||||
Add a `--no-fixed-strings` flag to disable `-F/--fixed-strings`.
|
||||
* [BUG #988](https://github.com/BurntSushi/ripgrep/issues/988):
|
||||
Fix a bug in the `ignore` crate that prevented the use of explicit ignore
|
||||
files after disabling all other ignore rules.
|
||||
* [BUG #995](https://github.com/BurntSushi/ripgrep/issues/995):
|
||||
Respect `$XDG_CONFIG_DIR/git/config` for detecting `core.excludesFile`.
|
||||
|
||||
|
||||
0.8.1 (2018-02-20)
|
||||
==================
|
||||
This is a patch release of ripgrep that primarily fixes regressions introduced
|
||||
in 0.8.0 (#820 and #824) in directory traversal on Windows. These regressions
|
||||
do not impact non-Windows users.
|
||||
|
||||
Feature enhancements:
|
||||
|
||||
* Added or improved file type filtering for csv and VHDL.
|
||||
* [FEATURE #798](https://github.com/BurntSushi/ripgrep/issues/798):
|
||||
Add `underline` support to `termcolor` and ripgrep. See documentation on the
|
||||
`--colors` flag for details.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #684](https://github.com/BurntSushi/ripgrep/issues/684):
|
||||
Improve documentation for the `--ignore-file` flag.
|
||||
* [BUG #789](https://github.com/BurntSushi/ripgrep/issues/789):
|
||||
Don't show `(rev )` if the revision wasn't available during the build.
|
||||
* [BUG #791](https://github.com/BurntSushi/ripgrep/issues/791):
|
||||
Add man page to ARM release.
|
||||
* [BUG #797](https://github.com/BurntSushi/ripgrep/issues/797):
|
||||
Improve documentation for "intense" setting in `termcolor`.
|
||||
* [BUG #800](https://github.com/BurntSushi/ripgrep/issues/800):
|
||||
Fix a bug in the `ignore` crate for custom ignore files. This had no impact
|
||||
on ripgrep.
|
||||
* [BUG #807](https://github.com/BurntSushi/ripgrep/issues/807):
|
||||
Fix a bug where `rg --hidden .` behaved differently from `rg --hidden ./`.
|
||||
* [BUG #815](https://github.com/BurntSushi/ripgrep/issues/815):
|
||||
Clarify a common failure mode in user guide.
|
||||
* [BUG #820](https://github.com/BurntSushi/ripgrep/issues/820):
|
||||
Fixes a bug on Windows where symlinks were followed even if not requested.
|
||||
* [BUG #824](https://github.com/BurntSushi/ripgrep/issues/824):
|
||||
Fix a performance regression in directory traversal on Windows.
|
||||
|
||||
|
||||
0.8.0 (2018-02-11)
|
||||
==================
|
||||
This is a new minor version release of ripgrep that satisfies several popular
|
||||
feature requests (config files, search compressed files, true colors), fixes
|
||||
many bugs and improves the quality of life for ripgrep maintainers. This
|
||||
release also includes greatly improved documentation in the form of a
|
||||
[User Guide](GUIDE.md) and a [FAQ](FAQ.md).
|
||||
|
||||
This release increases the **minimum supported Rust version** from 1.17 to
|
||||
1.20.
|
||||
|
||||
**BREAKING CHANGES**:
|
||||
|
||||
Note that these are all very minor and unlikely to impact most users.
|
||||
|
||||
* In order to support configuration files, flag overrides needed to be
|
||||
rethought. In some cases, this changed ripgrep's behavior. For example,
|
||||
in ripgrep 0.7.1, `rg foo -s -i` will perform a case sensitive search
|
||||
since the `-s/--case-sensitive` flag was defined to always take precedence
|
||||
over the `-i/--ignore-case` flag, regardless of position. In ripgrep 0.8.0
|
||||
however, the override rule for all flags has changed to "the most recent
|
||||
flag wins among competing flags." That is, `rg foo -s -i` now performs a
|
||||
case insensitive search.
|
||||
* The `-M/--max-columns` flag was tweaked so that specifying a value of `0`
|
||||
now makes ripgrep behave as if the flag was absent. This makes it possible
|
||||
to set a default value in a configuration file and then override it. The
|
||||
previous ripgrep behavior was to suppress all matching non-empty lines.
|
||||
* In all globs, `[^...]` is now equivalent to `[!...]` (indicating class
|
||||
negation). Previously, `^` had no special significance in a character class.
|
||||
* For **downstream packagers**, the directory hierarchy in ripgrep's archive
|
||||
releases has changed. The root directory now only contains the executable,
|
||||
README and license. There is now a new directory called `doc` which contains
|
||||
the man page (previously in the root), a user guide (new), a FAQ (new) and
|
||||
the CHANGELOG (previously not included in release). The `complete`
|
||||
directory remains the same.
|
||||
|
||||
Feature enhancements:
|
||||
|
||||
* Added or improved file type filtering for
|
||||
Apache Avro, C++, GN, Google Closure Templates, Jupyter notebooks, man pages,
|
||||
Protocol Buffers, Smarty and Web IDL.
|
||||
* [FEATURE #196](https://github.com/BurntSushi/ripgrep/issues/196):
|
||||
Support a configuration file. See
|
||||
[the new user guide](GUIDE.md#configuration-file)
|
||||
for details.
|
||||
* [FEATURE #261](https://github.com/BurntSushi/ripgrep/issues/261):
|
||||
Add extended or "true" color support. Works in Windows 10!
|
||||
[See the FAQ for details.](FAQ.md#colors)
|
||||
* [FEATURE #539](https://github.com/BurntSushi/ripgrep/issues/539):
|
||||
Search gzip, bzip2, lzma or xz files when given `-z/--search-zip` flag.
|
||||
* [FEATURE #544](https://github.com/BurntSushi/ripgrep/issues/544):
|
||||
Add support for line number alignment via a new `--line-number-width` flag.
|
||||
* [FEATURE #654](https://github.com/BurntSushi/ripgrep/pull/654):
|
||||
Support linuxbrew in ripgrep's Brew tap.
|
||||
* [FEATURE #673](https://github.com/BurntSushi/ripgrep/issues/673):
|
||||
Bring back `.rgignore` files. (A higher precedence, application specific
|
||||
version of `.ignore`.)
|
||||
* [FEATURE #676](https://github.com/BurntSushi/ripgrep/issues/676):
|
||||
Provide ARM binaries. **WARNING:** This will be provided on a best effort
|
||||
basis.
|
||||
* [FEATURE #709](https://github.com/BurntSushi/ripgrep/issues/709):
|
||||
Suggest `-F/--fixed-strings` flag on a regex syntax error.
|
||||
* [FEATURE #740](https://github.com/BurntSushi/ripgrep/issues/740):
|
||||
Add a `--passthru` flag that causes ripgrep to print every line it reads.
|
||||
* [FEATURE #785](https://github.com/BurntSushi/ripgrep/pull/785):
|
||||
Overhaul documentation. Cleaned up README, added user guide and FAQ.
|
||||
* [FEATURE 7f5c07](https://github.com/BurntSushi/ripgrep/commit/7f5c07434be92103b5bf7e216b9c7494aed2d8cb):
|
||||
Add hidden flags for convenient overrides (e.g., `--no-text`).
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #553](https://github.com/BurntSushi/ripgrep/issues/553):
|
||||
Permit flags to be repeated.
|
||||
* [BUG #633](https://github.com/BurntSushi/ripgrep/issues/633):
|
||||
Fix a bug where ripgrep would panic on Windows while following symlinks.
|
||||
* [BUG #649](https://github.com/BurntSushi/ripgrep/issues/649):
|
||||
Fix handling of `!**/` in `.gitignore`.
|
||||
* [BUG #663](https://github.com/BurntSushi/ripgrep/issues/663):
|
||||
**BREAKING CHANGE:** Support `[^...]` glob syntax (as identical to `[!...]`).
|
||||
* [BUG #693](https://github.com/BurntSushi/ripgrep/issues/693):
|
||||
Don't display context separators when not printing matches.
|
||||
* [BUG #705](https://github.com/BurntSushi/ripgrep/issues/705):
|
||||
Fix a bug that prevented ripgrep from searching OneDrive directories.
|
||||
* [BUG #717](https://github.com/BurntSushi/ripgrep/issues/717):
|
||||
Improve `--smart-case` uppercase character detection.
|
||||
* [BUG #725](https://github.com/BurntSushi/ripgrep/issues/725):
|
||||
Clarify that globs do not override explicitly given paths to search.
|
||||
* [BUG #742](https://github.com/BurntSushi/ripgrep/pull/742):
|
||||
Write ANSI reset code as `\x1B[0m` instead of `\x1B[m`.
|
||||
* [BUG #747](https://github.com/BurntSushi/ripgrep/issues/747):
|
||||
Remove `yarn.lock` from YAML file type.
|
||||
* [BUG #760](https://github.com/BurntSushi/ripgrep/issues/760):
|
||||
ripgrep can now search `/sys/devices/system/cpu/vulnerabilities/*` files.
|
||||
* [BUG #761](https://github.com/BurntSushi/ripgrep/issues/761):
|
||||
Fix handling of gitignore patterns that contain a `/`.
|
||||
* [BUG #776](https://github.com/BurntSushi/ripgrep/pull/776):
|
||||
**BREAKING CHANGE:** `--max-columns=0` now disables the limit.
|
||||
* [BUG #779](https://github.com/BurntSushi/ripgrep/issues/779):
|
||||
Clarify documentation for `--files-without-match`.
|
||||
* [BUG #780](https://github.com/BurntSushi/ripgrep/issues/780),
|
||||
[BUG #781](https://github.com/BurntSushi/ripgrep/issues/781):
|
||||
Fix bug where ripgrep missed some matching lines.
|
||||
|
||||
Maintenance fixes:
|
||||
|
||||
* [MAINT #772](https://github.com/BurntSushi/ripgrep/pull/772):
|
||||
Drop `env_logger` in favor of simpler logger to avoid many new dependencies.
|
||||
* [MAINT #772](https://github.com/BurntSushi/ripgrep/pull/772):
|
||||
Add git revision hash to ripgrep's version string.
|
||||
* [MAINT #772](https://github.com/BurntSushi/ripgrep/pull/772):
|
||||
(Seemingly) improve compile times.
|
||||
* [MAINT #776](https://github.com/BurntSushi/ripgrep/pull/776):
|
||||
Automatically generate man page during build.
|
||||
* [MAINT #786](https://github.com/BurntSushi/ripgrep/pull/786):
|
||||
Remove use of `unsafe` in `globset`. :tada:
|
||||
* [MAINT e9d448](https://github.com/BurntSushi/ripgrep/commit/e9d448e93bb4e1fb3b0c1afc29adb5af6ed5283d):
|
||||
Add an issue template (has already drastically improved bug reports).
|
||||
* [MAINT ae2d03](https://github.com/BurntSushi/ripgrep/commit/ae2d036dd4ba2a46acac9c2d77c32e7c667eb850):
|
||||
Remove the `compile` script.
|
||||
|
||||
Friends of ripgrep:
|
||||
|
||||
I'd like to extend my gratitude to
|
||||
[@balajisivaraman](https://github.com/balajisivaraman)
|
||||
for their recent hard work in a number of areas, and in particular, for
|
||||
implementing the "search compressed files" feature. Their work in sketching out
|
||||
a specification for that and other work has been exemplary.
|
||||
|
||||
Thanks
|
||||
[@balajisivaraman](https://github.com/balajisivaraman)!
|
||||
|
||||
|
||||
0.7.1 (2017-10-22)
|
||||
==================
|
||||
This is a patch release of ripgrep that includes a fix to a very bad regression
|
||||
introduced in ripgrep 0.7.0.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #648](https://github.com/BurntSushi/ripgrep/issues/648):
|
||||
Fix a bug where it was very easy to exceed standard file descriptor limits.
|
||||
|
||||
|
||||
0.7.0 (2017-10-20)
|
||||
==================
|
||||
This is a new minor version release of ripgrep that includes mostly bug fixes.
|
||||
|
||||
770
Cargo.lock
generated
770
Cargo.lock
generated
@@ -1,459 +1,217 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
[root]
|
||||
name = "ripgrep"
|
||||
version = "0.7.0"
|
||||
dependencies = [
|
||||
"atty 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bytecount 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"clap 2.26.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep 0.1.7",
|
||||
"ignore 0.3.1",
|
||||
"lazy_static 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num_cpus 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"same-file 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"termcolor 0.3.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.3"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ansi_term"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.11"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.0.4"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "bstr"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-automata 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytecount"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.35"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"simd 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "0.1.7"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "2.33.0"
|
||||
version = "2.26.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"atty 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"term_size 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"textwrap 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"vec_map 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cloudabi"
|
||||
version = "0.0.3"
|
||||
name = "crossbeam"
|
||||
version = "0.2.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.3.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"crossbeam-utils 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.6.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.17"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"packed_simd 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"simd 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs_io"
|
||||
version = "0.1.6"
|
||||
name = "env_logger"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.6"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "fuchsia-cprng"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.0"
|
||||
name = "fs2"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "globset"
|
||||
version = "0.4.3"
|
||||
version = "0.2.1"
|
||||
dependencies = [
|
||||
"aho-corasick 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bstr 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep"
|
||||
version = "0.2.3"
|
||||
version = "0.1.7"
|
||||
dependencies = [
|
||||
"grep-cli 0.1.1",
|
||||
"grep-matcher 0.1.2",
|
||||
"grep-pcre2 0.1.3",
|
||||
"grep-printer 0.1.1",
|
||||
"grep-regex 0.1.3",
|
||||
"grep-searcher 0.1.4",
|
||||
"termcolor 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-cli"
|
||||
version = "0.1.1"
|
||||
dependencies = [
|
||||
"atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bstr 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"globset 0.4.3",
|
||||
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"termcolor 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-matcher"
|
||||
version = "0.1.2"
|
||||
dependencies = [
|
||||
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-pcre2"
|
||||
version = "0.1.3"
|
||||
dependencies = [
|
||||
"grep-matcher 0.1.2",
|
||||
"pcre2 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-printer"
|
||||
version = "0.1.1"
|
||||
dependencies = [
|
||||
"base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bstr 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep-matcher 0.1.2",
|
||||
"grep-regex 0.1.3",
|
||||
"grep-searcher 0.1.4",
|
||||
"serde 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"termcolor 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-regex"
|
||||
version = "0.1.3"
|
||||
dependencies = [
|
||||
"aho-corasick 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep-matcher 0.1.2",
|
||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-searcher"
|
||||
version = "0.1.4"
|
||||
dependencies = [
|
||||
"bstr 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bytecount 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs_io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep-matcher 0.1.2",
|
||||
"grep-regex 0.1.3",
|
||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ignore"
|
||||
version = "0.4.7"
|
||||
version = "0.3.1"
|
||||
dependencies = [
|
||||
"crossbeam-channel 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"globset 0.4.3",
|
||||
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"globset 0.2.1",
|
||||
"lazy_static 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"same-file 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"walkdir 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "0.4.3"
|
||||
name = "kernel32-sys"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.3.0"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.51"
|
||||
version = "0.2.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.6"
|
||||
version = "0.3.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.2.0"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memmap"
|
||||
version = "0.7.0"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fs2 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.10.0"
|
||||
version = "1.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "packed_simd"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pcre2"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"pcre2-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pcre2-sys"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cc 1.0.35 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "0.4.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "0.6.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.6.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_jitter 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "rand_hc"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_isaac"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_jitter"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_os"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_pcg"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_xorshift"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rdrand"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.1.54"
|
||||
version = "0.1.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
@@ -461,140 +219,66 @@ name = "redox_termios"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"redox_syscall 0.1.54 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"redox_syscall 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.1.5"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"aho-corasick 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "remove_dir_all"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ripgrep"
|
||||
version = "0.10.0"
|
||||
dependencies = [
|
||||
"bstr 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep 0.2.3",
|
||||
"ignore 0.4.7",
|
||||
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num_cpus 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"termcolor 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "0.2.7"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.4"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.90"
|
||||
name = "simd"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.90"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 0.15.31 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.39"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "0.6.9"
|
||||
name = "simd"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.8.0"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "0.15.31"
|
||||
name = "term_size"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"redox_syscall 0.1.54 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termcolor"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
version = "0.3.3"
|
||||
dependencies = [
|
||||
"wincolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"wincolor 0.1.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -602,163 +286,121 @@ name = "termion"
|
||||
version = "1.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"redox_syscall 0.1.54 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"redox_syscall 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "textwrap"
|
||||
version = "0.11.0"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"term_size 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "0.3.6"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ucd-util"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.5"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.1.0"
|
||||
name = "unreachable"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "utf8-ranges"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "vec_map"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "void"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.2.7"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"same-file 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-util"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
name = "winapi-build"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "wincolor"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
version = "0.1.4"
|
||||
dependencies = [
|
||||
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[metadata]
|
||||
"checksum aho-corasick 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e6f484ae0c99fec2e858eb6134949117399f222608d84cadb3f58c1f97c2364c"
|
||||
"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652"
|
||||
"checksum autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a6d640bee2da49f60a4068a7fae53acde8982514ab7bae8b8cea9e88cbcfd799"
|
||||
"checksum base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e"
|
||||
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
|
||||
"checksum bstr 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6c8203ca06c502958719dae5f653a79e0cc6ba808ed02beffbf27d09610f2143"
|
||||
"checksum bytecount 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "be0fdd54b507df8f22012890aadd099979befdba27713c767993f8380112ca7c"
|
||||
"checksum byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a019b10a2a7cdeb292db131fc8113e57ea2a908f6e7894b0c3c671893b65dbeb"
|
||||
"checksum cc 1.0.35 (registry+https://github.com/rust-lang/crates.io-index)" = "5e5f3fee5eeb60324c2781f1e41286bdee933850fff9b3c672587fed5ec58c83"
|
||||
"checksum cfg-if 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "11d43355396e872eefb45ce6342e4374ed7bc2b3a502d1b28e36d6e23c05d1f4"
|
||||
"checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
|
||||
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
|
||||
"checksum crossbeam-channel 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "0f0ed1a4de2235cabda8558ff5840bffb97fcb64c97827f354a451307df5f72b"
|
||||
"checksum crossbeam-utils 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "f8306fcef4a7b563b76b7dd949ca48f52bc1141aa067d2ea09565f3e2652aa5c"
|
||||
"checksum encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)" = "4155785c79f2f6701f185eb2e6b4caf0555ec03477cb4c70db67b465311620ed"
|
||||
"checksum encoding_rs_io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9619ee7a2bf4e777e020b95c1439abaf008f8ea8041b78a0552c4f1bcf4df32c"
|
||||
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
|
||||
"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
|
||||
"checksum glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||
"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b"
|
||||
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
|
||||
"checksum libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)" = "bedcc7a809076656486ffe045abeeac163da1b558e963a31e29fbfbeba916917"
|
||||
"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6"
|
||||
"checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39"
|
||||
"checksum memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
|
||||
"checksum num_cpus 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1a23f0ed30a54abaa0c7e83b1d2d87ada7c3c23078d1d87815af3e3b6385fbba"
|
||||
"checksum packed_simd 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a85ea9fc0d4ac0deb6fe7911d38786b32fc11119afd9e9d38b84ff691ce64220"
|
||||
"checksum pcre2 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a08c8195dd1d8a2a1b5e2af94bf0c4c3c195c2359930442a016bf123196f7155"
|
||||
"checksum pcre2-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1e0092a7eae1c569cf7dbec61eef956516df93eb4afda8f600ccb16980aca849"
|
||||
"checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c"
|
||||
"checksum proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)" = "4d317f9caece796be1980837fd5cb3dfec5613ebdb04ad0956deea83ce168915"
|
||||
"checksum quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "faf4799c5d274f3868a4aae320a0a182cbd2baee377b378f080e16a23e9d80db"
|
||||
"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
|
||||
"checksum rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef"
|
||||
"checksum rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
|
||||
"checksum rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0e7a549d590831370895ab7ba4ea0c1b6b011d106b5ff2da6eee112615e6dc0"
|
||||
"checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4"
|
||||
"checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08"
|
||||
"checksum rand_jitter 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b9ea758282efe12823e0d952ddb269d2e1897227e464919a554f2a03ef1b832"
|
||||
"checksum rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071"
|
||||
"checksum rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44"
|
||||
"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
|
||||
"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
|
||||
"checksum redox_syscall 0.1.54 (registry+https://github.com/rust-lang/crates.io-index)" = "12229c14a0f65c4f1cb046a3b52047cdd9da1f4b30f8a39c5063c8bae515e252"
|
||||
"checksum aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "500909c4f87a9e52355b26626d890833e9e1d53ac566db76c36faa984b889699"
|
||||
"checksum ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "23ac7c30002a5accbf7e8987d0632fa6de155b7c3d39d0067317a391e00a2ef6"
|
||||
"checksum atty 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "21e50800ec991574876040fff8ee46b136a53e985286fbe6a3bdfe6421b78860"
|
||||
"checksum bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4efd02e230a02e18f92fc2735f44597385ed02ad8f831e7c1c1156ee5e1ab3a5"
|
||||
"checksum bytecount 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "4bbeb7c30341fce29f6078b4bdf876ea4779600866e98f5b2d203a534f195050"
|
||||
"checksum cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de"
|
||||
"checksum clap 2.26.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3451e409013178663435d6f15fdb212f14ee4424a3d74f979d081d0a66b6f1f2"
|
||||
"checksum crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0c5ea215664ca264da8a9d9c3be80d2eaf30923c259d03e870388eb927508f97"
|
||||
"checksum encoding_rs 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f5215aabf22b83153be3ee44dfe3f940214541b2ce13d419c55e7a115c8c51a9"
|
||||
"checksum env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3ddf21e73e016298f5cb37d6ef8e8da8e39f91f9ec8b0df44b7deb16a9f8cd5b"
|
||||
"checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344"
|
||||
"checksum fs2 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9ab76cfd2aaa59b7bf6688ad9ba15bbae64bff97f04ea02144cfd3443e5c2866"
|
||||
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
|
||||
"checksum lazy_static 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c9e5e58fa1a4c3b915a561a78a22ee0cac6ab97dca2504428bc1cb074375f8d5"
|
||||
"checksum libc 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)" = "56cce3130fd040c28df6f495c8492e5ec5808fb4c9093c310df02b0c8f030148"
|
||||
"checksum log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "880f77541efa6e5cc74e76910c9884d9859683118839d6a1dc3b11e63512565b"
|
||||
"checksum memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a"
|
||||
"checksum memchr 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e01e64d9017d18e7fc09d8e4fe0e28ff6931019e979fb8019319db7ca827f8a6"
|
||||
"checksum memmap 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "46f3c7359028b31999287dae4e5047ddfe90a23b7dca2282ce759b491080c99b"
|
||||
"checksum num_cpus 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "514f0d73e64be53ff320680ca671b64fe3fb91da01e1ae2ddc99eb51d453b20d"
|
||||
"checksum redox_syscall 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)" = "8dde11f18c108289bef24469638a04dce49da56084f2d50618b226e47eb04509"
|
||||
"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76"
|
||||
"checksum regex 1.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "559008764a17de49a3146b234641644ed37d118d1ef641a0bb573d146edc6ce0"
|
||||
"checksum regex-automata 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "a25a7daa2eea48550e9946133d6cc9621020d29cc7069089617234bf8b6a8693"
|
||||
"checksum regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "dcfd8681eebe297b81d98498869d4aae052137651ad7b96822f09ceb690d0a96"
|
||||
"checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5"
|
||||
"checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7"
|
||||
"checksum same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8f20c4be53a8a1ff4c1f1b2bd14570d2f634628709752f0702ecdd2b3f9a5267"
|
||||
"checksum serde 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)" = "aa5f7c20820475babd2c077c3ab5f8c77a31c15e16ea38687b4c02d3e48680f4"
|
||||
"checksum serde_derive 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)" = "58fc82bec244f168b23d1963b45c8bf5726e9a15a9d146a067f9081aeed2de79"
|
||||
"checksum serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)" = "5a23aa71d4a4d43fdbfaac00eff68ba8a06a51759a89ac3304323e800c4dd40d"
|
||||
"checksum smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c4488ae950c49d403731982257768f48fada354a5203fe81f9bb6f43ca9002be"
|
||||
"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||
"checksum syn 0.15.31 (registry+https://github.com/rust-lang/crates.io-index)" = "d2b4cfac95805274c6afdb12d8f770fa2d27c045953e7b630a81801953699a9a"
|
||||
"checksum tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "b86c784c88d98c801132806dadd3819ed29d8600836c4088e855cdf3e178ed8a"
|
||||
"checksum termcolor 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4096add70612622289f2fdcdbd5086dc81c1e2675e6ae58d6c4f62a16c6d7f2f"
|
||||
"checksum regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1731164734096285ec2a5ec7fea5248ae2f5485b3feeb0115af4fda2183b2d1b"
|
||||
"checksum regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad890a5eef7953f55427c50575c680c42841653abd2b028b68cd223d157f62db"
|
||||
"checksum same-file 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "70a18720d745fb9ca6a041b37cb36d0b21066006b6cff8b5b360142d4b81fb60"
|
||||
"checksum simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "63b5847c2d766ca7ce7227672850955802fabd779ba616aeabead4c2c3877023"
|
||||
"checksum simd 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a94d14a2ae1f1f110937de5fb69e494372560181c7e1739a097fcc2cee37ba0"
|
||||
"checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694"
|
||||
"checksum term_size 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2b6b55df3198cc93372e85dd2ed817f0e38ce8cc0f22eb32391bfad9c4bf209"
|
||||
"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096"
|
||||
"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
|
||||
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
|
||||
"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
|
||||
"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
|
||||
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
||||
"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"
|
||||
"checksum walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "9d9d7ed3431229a144296213105a390676cc49c9b6a72bd19f3176c98e129fa1"
|
||||
"checksum winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f10e386af2b13e47c89e7236a7a14a086791a2b88ebad6df9bf42040195cf770"
|
||||
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
"checksum winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7168bab6e1daee33b4557efd0e95d5ca70a03706d39fa5f3fe7a236f584b03c9"
|
||||
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
"checksum wincolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "561ed901ae465d6185fa7864d63fbd5720d0ef718366c9a4dc83cf6170d7e9ba"
|
||||
"checksum textwrap 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "df8e08afc40ae3459e4838f303e465aa50d823df8d7f83ca88108f6d3afe7edd"
|
||||
"checksum thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1697c4b57aeeb7a536b647165a2825faddffb1d3bad386d507709bd51a90bb14"
|
||||
"checksum unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "bf3a113775714a22dcb774d8ea3655c53a32debae63a063acc00a91cc586245f"
|
||||
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
|
||||
"checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122"
|
||||
"checksum vec_map 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "887b5b631c2ad01628bbbaa7dd4c869f80d3186688f8d0b6f58774fbe324988c"
|
||||
"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
|
||||
"checksum walkdir 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "40b6d201f4f8998a837196b6de9c73e35af14c992cbb92c4ab641d2c2dce52de"
|
||||
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
|
||||
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
|
||||
|
||||
103
Cargo.toml
103
Cargo.toml
@@ -1,11 +1,10 @@
|
||||
[package]
|
||||
name = "ripgrep"
|
||||
version = "0.10.0" #:version
|
||||
version = "0.7.0" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
ripgrep is a line-oriented search tool that recursively searches your current
|
||||
directory for a regex pattern while respecting your gitignore rules. ripgrep
|
||||
has first class support on Windows, macOS and Linux
|
||||
Line oriented search tool using Rust's regex library. Combines the raw
|
||||
performance of grep with the usability of the silver searcher.
|
||||
"""
|
||||
documentation = "https://github.com/BurntSushi/ripgrep"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
@@ -13,11 +12,9 @@ repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
categories = ["command-line-utilities", "text-processing"]
|
||||
license = "Unlicense OR MIT"
|
||||
license = "Unlicense/MIT"
|
||||
exclude = ["HomebrewFormula"]
|
||||
build = "build.rs"
|
||||
autotests = false
|
||||
edition = "2018"
|
||||
|
||||
[badges]
|
||||
travis-ci = { repository = "BurntSushi/ripgrep" }
|
||||
@@ -32,77 +29,35 @@ name = "rg"
|
||||
name = "integration"
|
||||
path = "tests/tests.rs"
|
||||
|
||||
[workspace]
|
||||
members = [
|
||||
"globset",
|
||||
"grep",
|
||||
"grep-cli",
|
||||
"grep-matcher",
|
||||
"grep-pcre2",
|
||||
"grep-printer",
|
||||
"grep-regex",
|
||||
"grep-searcher",
|
||||
"ignore",
|
||||
]
|
||||
|
||||
[dependencies]
|
||||
bstr = "0.1.2"
|
||||
grep = { version = "0.2.3", path = "grep" }
|
||||
ignore = { version = "0.4.7", path = "ignore" }
|
||||
lazy_static = "1.1.0"
|
||||
log = "0.4.5"
|
||||
num_cpus = "1.8.0"
|
||||
regex = "1.0.5"
|
||||
serde_json = "1.0.23"
|
||||
termcolor = "1.0.3"
|
||||
|
||||
[dependencies.clap]
|
||||
version = "2.32.0"
|
||||
default-features = false
|
||||
features = ["suggestions"]
|
||||
atty = "0.2.2"
|
||||
bytecount = "0.1.4"
|
||||
clap = "2.26"
|
||||
encoding_rs = "0.7"
|
||||
env_logger = { version = "0.4", default-features = false }
|
||||
grep = { version = "0.1.7", path = "grep" }
|
||||
ignore = { version = "0.3.1", path = "ignore" }
|
||||
lazy_static = "0.2"
|
||||
libc = "0.2"
|
||||
log = "0.3"
|
||||
memchr = "2"
|
||||
memmap = "0.5"
|
||||
num_cpus = "1"
|
||||
regex = "0.2.1"
|
||||
same-file = "1"
|
||||
termcolor = { version = "0.3.3", path = "termcolor" }
|
||||
|
||||
[build-dependencies]
|
||||
lazy_static = "1.1.0"
|
||||
|
||||
[build-dependencies.clap]
|
||||
version = "2.32.0"
|
||||
default-features = false
|
||||
features = ["suggestions"]
|
||||
|
||||
[dev-dependencies]
|
||||
serde = "1.0.77"
|
||||
serde_derive = "1.0.77"
|
||||
clap = "2.26"
|
||||
lazy_static = "0.2"
|
||||
|
||||
[features]
|
||||
simd-accel = ["grep/simd-accel"]
|
||||
pcre2 = ["grep/pcre2"]
|
||||
avx-accel = ["bytecount/avx-accel"]
|
||||
simd-accel = [
|
||||
"bytecount/simd-accel",
|
||||
"regex/simd-accel",
|
||||
"encoding_rs/simd-accel",
|
||||
]
|
||||
|
||||
[profile.release]
|
||||
debug = 1
|
||||
|
||||
[package.metadata.deb]
|
||||
features = ["pcre2"]
|
||||
section = "utils"
|
||||
assets = [
|
||||
["target/release/rg", "usr/bin/", "755"],
|
||||
["COPYING", "usr/share/doc/ripgrep/", "644"],
|
||||
["LICENSE-MIT", "usr/share/doc/ripgrep/", "644"],
|
||||
["UNLICENSE", "usr/share/doc/ripgrep/", "644"],
|
||||
["CHANGELOG.md", "usr/share/doc/ripgrep/CHANGELOG", "644"],
|
||||
["README.md", "usr/share/doc/ripgrep/README", "644"],
|
||||
["FAQ.md", "usr/share/doc/ripgrep/FAQ", "644"],
|
||||
# The man page is automatically generated by ripgrep's build process, so
|
||||
# this file isn't actually committed. Instead, to create a dpkg, either
|
||||
# create a deployment/deb directory and copy the man page to it, or use the
|
||||
# 'ci/build_deb.sh' script.
|
||||
["deployment/deb/rg.1", "usr/share/man/man1/rg.1", "644"],
|
||||
# Similarly for shell completions.
|
||||
["deployment/deb/rg.bash", "usr/share/bash-completion/completions/rg", "644"],
|
||||
["deployment/deb/rg.fish", "usr/share/fish/completions/rg.fish", "644"],
|
||||
["deployment/deb/_rg", "usr/share/zsh/vendor-completions/", "644"],
|
||||
]
|
||||
extended-description = """\
|
||||
ripgrep (rg) recursively searches your current directory for a regex pattern.
|
||||
By default, ripgrep will respect your .gitignore and automatically skip hidden
|
||||
files/directories and binary files.
|
||||
"""
|
||||
debug = true
|
||||
|
||||
983
FAQ.md
983
FAQ.md
@@ -1,983 +0,0 @@
|
||||
## FAQ
|
||||
|
||||
* [Does ripgrep support configuration files?](#config)
|
||||
* [What's changed in ripgrep recently?](#changelog)
|
||||
* [When is the next release?](#release)
|
||||
* [Does ripgrep have a man page?](#manpage)
|
||||
* [Does ripgrep have support for shell auto-completion?](#complete)
|
||||
* [How do I use lookaround and/or backreferences?](#fancy)
|
||||
* [How do I configure ripgrep's colors?](#colors)
|
||||
* [How do I enable true colors on Windows?](#truecolors-windows)
|
||||
* [How do I stop ripgrep from messing up colors when I kill it?](#stop-ripgrep)
|
||||
* [How can I get results in a consistent order?](#order)
|
||||
* [How do I search files that aren't UTF-8?](#encoding)
|
||||
* [How do I search compressed files?](#compressed)
|
||||
* [How do I search over multiple lines?](#multiline)
|
||||
* [How do I get around the regex size limit?](#size-limit)
|
||||
* [How do I make the `-f/--file` flag faster?](#dfa-size)
|
||||
* [How do I make the output look like The Silver Searcher's output?](#silver-searcher-output)
|
||||
* [Why does ripgrep get slower when I enabled PCRE2 regexes?](#pcre2-slow)
|
||||
* [When I run `rg`, why does it execute some other command?](#rg-other-cmd)
|
||||
* [How do I create an alias for ripgrep on Windows?](#rg-alias-windows)
|
||||
* [How do I create a PowerShell profile?](#powershell-profile)
|
||||
* [How do I pipe non-ASCII content to ripgrep on Windows?](#pipe-non-ascii-windows)
|
||||
* [How can I search and replace with ripgrep?](#search-and-replace)
|
||||
* [How is ripgrep licensed?](#license)
|
||||
* [Can ripgrep replace grep?](#posix4ever)
|
||||
* [What does the "rip" in ripgrep mean?](#intentcountsforsomething)
|
||||
|
||||
|
||||
<h3 name="config">
|
||||
Does ripgrep support configuration files?
|
||||
</h3>
|
||||
|
||||
Yes. See the
|
||||
[guide's section on configuration files](GUIDE.md#configuration-file).
|
||||
|
||||
|
||||
<h3 name="changelog">
|
||||
What's changed in ripgrep recently?
|
||||
</h3>
|
||||
|
||||
Please consult ripgrep's [CHANGELOG](CHANGELOG.md).
|
||||
|
||||
|
||||
<h3 name="release">
|
||||
When is the next release?
|
||||
</h3>
|
||||
|
||||
ripgrep is a project whose contributors are volunteers. A release schedule
|
||||
adds undue stress to said volunteers. Therefore, releases are made on a best
|
||||
effort basis and no dates **will ever be given**.
|
||||
|
||||
One exception to this is high impact bugs. If a ripgrep release contains a
|
||||
significant regression, then there will generally be a strong push to get a
|
||||
patch release out with a fix.
|
||||
|
||||
|
||||
<h3 name="manpage">
|
||||
Does ripgrep have a man page?
|
||||
</h3>
|
||||
|
||||
Yes! Whenever ripgrep is compiled on a system with `asciidoc` present, then a
|
||||
man page is generated from ripgrep's argv parser. After compiling ripgrep, you
|
||||
can find the man page like so from the root of the repository:
|
||||
|
||||
```
|
||||
$ find ./target -name rg.1 -print0 | xargs -0 ls -t | head -n1
|
||||
./target/debug/build/ripgrep-79899d0edd4129ca/out/rg.1
|
||||
```
|
||||
|
||||
Running `man -l ./target/debug/build/ripgrep-79899d0edd4129ca/out/rg.1` will
|
||||
show the man page in your normal pager.
|
||||
|
||||
Note that the man page's documentation for options is equivalent to the output
|
||||
shown in `rg --help`. To see more condensed documentation (one line per flag),
|
||||
run `rg -h`.
|
||||
|
||||
The man page is also included in all
|
||||
[ripgrep binary releases](https://github.com/BurntSushi/ripgrep/releases).
|
||||
|
||||
|
||||
<h3 name="complete">
|
||||
Does ripgrep have support for shell auto-completion?
|
||||
</h3>
|
||||
|
||||
Yes! Shell completions can be found in the
|
||||
[same directory as the man page](#manpage)
|
||||
after building ripgrep. Zsh completions are maintained separately and committed
|
||||
to the repository in `complete/_rg`.
|
||||
|
||||
Shell completions are also included in all
|
||||
[ripgrep binary releases](https://github.com/BurntSushi/ripgrep/releases).
|
||||
|
||||
For **bash**, move `rg.bash` to
|
||||
`$XDG_CONFIG_HOME/bash_completion` or `/etc/bash_completion.d/`.
|
||||
|
||||
For **fish**, move `rg.fish` to `$HOME/.config/fish/completions/`.
|
||||
|
||||
For **zsh**, move `_rg` to one of your `$fpath` directories.
|
||||
|
||||
For **PowerShell**, add `. _rg.ps1` to your PowerShell
|
||||
[profile](https://technet.microsoft.com/en-us/library/bb613488(v=vs.85).aspx)
|
||||
(note the leading period). If the `_rg.ps1` file is not on your `PATH`, do
|
||||
`. /path/to/_rg.ps1` instead.
|
||||
|
||||
|
||||
<h3 name="order">
|
||||
How can I get results in a consistent order?
|
||||
</h3>
|
||||
|
||||
By default, ripgrep uses parallelism to execute its search because this makes
|
||||
the search much faster on most modern systems. This in turn means that ripgrep
|
||||
has a non-deterministic aspect to it, since the interleaving of threads during
|
||||
the execution of the program is itself non-deterministic. This has the effect
|
||||
of printing results in a somewhat arbitrary order, and this order can change
|
||||
from run to run of ripgrep.
|
||||
|
||||
The only way to make the order of results consistent is to ask ripgrep to
|
||||
sort the output. Currently, this will disable all parallelism. (On smaller
|
||||
repositories, you might not notice much of a performance difference!) You
|
||||
can achieve this with the `--sort path` flag.
|
||||
|
||||
There is more discussion on this topic here:
|
||||
https://github.com/BurntSushi/ripgrep/issues/152
|
||||
|
||||
|
||||
<h3 name="encoding">
|
||||
How do I search files that aren't UTF-8?
|
||||
</h3>
|
||||
|
||||
See the [guide's section on file encoding](GUIDE.md#file-encoding).
|
||||
|
||||
|
||||
<h3 name="compressed">
|
||||
How do I search compressed files?
|
||||
</h3>
|
||||
|
||||
ripgrep's `-z/--search-zip` flag will cause it to search compressed files
|
||||
automatically. Currently, this supports gzip, bzip2, xz, lzma, lz4, Brotli and
|
||||
Zstd. Each of these requires the corresponding `gzip`, `bzip2`, `xz`,
|
||||
`lz4`, `brotli` and `zstd` binaries to be installed on your system. (That is,
|
||||
ripgrep does decompression by shelling out to another process.)
|
||||
|
||||
ripgrep currently does not search archive formats, so `*.tar.gz` files, for
|
||||
example, are skipped.
|
||||
|
||||
|
||||
<h3 name="multiline">
|
||||
How do I search over multiple lines?
|
||||
</h3>
|
||||
|
||||
The `-U/--multiline` flag enables ripgrep to report results that span over
|
||||
multiple lines.
|
||||
|
||||
|
||||
<h3 name="fancy">
|
||||
How do I use lookaround and/or backreferences?
|
||||
</h3>
|
||||
|
||||
ripgrep's default regex engine does not support lookaround or backreferences.
|
||||
This is primarily because the default regex engine is implemented using finite
|
||||
state machines in order to guarantee a linear worst case time complexity on all
|
||||
inputs. Backreferences are not possible to implement in this paradigm, and
|
||||
lookaround appears difficult to do efficiently.
|
||||
|
||||
However, ripgrep optionally supports using PCRE2 as the regex engine instead of
|
||||
the default one based on finite state machines. You can enable PCRE2 with the
|
||||
`-P/--pcre2` flag. For example, in the root of the ripgrep repo, you can easily
|
||||
find all sequences of 10 word characters that are immediately repeated:
|
||||
|
||||
```
|
||||
$ rg -P '(\w{10})\1'
|
||||
tests/misc.rs
|
||||
483: cmd.arg("--max-filesize").arg("44444444444444444444");
|
||||
globset/src/glob.rs
|
||||
1206: matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
|
||||
```
|
||||
|
||||
If your version of ripgrep doesn't support PCRE2, then you'll get an error
|
||||
message when you try to use the `-P/--pcre2` flag:
|
||||
|
||||
```
|
||||
$ rg -P '(\w{10})\1'
|
||||
PCRE2 is not available in this build of ripgrep
|
||||
```
|
||||
|
||||
Most of the releases distributed by the ripgrep project here on GitHub will
|
||||
come bundled with PCRE2 enabled. If you installed ripgrep through a different
|
||||
means (like your system's package manager), then please reach out to the
|
||||
maintainer of that package to see whether it's possible to enable the PCRE2
|
||||
feature.
|
||||
|
||||
|
||||
<h3 name="colors">
|
||||
How do I configure ripgrep's colors?
|
||||
</h3>
|
||||
|
||||
ripgrep has two flags related to colors:
|
||||
|
||||
* `--color` controls *when* to use colors.
|
||||
* `--colors` controls *which* colors to use.
|
||||
|
||||
The `--color` flag accepts one of the following possible values: `never`,
|
||||
`auto`, `always` or `ansi`. The `auto` value is the default and will cause
|
||||
ripgrep to only enable colors when it is printing to a terminal. But if you
|
||||
pipe ripgrep to a file or some other process, then it will suppress colors.
|
||||
|
||||
The `--colors` flag is a bit more complicated. The general format is:
|
||||
|
||||
```
|
||||
--colors '{type}:{attribute}:{value}'
|
||||
```
|
||||
|
||||
* `{type}` should be one of `path`, `line`, `column` or `match`. Each of these
|
||||
corresponds to one of the four different types of things that ripgrep will add color
|
||||
to in its output. Select the type whose color you want to change.
|
||||
* `{attribute}` should be one of `fg`, `bg` or `style`, corresponding to
|
||||
foreground color, background color, or miscellaneous styling (such as whether
|
||||
to bold the output or not).
|
||||
* `{value}` is determined by the value of `{attribute}`. If
|
||||
`{attribute}` is `style`, then `{value}` should be one of `nobold`,
|
||||
`bold`, `nointense`, `intense`, `nounderline` or `underline`. If
|
||||
`{attribute}` is `fg` or `bg`, then `{value}` should be a color.
|
||||
|
||||
A color is specified by either one of eight English names, a single 256-color
|
||||
number or an RGB triple (with over 16 million possible values, or "true
|
||||
color").
|
||||
|
||||
The color names are `red`, `blue`, `green`, `cyan`, `magenta`, `yellow`,
|
||||
`white` or `black`.
|
||||
|
||||
A single 256-color number is a value in the range 0-255 (inclusive). It can
|
||||
either be in decimal format (e.g., `62`) or hexadecimal format (e.g., `0x3E`).
|
||||
|
||||
An RGB triple corresponds to three numbers (decimal or hexadecimal) separated
|
||||
by commas.
|
||||
|
||||
As a special case, `--colors '{type}:none'` will clear all colors and styles
|
||||
associated with `{type}`, which lets you start with a clean slate (instead of
|
||||
building on top of ripgrep's default color settings).
|
||||
|
||||
Here's an example that highlights the matches with a nice blue background
|
||||
with bolded white text:
|
||||
|
||||
```
|
||||
$ rg somepattern \
|
||||
--colors 'match:none' \
|
||||
--colors 'match:bg:0x33,0x66,0xFF' \
|
||||
--colors 'match:fg:white' \
|
||||
--colors 'match:style:bold'
|
||||
```
|
||||
|
||||
Colors are an ideal candidate to set in your
|
||||
[configuration file](GUIDE.md#configuration-file). See the
|
||||
[question on emulating The Silver Searcher's output style](#silver-searcher-output)
|
||||
for an example specific to colors.
|
||||
|
||||
|
||||
<h3 name="truecolors-windows">
|
||||
How do I enable true colors on Windows?
|
||||
</h3>
|
||||
|
||||
First, see the previous question's
|
||||
[answer on configuring colors](#colors).
|
||||
|
||||
Secondly, coloring on Windows is a bit complicated. If you're using a terminal
|
||||
like Cygwin, then it's likely true color support already works out of the box.
|
||||
However, if you are using a normal Windows console (`cmd` or `PowerShell`) and
|
||||
a version of Windows prior to 10, then there is no known way to get true
|
||||
color support. If you are on Windows 10 and using a Windows console, then
|
||||
true colors should work out of the box with one caveat: you might need to
|
||||
clear ripgrep's default color settings first. That is, instead of this:
|
||||
|
||||
```
|
||||
$ rg somepattern --colors 'match:fg:0x33,0x66,0xFF'
|
||||
```
|
||||
|
||||
you should do this:
|
||||
|
||||
```
|
||||
$ rg somepattern --colors 'match:none' --colors 'match:fg:0x33,0x66,0xFF'
|
||||
```
|
||||
|
||||
This is because ripgrep might set the default style for `match` to `bold`, and
|
||||
it seems like Windows 10's VT100 support doesn't permit bold and true color
|
||||
ANSI escapes to be used simultaneously. The work-around above will clear
|
||||
ripgrep's default styling, allowing you to craft it exactly as desired.
|
||||
|
||||
|
||||
<h3 name="stop-ripgrep">
|
||||
How do I stop ripgrep from messing up colors when I kill it?
|
||||
</h3>
|
||||
|
||||
Type in `color` in cmd.exe (Command Prompt) and `echo -ne "\033[0m"` on
|
||||
Unix-like systems to restore your original foreground color.
|
||||
|
||||
In PowerShell, you can add the following code to your profile which will
|
||||
restore the original foreground color when `Reset-ForegroundColor` is called.
|
||||
Including the `Set-Alias` line will allow you to call it with simply `color`.
|
||||
|
||||
```powershell
|
||||
$OrigFgColor = $Host.UI.RawUI.ForegroundColor
|
||||
function Reset-ForegroundColor {
|
||||
$Host.UI.RawUI.ForegroundColor = $OrigFgColor
|
||||
}
|
||||
Set-Alias -Name color -Value Reset-ForegroundColor
|
||||
```
|
||||
|
||||
PR [#187](https://github.com/BurntSushi/ripgrep/pull/187) fixed this, and it
|
||||
was later deprecated in
|
||||
[#281](https://github.com/BurntSushi/ripgrep/issues/281). A full explanation is
|
||||
available
|
||||
[here](https://github.com/BurntSushi/ripgrep/issues/281#issuecomment-269093893).
|
||||
|
||||
|
||||
<h3 name="size-limit">
|
||||
How do I get around the regex size limit?
|
||||
</h3>
|
||||
|
||||
If you've given ripgrep a particularly large pattern (or a large number of
|
||||
smaller patterns), then it is possible that it will fail to compile because it
|
||||
hit a pre-set limit. For example:
|
||||
|
||||
```
|
||||
$ rg '\pL{1000}'
|
||||
Compiled regex exceeds size limit of 10485760 bytes.
|
||||
```
|
||||
|
||||
(Note: `\pL{1000}` may look small, but `\pL` is the character class containing
|
||||
all Unicode letters, which is quite large. *And* it's repeated 1000 times.)
|
||||
|
||||
In this case, you can work around this by simply increasing the limit:
|
||||
|
||||
```
|
||||
$ rg '\pL{1000}' --regex-size-limit 1G
|
||||
```
|
||||
|
||||
Increasing the limit to 1GB does not necessarily mean that ripgrep will use
|
||||
that much memory. The limit just says that it's allowed to (approximately) use
|
||||
that much memory for constructing the regular expression.
|
||||
|
||||
|
||||
<h3 name="dfa-size">
|
||||
How do I make the <code>-f/--file</code> flag faster?
|
||||
</h3>
|
||||
|
||||
The `-f/--file` flag permits one to give a file to ripgrep which contains a pattern
|
||||
on each line. ripgrep will then report any line that matches any of the
|
||||
patterns.
|
||||
|
||||
If this pattern file gets too big, then it is possible ripgrep will slow down
|
||||
dramatically. *Typically* this is because an internal cache is too small, and
|
||||
will cause ripgrep to spill over to a slower but more robust regular expression
|
||||
engine. If this is indeed the problem, then it is possible to increase this
|
||||
cache and regain speed. The cache can be controlled via the `--dfa-size-limit`
|
||||
flag. For example, using `--dfa-size-limit 1G` will set the cache size to 1GB.
|
||||
(Note that this doesn't mean ripgrep will use 1GB of memory automatically, but
|
||||
it will allow the regex engine to if it needs to.)
|
||||
|
||||
|
||||
<h3 name="silver-searcher-output">
|
||||
How do I make the output look like The Silver Searcher's output?
|
||||
</h3>
|
||||
|
||||
Use the `--colors` flag, like so:
|
||||
|
||||
```
|
||||
rg --colors line:fg:yellow \
|
||||
--colors line:style:bold \
|
||||
--colors path:fg:green \
|
||||
--colors path:style:bold \
|
||||
--colors match:fg:black \
|
||||
--colors match:bg:yellow \
|
||||
--colors match:style:nobold \
|
||||
foo
|
||||
```
|
||||
|
||||
Alternatively, add your color configuration to your ripgrep config file (which
|
||||
is activated by setting the `RIPGREP_CONFIG_PATH` environment variable to point
|
||||
to your config file). For example:
|
||||
|
||||
```
|
||||
$ cat $HOME/.config/ripgrep/rc
|
||||
--colors=line:fg:yellow
|
||||
--colors=line:style:bold
|
||||
--colors=path:fg:green
|
||||
--colors=path:style:bold
|
||||
--colors=match:fg:black
|
||||
--colors=match:bg:yellow
|
||||
--colors=match:style:nobold
|
||||
$ RIPGREP_CONFIG_PATH=$HOME/.config/ripgrep/rc rg foo
|
||||
```
|
||||
|
||||
|
||||
<h3 name="pcre2-slow">
|
||||
Why does ripgrep get slower when I enable PCRE2 regexes?
|
||||
</h3>
|
||||
|
||||
When you use the `--pcre2` (`-P` for short) flag, ripgrep will use the PCRE2
|
||||
regex engine instead of the default. Both regex engines are quite fast,
|
||||
but PCRE2 provides a number of additional features such as look-around and
|
||||
backreferences that many enjoy using. This is largely because PCRE2 uses
|
||||
a backtracking implementation whereas the default regex engine uses a finite
|
||||
automaton based implementation. The former provides the ability to add lots of
|
||||
bells and whistles over the latter, but the latter executes with worst case
|
||||
linear time complexity.
|
||||
|
||||
With that out of the way, if you've used `-P` with ripgrep, you may have
|
||||
noticed that it can be slower. The reasons for why this is are quite complex,
|
||||
and they are complex because the optimizations that ripgrep uses to implement
|
||||
fast search are complex.
|
||||
|
||||
The task ripgrep has before it is somewhat simple; all it needs to do is search
|
||||
a file for occurrences of some pattern and then print the lines containing
|
||||
those occurrences. The problem lies in what is considered a valid match and how
|
||||
exactly we read the bytes from a file.
|
||||
|
||||
In terms of what is considered a valid match, remember that ripgrep will only
|
||||
report matches spanning a single line by default. The problem here is that
|
||||
some patterns can match across multiple lines, and ripgrep needs to prevent
|
||||
that from happening. For example, `foo\sbar` will match `foo\nbar`. The most
|
||||
obvious way to achieve this is to read the data from a file, and then apply
|
||||
the pattern search to that data for each line. The problem with this approach
|
||||
is that it can be quite slow; it would be much faster to let the pattern
|
||||
search across as much data as possible. It's faster because it gets rid of the
|
||||
overhead of finding the boundaries of every line, and also because it gets rid
|
||||
of the overhead of starting and stopping the pattern search for every single
|
||||
line. (This is operating under the general assumption that matching lines are
|
||||
much rarer than non-matching lines.)
|
||||
|
||||
It turns out that we can use the faster approach by applying a very simple
|
||||
restriction to the pattern: *statically prevent* the pattern from matching
|
||||
through a `\n` character. Namely, when given a pattern like `foo\sbar`,
|
||||
ripgrep will remove `\n` from the `\s` character class automatically. In some
|
||||
cases, a simple removal is not so easy. For example, ripgrep will return an
|
||||
error when your pattern includes a `\n` literal:
|
||||
|
||||
```
|
||||
$ rg '\n'
|
||||
the literal '"\n"' is not allowed in a regex
|
||||
```
|
||||
|
||||
So what does this have to do with PCRE2? Well, ripgrep's default regex engine
|
||||
exposes APIs for doing syntactic analysis on the pattern in a way that makes
|
||||
it quite easy to strip `\n` from the pattern (or otherwise detect it and report
|
||||
an error if stripping isn't possible). PCRE2 seemingly does not provide a
|
||||
similar API, so ripgrep does not do any stripping when PCRE2 is enabled. This
|
||||
forces ripgrep to use the "slow" search strategy of searching each line
|
||||
individually.
|
||||
|
||||
OK, so if enabling PCRE2 slows down the default method of searching because it
|
||||
forces matches to be limited to a single line, then why is PCRE2 also sometimes
|
||||
slower when performing multiline searches? Well, that's because there are
|
||||
*multiple* reasons why using PCRE2 in ripgrep can be slower than the default
|
||||
regex engine. This time, blame PCRE2's Unicode support, which ripgrep enables
|
||||
by default. In particular, PCRE2 cannot simultaneously enable Unicode support
|
||||
and search arbitrary data. That is, when PCRE2's Unicode support is enabled,
|
||||
the data **must** be valid UTF-8 (to do otherwise is to invoke undefined
|
||||
behavior). This is in contrast to ripgrep's default regex engine, which can
|
||||
enable Unicode support and still search arbitrary data. ripgrep's default
|
||||
regex engine simply won't match invalid UTF-8 for a pattern that can otherwise
|
||||
only match valid UTF-8. Why doesn't PCRE2 do the same? This author isn't
|
||||
familiar with its internals, so we can't comment on it here.
|
||||
|
||||
The bottom line here is that we can't enable PCRE2's Unicode support without
|
||||
simultaneously incurring a performance penalty for ensuring that we are
|
||||
searching valid UTF-8. In particular, ripgrep will transcode the contents
|
||||
of each file to UTF-8 while replacing invalid UTF-8 data with the Unicode
|
||||
replacement codepoint. ripgrep then disables PCRE2's own internal UTF-8
|
||||
checking, since we've guaranteed the data we hand it will be valid UTF-8. The
|
||||
reason why ripgrep takes this approach is because if we do hand PCRE2 invalid
|
||||
UTF-8, then it will report a match error if it comes across an invalid UTF-8
|
||||
sequence. This is not good news for ripgrep, since it will stop it from
|
||||
searching the rest of the file, and will also print potentially undesirable
|
||||
error messages to users.
|
||||
|
||||
All right, the above is a lot of information to swallow if you aren't already
|
||||
familiar with ripgrep internals. Let's make this concrete with some examples.
|
||||
First, let's get some data big enough to magnify the performance differences:
|
||||
|
||||
```
|
||||
$ curl -O 'https://burntsushi.net/stuff/subtitles2016-sample.gz'
|
||||
$ gzip -d subtitles2016-sample
|
||||
$ md5sum subtitles2016-sample
|
||||
e3cb796a20bbc602fbfd6bb43bda45f5 subtitles2016-sample
|
||||
```
|
||||
|
||||
To search this data, we will use the pattern `^\w{42}$`, which contains exactly
|
||||
one hit in the file and has no literals. Having no literals is important,
|
||||
because it ensures that the regex engine won't use literal optimizations to
|
||||
speed up the search. In other words, it lets us reason coherently about the
|
||||
actual task that the regex engine is performing.
|
||||
|
||||
Let's now walk through a few examples in light of the information above. First,
|
||||
let's consider the default search using ripgrep's default regex engine and
|
||||
then the same search with PCRE2:
|
||||
|
||||
```
|
||||
$ time rg '^\w{42}$' subtitles2016-sample
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m1.783s
|
||||
user 0m1.731s
|
||||
sys 0m0.051s
|
||||
|
||||
$ time rg -P '^\w{42}$' subtitles2016-sample
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m2.458s
|
||||
user 0m2.419s
|
||||
sys 0m0.038s
|
||||
```
|
||||
|
||||
In this particular example, both pattern searches are using a Unicode aware
|
||||
`\w` character class and both are counting lines in order to report line
|
||||
numbers. The key difference here is that the first search will not search
|
||||
line by line, but the second one will. We can observe which strategy ripgrep
|
||||
uses by passing the `--trace` flag:
|
||||
|
||||
```
|
||||
$ rg '^\w{42}$' subtitles2016-sample --trace
|
||||
[... snip ...]
|
||||
TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:622: Some("subtitles2016-sample"): searching via memory map
|
||||
TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:712: slice reader: searching via slice-by-line strategy
|
||||
TRACE|grep_searcher::searcher::core|grep-searcher/src/searcher/core.rs:61: searcher core: will use fast line searcher
|
||||
[... snip ...]
|
||||
|
||||
$ rg -P '^\w{42}$' subtitles2016-sample --trace
|
||||
[... snip ...]
|
||||
TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:622: Some("subtitles2016-sample"): searching via memory map
|
||||
TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:705: slice reader: needs transcoding, using generic reader
|
||||
TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:685: generic reader: searching via roll buffer strategy
|
||||
TRACE|grep_searcher::searcher::core|grep-searcher/src/searcher/core.rs:63: searcher core: will use slow line searcher
|
||||
[... snip ...]
|
||||
```
|
||||
|
||||
The first says it is using the "fast line searcher" whereas the latter says
|
||||
it is using the "slow line searcher." The latter also shows that we are
|
||||
decoding the contents of the file, which also impacts performance.
|
||||
|
||||
Interestingly, in this case, the pattern does not match a `\n` and the file
|
||||
we're searching is valid UTF-8, so neither the slow line-by-line search
|
||||
strategy nor the decoding are necessary. We could fix the former issue with
|
||||
better PCRE2 introspection APIs. We can actually fix the latter issue with
|
||||
ripgrep's `--no-encoding` flag, which prevents the automatic UTF-8 decoding,
|
||||
but will enable PCRE2's own UTF-8 validity checking. Unfortunately, it's slower
|
||||
in my build of ripgrep:
|
||||
|
||||
```
|
||||
$ time rg -P '^\w{42}$' subtitles2016-sample --no-encoding
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m3.074s
|
||||
user 0m3.021s
|
||||
sys 0m0.051s
|
||||
```
|
||||
|
||||
(Tip: use the `--trace` flag to verify that no decoding in ripgrep is
|
||||
happening.)
|
||||
|
||||
A possible reason why PCRE2's UTF-8 checking is slower is because it might
|
||||
not be better than the highly optimized UTF-8 checking routines found in the
|
||||
[`encoding_rs`](https://github.com/hsivonen/encoding_rs) library, which is what
|
||||
ripgrep uses for UTF-8 decoding. Moreover, my build of ripgrep enables
|
||||
`encoding_rs`'s SIMD optimizations, which may be in play here.
|
||||
|
||||
Also, note that using the `--no-encoding` flag can cause PCRE2 to report
|
||||
invalid UTF-8 errors, which causes ripgrep to stop searching the file:
|
||||
|
||||
```
|
||||
$ cat invalid-utf8
|
||||
foobar
|
||||
|
||||
$ xxd invalid-utf8
|
||||
00000000: 666f 6fff 6261 720a foo.bar.
|
||||
|
||||
$ rg foo invalid-utf8
|
||||
1:foobar
|
||||
|
||||
$ rg -P foo invalid-utf8
|
||||
1:foo<6F>bar
|
||||
|
||||
$ rg -P foo invalid-utf8 --no-encoding
|
||||
invalid-utf8: PCRE2: error matching: UTF-8 error: illegal byte (0xfe or 0xff)
|
||||
```
|
||||
|
||||
All right, so at this point, you might think that we could remove the penalty
|
||||
for line-by-line searching by enabling multiline search. After all, our
|
||||
particular pattern can't match across multiple lines anyway, so we'll still get
|
||||
the results we want. Let's try it:
|
||||
|
||||
```
|
||||
$ time rg -U '^\w{42}$' subtitles2016-sample
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m1.803s
|
||||
user 0m1.748s
|
||||
sys 0m0.054s
|
||||
|
||||
$ time rg -P -U '^\w{42}$' subtitles2016-sample
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m2.962s
|
||||
user 0m2.246s
|
||||
sys 0m0.713s
|
||||
```
|
||||
|
||||
Search times remain the same with the default regex engine, but the PCRE2
|
||||
search gets _slower_. What happened? The secrets can be revealed with the
|
||||
`--trace` flag once again. In the former case, ripgrep actually detects that
|
||||
the pattern can't match across multiple lines, and so will fall back to the
|
||||
"fast line search" strategy as with our search without `-U`.
|
||||
|
||||
However, for PCRE2, things are much worse. Namely, since Unicode mode is still
|
||||
enabled, ripgrep is still going to decode UTF-8 to ensure that it hands only
|
||||
valid UTF-8 to PCRE2. Unfortunately, one key downside of multiline search is
|
||||
that ripgrep cannot do it incrementally. Since matches can be arbitrarily long,
|
||||
ripgrep actually needs the entire file in memory at once. Normally, we can use
|
||||
a memory map for this, but because we need to UTF-8 decode the file before
|
||||
searching it, ripgrep winds up reading the entire contents of the file on to
|
||||
the heap before executing a search. Owch.
|
||||
|
||||
OK, so Unicode is killing us here. The file we're searching is _mostly_ ASCII,
|
||||
so maybe we're OK with missing some data. (Try `rg '[\w--\p{ascii}]'` to see
|
||||
non-ASCII word characters that an ASCII-only `\w` character class would miss.)
|
||||
We can disable Unicode in both searches, but this is done differently depending
|
||||
on the regex engine we use:
|
||||
|
||||
```
|
||||
$ time rg '(?-u)^\w{42}$' subtitles2016-sample
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m1.714s
|
||||
user 0m1.669s
|
||||
sys 0m0.044s
|
||||
|
||||
$ time rg -P '^\w{42}$' subtitles2016-sample --no-pcre2-unicode
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m1.997s
|
||||
user 0m1.958s
|
||||
sys 0m0.037s
|
||||
```
|
||||
|
||||
For the most part, ripgrep's default regex engine performs about the same.
|
||||
PCRE2 does improve a little bit, and is now almost as fast as the default
|
||||
regex engine. If you look at the output of `--trace`, you'll see that ripgrep
|
||||
will no longer perform UTF-8 decoding, but it does still use the slow
|
||||
line-by-line searcher.
|
||||
|
||||
At this point, we can combine all of our insights above: let's try to get off
|
||||
of the slow line-by-line searcher by enabling multiline mode, and let's stop
|
||||
UTF-8 decoding by disabling Unicode support:
|
||||
|
||||
```
|
||||
$ time rg -U '(?-u)^\w{42}$' subtitles2016-sample
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m1.714s
|
||||
user 0m1.655s
|
||||
sys 0m0.058s
|
||||
|
||||
$ time rg -P -U '^\w{42}$' subtitles2016-sample --no-pcre2-unicode
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m1.121s
|
||||
user 0m1.071s
|
||||
sys 0m0.048s
|
||||
```
|
||||
|
||||
Ah, there's PCRE2's JIT shining! ripgrep's default regex engine once again
|
||||
remains about the same, but PCRE2 no longer needs to search line-by-line and it
|
||||
no longer needs to do any kind of UTF-8 checks. This allows the file to get
|
||||
memory mapped and passed right through PCRE2's JIT at impressive speeds. (As
|
||||
a brief and interesting historical note, the configuration of "memory map +
|
||||
multiline + no-Unicode" is exactly the configuration used by The Silver
|
||||
Searcher. This analysis perhaps sheds some light on why that
|
||||
configuration is useful!)
|
||||
|
||||
In summary, if you want PCRE2 to go as fast as possible and you don't care
|
||||
about Unicode and you don't care about matches possibly spanning across
|
||||
multiple lines, then enable multiline mode with `-U` and disable PCRE2's
|
||||
Unicode support with the `--no-pcre2-unicode` flag.
|
||||
|
||||
Caveat emptor: This author is not a PCRE2 expert, so there may be APIs that can
|
||||
improve performance that the author missed. Similarly, there may be alternative
|
||||
designs for a searching tool that are more amenable to how PCRE2 works.
|
||||
|
||||
|
||||
<h3 name="rg-other-cmd">
|
||||
When I run <code>rg</code>, why does it execute some other command?
|
||||
</h3>
|
||||
|
||||
It's likely that you have a shell alias or even another tool called `rg` which
|
||||
is interfering with ripgrep. Run `which rg` to see what it is.
|
||||
|
||||
(Notably, the Rails plug-in for
|
||||
[Oh My Zsh](https://github.com/robbyrussell/oh-my-zsh/wiki/Plugins#rails) sets
|
||||
up an `rg` alias for `rails generate`.)
|
||||
|
||||
Problems like this can be resolved in one of several ways:
|
||||
|
||||
* If you're using the OMZ Rails plug-in, disable it by editing the `plugins`
|
||||
array in your zsh configuration.
|
||||
* Temporarily bypass an existing `rg` alias by calling ripgrep as
|
||||
`command rg`, `\rg`, or `'rg'`.
|
||||
* Temporarily bypass an existing alias or another tool named `rg` by calling
|
||||
ripgrep by its full path (e.g., `/usr/bin/rg` or `/usr/local/bin/rg`).
|
||||
* Permanently disable an existing `rg` alias by adding `unalias rg` to the
|
||||
bottom of your shell configuration file (e.g., `.bash_profile` or `.zshrc`).
|
||||
* Give ripgrep its own alias that doesn't conflict with other tools/aliases by
|
||||
adding a line like the following to the bottom of your shell configuration
|
||||
file: `alias ripgrep='command rg'`.
|
||||
|
||||
|
||||
<h3 name="rg-alias-windows">
|
||||
How do I create an alias for ripgrep on Windows?
|
||||
</h3>
|
||||
|
||||
Often you'll want to create aliases for commands you use a lot that set
|
||||
certain flags. But PowerShell function aliases do not behave like your typical
|
||||
Linux shell alias. You always need to propagate arguments and `stdin` input.
|
||||
But it cannot be done simply as
|
||||
`function grep() { $input | rg.exe --hidden $args }`
|
||||
|
||||
Use the example below as a reference for how to set up an alias in PowerShell.
|
||||
|
||||
```powershell
|
||||
function grep {
|
||||
$count = @($input).Count
|
||||
$input.Reset()
|
||||
|
||||
if ($count) {
|
||||
$input | rg.exe --hidden $args
|
||||
}
|
||||
else {
|
||||
rg.exe --hidden $args
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
PowerShell special variables:
|
||||
|
||||
* input - is the PowerShell `stdin` object that allows you to access its content.
|
||||
* args - is the array of arguments passed to this function.
|
||||
|
||||
This alias checks whether there is `stdin` input and propagates only if there
|
||||
are some lines. Otherwise, an empty `$input` will cause PowerShell to trigger `rg` to
|
||||
search empty `stdin`.
|
||||
|
||||
|
||||
<h3 name="powershell-profile">
|
||||
How do I create a PowerShell profile?
|
||||
</h3>
|
||||
|
||||
To customize powershell on start-up, there is a special PowerShell script that
|
||||
has to be created. In order to find its location, type `$profile`.
|
||||
See
|
||||
[Microsoft's documentation](https://technet.microsoft.com/en-us/library/bb613488(v=vs.85).aspx)
|
||||
for more details.
|
||||
|
||||
Any PowerShell code in this file gets evaluated when the console starts. This
|
||||
way you can have your own aliases created at startup.
|
||||
|
||||
|
||||
<h3 name="pipe-non-ascii-windows">
|
||||
How do I pipe non-ASCII content to ripgrep on Windows?
|
||||
</h3>
|
||||
|
||||
When piping input into native executables in PowerShell, the encoding of the
|
||||
input is controlled by the `$OutputEncoding` variable. By default, this is set
|
||||
to US-ASCII, and any characters in the pipeline that don't have encodings in
|
||||
US-ASCII are converted to `?` (question mark) characters.
|
||||
|
||||
To change this setting, set `$OutputEncoding` to a different encoding, as
|
||||
represented by a .NET encoding object. Some common examples are below. The
|
||||
value of this variable is reset when PowerShell restarts, so to make this
|
||||
change take effect every time PowerShell is started add a line setting the
|
||||
variable into your PowerShell profile.
|
||||
|
||||
Example `$OutputEncoding` settings:
|
||||
|
||||
* UTF-8 without BOM: `$OutputEncoding = [System.Text.UTF8Encoding]::new()`
|
||||
* The console's output encoding:
|
||||
`$OutputEncoding = [System.Console]::OutputEncoding`
|
||||
|
||||
If you continue to have encoding problems, you can also force the encoding
|
||||
that the console will use for printing to UTF-8 with
|
||||
`[System.Console]::OutputEncoding = [System.Text.Encoding]::UTF8`. This
|
||||
will also reset when PowerShell is restarted, so you can add that line
|
||||
to your profile as well if you want to make the setting permanent.
|
||||
|
||||
<h3 name="search-and-replace">
|
||||
How can I search and replace with ripgrep?
|
||||
</h3>
|
||||
|
||||
Using ripgrep alone, you can't. ripgrep is a search tool that will never
|
||||
touch your files. However, the output of ripgrep can be piped to other tools
|
||||
that do modify files on disk. See
|
||||
[this issue](https://github.com/BurntSushi/ripgrep/issues/74) for more
|
||||
information.
|
||||
|
||||
sed is one such tool that can modify files on disk. sed can take a filename
|
||||
and a substitution command to search and replace in the specified file.
|
||||
Files containing matching patterns can be provided to sed using
|
||||
|
||||
```
|
||||
rg foo --files-with-matches
|
||||
```
|
||||
|
||||
The output of this command is a list of filenames that contain a match for
|
||||
the `foo` pattern.
|
||||
|
||||
This list can be piped into `xargs`, which will split the filenames from
|
||||
standard input into arguments for the command following xargs. You can use this
|
||||
combination to pipe a list of filenames into sed for replacement. For example:
|
||||
|
||||
```
|
||||
rg foo --files-with-matches | xargs sed -i 's/foo/bar/g'
|
||||
```
|
||||
|
||||
will replace all instances of 'foo' with 'bar' in the files in which
|
||||
ripgrep finds the foo pattern. The `-i` flag to sed indicates that you are
|
||||
editing files in place, and `s/foo/bar/g` says that you are performing a
|
||||
**s**ubstitution of the pattern `foo` for `bar`, and that you are doing this
|
||||
substitution **g**lobally (all occurrences of the pattern in each file).
|
||||
|
||||
Note: the above command assumes that you are using GNU sed. If you are using
|
||||
BSD sed (the default on macOS and FreeBSD) then you must modify the above
|
||||
command to be the following:
|
||||
|
||||
```
|
||||
rg foo --files-with-matches | xargs sed -i '' 's/foo/bar/g'
|
||||
```
|
||||
|
||||
The `-i` flag in BSD sed requires a file extension to be given to make backups
|
||||
for all modified files. Specifying the empty string prevents file backups from
|
||||
being made.
|
||||
|
||||
Finally, if any of your file paths contain whitespace in them, then you might
|
||||
need to delimit your file paths with a NUL terminator. This requires telling
|
||||
ripgrep to output NUL bytes between each path, and telling xargs to read paths
|
||||
delimited by NUL bytes:
|
||||
|
||||
```
|
||||
rg foo --files-with-matches -0 | xargs -0 sed -i 's/foo/bar/g'
|
||||
```
|
||||
|
||||
To learn more about sed, see the sed manual
|
||||
[here](https://www.gnu.org/software/sed/manual/sed.html).
|
||||
|
||||
Additionally, Facebook has a tool called
|
||||
[fastmod](https://github.com/facebookincubator/fastmod)
|
||||
that uses some of the same libraries as ripgrep and might provide a more
|
||||
ergonomic search-and-replace experience.
|
||||
|
||||
|
||||
<h3 name="license">
|
||||
How is ripgrep licensed?
|
||||
</h3>
|
||||
|
||||
ripgrep is dual licensed under the
|
||||
[Unlicense](https://unlicense.org/)
|
||||
and MIT licenses. Specifically, you may use ripgrep under the terms of either
|
||||
license.
|
||||
|
||||
The reason why ripgrep is dual licensed this way is two-fold:
|
||||
|
||||
1. I, as ripgrep's author, would like to participate in a small bit of
|
||||
ideological activism by promoting the Unlicense's goal: to disclaim
|
||||
copyright monopoly interest.
|
||||
2. I, as ripgrep's author, would like as many people to use ripgrep as
|
||||
possible. Since the Unlicense is not a proven or well known license, ripgrep
|
||||
is also offered under the MIT license, which is ubiquitous and accepted by
|
||||
almost everyone.
|
||||
|
||||
More specifically, ripgrep and all its dependencies are compatible with this
|
||||
licensing choice. In particular, ripgrep's dependencies (direct and transitive)
|
||||
will always be limited to permissive licenses. That is, ripgrep will never
|
||||
depend on code that is not permissively licensed. This means rejecting any
|
||||
dependency that uses a copyleft license such as the GPL, LGPL, MPL or any of
|
||||
the Creative Commons ShareAlike licenses. Whether the license is "weak"
|
||||
copyleft or not does not matter; ripgrep will **not** depend on it.
|
||||
|
||||
|
||||
<h3 name="posix4ever">
|
||||
Can ripgrep replace grep?
|
||||
</h3>
|
||||
|
||||
Yes and no.
|
||||
|
||||
If, upon hearing that "ripgrep can replace grep," you *actually* hear, "ripgrep
|
||||
can be used in every instance grep can be used, in exactly the same way, for
|
||||
the same use cases, with exactly the same bug-for-bug behavior," then no,
|
||||
ripgrep trivially *cannot* replace grep. Moreover, ripgrep will *never* replace
|
||||
grep.
|
||||
|
||||
If, upon hearing that "ripgrep can replace grep," you *actually* hear, "ripgrep
|
||||
can replace grep in some cases and not in other use cases," then yes, that is
|
||||
indeed true!
|
||||
|
||||
Let's go over some of those use cases in favor of ripgrep. Some of these may
|
||||
not apply to you. That's OK. There may be other use cases not listed here that
|
||||
do apply to you. That's OK too.
|
||||
|
||||
(For all claims related to performance in the following words, see my
|
||||
[blog post](https://blog.burntsushi.net/ripgrep/)
|
||||
introducing ripgrep.)
|
||||
|
||||
* Are you frequently searching a repository of code? If so, ripgrep might be a
|
||||
good choice since there's likely a good chunk of your repository that you
|
||||
don't want to search. grep can, of course, be made to filter files using
|
||||
recursive search, and if you don't mind writing out the requisite `--exclude`
|
||||
rules or writing wrapper scripts, then grep might be sufficient. (I'm not
|
||||
kidding, I myself did this with grep for almost a decade before writing
|
||||
ripgrep.) But if you instead enjoy having a search tool respect your
|
||||
`.gitignore`, then ripgrep might be perfect for you!
|
||||
* Are you frequently searching non-ASCII text that is UTF-8 encoded? One of
|
||||
ripgrep's key features is that it can handle Unicode features in your
|
||||
patterns in a way that tends to be faster than GNU grep. Unicode features
|
||||
in ripgrep are enabled by default; there is no need to configure your locale
|
||||
settings to use ripgrep properly because ripgrep doesn't respect your locale
|
||||
settings.
|
||||
* Do you need to search UTF-16 files and you don't want to bother explicitly
|
||||
transcoding them? Great. ripgrep does this for you automatically. No need
|
||||
to enable it.
|
||||
* Do you need to search a large directory of large files? ripgrep uses
|
||||
parallelism by default, which tends to make it faster than a standard
|
||||
`grep -r` search. However, if you're OK writing the occasional
|
||||
`find ./ -print0 | xargs -P8 -0 grep` command, then maybe grep is good
|
||||
enough.
|
||||
|
||||
Here are some cases where you might *not* want to use ripgrep. The same caveats
|
||||
for the previous section apply.
|
||||
|
||||
* Are you writing portable shell scripts intended to work in a variety of
|
||||
environments? Great, probably not a good idea to use ripgrep! ripgrep has
|
||||
nowhere near the ubiquity of grep, so if you do use ripgrep, you might need
|
||||
to futz with the installation process more than you would with grep.
|
||||
* Do you care about POSIX compatibility? If so, then you can't use ripgrep
|
||||
because it never was, isn't and never will be POSIX compatible.
|
||||
* Do you hate tools that try to do something smart? If so, ripgrep is all about
|
||||
being smart, so you might prefer to just stick with grep.
|
||||
* Is there a particular feature of grep you rely on that ripgrep either doesn't
|
||||
have or never will have? If the former, file a bug report, maybe ripgrep can
|
||||
do it! If the latter, well, then, just use grep.
|
||||
|
||||
|
||||
<h3 name="intentcountsforsomething">
|
||||
What does the "rip" in ripgrep mean?
|
||||
</h3>
|
||||
|
||||
When I first started writing ripgrep, I called it `rep`, intending it to be a
|
||||
shorter variant of `grep`. Soon after, I renamed it to `xrep` since `rep`
|
||||
wasn't obvious enough of a name for my taste. And also because adding `x` to
|
||||
anything always makes it better, right?
|
||||
|
||||
Before ripgrep's first public release, I decided that I didn't like `xrep`. I
|
||||
thought it was slightly awkward to type, and despite my previous praise of the
|
||||
letter `x`, I kind of thought it was pretty lame. Being someone who really
|
||||
likes Rust, I wanted to call it "rustgrep" or maybe "rgrep" for short. But I
|
||||
thought that was just as lame, and maybe a little too in-your-face. But I
|
||||
wanted to continue using `r` so I could at least pretend Rust had something to
|
||||
do with it.
|
||||
|
||||
I spent a couple of days trying to think of very short words that began with
|
||||
the letter `r` that were even somewhat related to the task of searching. I
|
||||
don't remember how it popped into my head, but "rip" came up as something that
|
||||
meant "fast," as in, "to rip through your text." The fact that RIP is also
|
||||
an initialism for "Rest in Peace" (as in, "ripgrep kills grep") never really
|
||||
dawned on me. Perhaps the coincidence is too striking to believe that, but
|
||||
I didn't realize it until someone explicitly pointed it out to me after the
|
||||
initial public release. I admit that I found it mildly amusing, but if I had
|
||||
realized it myself before the public release, I probably would have pressed on
|
||||
and chosen a different name. Alas, renaming things after a release is hard, so I
|
||||
decided to mush on.
|
||||
|
||||
Given the fact that
|
||||
[ripgrep never was, is or will be a 100% drop-in replacement for
|
||||
grep](#posix4ever),
|
||||
ripgrep is neither actually a "grep killer" nor was it ever intended to be. It
|
||||
certainly does eat into some of its use cases, but that's nothing that other
|
||||
tools like ack or The Silver Searcher weren't already doing.
|
||||
785
GUIDE.md
785
GUIDE.md
@@ -1,785 +0,0 @@
|
||||
## User Guide
|
||||
|
||||
This guide is intended to give an elementary description of ripgrep and an
|
||||
overview of its capabilities. This guide assumes that ripgrep is
|
||||
[installed](README.md#installation)
|
||||
and that readers have passing familiarity with using command line tools. This
|
||||
also assumes a Unix-like system, although most commands are probably easily
|
||||
translatable to any command line shell environment.
|
||||
|
||||
|
||||
### Table of Contents
|
||||
|
||||
* [Basics](#basics)
|
||||
* [Recursive search](#recursive-search)
|
||||
* [Automatic filtering](#automatic-filtering)
|
||||
* [Manual filtering: globs](#manual-filtering-globs)
|
||||
* [Manual filtering: file types](#manual-filtering-file-types)
|
||||
* [Replacements](#replacements)
|
||||
* [Configuration file](#configuration-file)
|
||||
* [File encoding](#file-encoding)
|
||||
* [Binary data](#binary-data)
|
||||
* [Common options](#common-options)
|
||||
|
||||
|
||||
### Basics
|
||||
|
||||
ripgrep is a command line tool that searches your files for patterns that
|
||||
you give it. ripgrep behaves as if reading each file line by line. If a line
|
||||
matches the pattern provided to ripgrep, then that line will be printed. If a
|
||||
line does not match the pattern, then the line is not printed.
|
||||
|
||||
The best way to see how this works is with an example. To show an example, we
|
||||
need something to search. Let's try searching ripgrep's source code. First
|
||||
grab a ripgrep source archive from
|
||||
https://github.com/BurntSushi/ripgrep/archive/0.7.1.zip
|
||||
and extract it:
|
||||
|
||||
```
|
||||
$ curl -LO https://github.com/BurntSushi/ripgrep/archive/0.7.1.zip
|
||||
$ unzip 0.7.1.zip
|
||||
$ cd ripgrep-0.7.1
|
||||
$ ls
|
||||
benchsuite grep tests Cargo.toml LICENSE-MIT
|
||||
ci ignore wincolor CHANGELOG.md README.md
|
||||
complete pkg appveyor.yml compile snapcraft.yaml
|
||||
doc src build.rs COPYING UNLICENSE
|
||||
globset termcolor Cargo.lock HomebrewFormula
|
||||
```
|
||||
|
||||
Let's try our first search by looking for all occurrences of the word `fast`
|
||||
in `README.md`:
|
||||
|
||||
```
|
||||
$ rg fast README.md
|
||||
75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
|
||||
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
|
||||
119:### Is it really faster than everything else?
|
||||
124:Summarizing, `ripgrep` is fast because:
|
||||
129: optimizations to make searching very fast.
|
||||
```
|
||||
|
||||
(**Note:** If you see an error message from ripgrep saying that it didn't
|
||||
search any files, then re-run ripgrep with the `--debug` flag. One likely cause
|
||||
of this is that you have a `*` rule in a `$HOME/.gitignore` file.)
|
||||
|
||||
So what happened here? ripgrep read the contents of `README.md`, and for each
|
||||
line that contained `fast`, ripgrep printed it to your terminal. ripgrep also
|
||||
included the line number for each line by default. If your terminal supports
|
||||
colors, then your output might actually look something like this screenshot:
|
||||
|
||||
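[![A screenshot of a sample search with ripgrep](https://burntsushi.net/stuff/ripgrep-guide-sample.png)](https://burntsushi.net/stuff/ripgrep-guide-sample.png)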
|
||||
|
||||
In this example, we searched for something called a "literal" string. This
|
||||
means that our pattern was just some normal text that we asked ripgrep to
|
||||
find. But ripgrep supports the ability to specify patterns via [regular
|
||||
expressions](https://en.wikipedia.org/wiki/Regular_expression). As an example,
|
||||
what if we wanted to find all lines that have a word that contains `fast` followed
|
||||
by some number of other letters?
|
||||
|
||||
```
|
||||
$ rg 'fast\w+' README.md
|
||||
75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
|
||||
119:### Is it really faster than everything else?
|
||||
```
|
||||
|
||||
In this example, we used the pattern `fast\w+`. This pattern tells ripgrep to
|
||||
look for any lines containing the letters `fast` followed by *one or more*
|
||||
word-like characters. Namely, `\w` matches characters that compose words (like
|
||||
`a` and `L` but unlike `.` and ` `). The `+` after the `\w` means, "match the
|
||||
previous pattern one or more times." This means that the word `fast` won't
|
||||
match because there are no word characters following the final `t`. But a word
|
||||
like `faster` will. `faste` would also match!
|
||||
|
||||
Here's a different variation on this same theme:
|
||||
|
||||
```
|
||||
$ rg 'fast\w*' README.md
|
||||
75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
|
||||
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
|
||||
119:### Is it really faster than everything else?
|
||||
124:Summarizing, `ripgrep` is fast because:
|
||||
129: optimizations to make searching very fast.
|
||||
```
|
||||
|
||||
In this case, we used `fast\w*` for our pattern instead of `fast\w+`. The `*`
|
||||
means that it should match *zero* or more times. In this case, ripgrep will
|
||||
print the same lines as the pattern `fast`, but if your terminal supports
|
||||
colors, you'll notice that `faster` will be highlighted instead of just the
|
||||
`fast` prefix.
|
||||
|
||||
It is beyond the scope of this guide to provide a full tutorial on regular
|
||||
expressions, but ripgrep's specific syntax is documented here:
|
||||
https://docs.rs/regex/0.2.5/regex/#syntax
|
||||
|
||||
|
||||
### Recursive search
|
||||
|
||||
In the previous section, we showed how to use ripgrep to search a single file.
|
||||
In this section, we'll show how to use ripgrep to search an entire directory
|
||||
of files. In fact, *recursively* searching your current working directory is
|
||||
the default mode of operation for ripgrep, which means doing this is very
|
||||
simple.
|
||||
|
||||
Using our unzipped archive of ripgrep source code, here's how to find all
|
||||
function definitions whose name is `write`:
|
||||
|
||||
```
|
||||
$ rg 'fn write\('
|
||||
src/printer.rs
|
||||
469: fn write(&mut self, buf: &[u8]) {
|
||||
|
||||
termcolor/src/lib.rs
|
||||
227: fn write(&mut self, b: &[u8]) -> io::Result<usize> {
|
||||
250: fn write(&mut self, b: &[u8]) -> io::Result<usize> {
|
||||
428: fn write(&mut self, b: &[u8]) -> io::Result<usize> { self.wtr.write(b) }
|
||||
441: fn write(&mut self, b: &[u8]) -> io::Result<usize> { self.wtr.write(b) }
|
||||
454: fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
511: fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
848: fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
915: fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
949: fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
1114: fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
1348: fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
1353: fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
```
|
||||
|
||||
(**Note:** We escape the `(` here because `(` has special significance inside
|
||||
regular expressions. You could also use `rg -F 'fn write('` to achieve the
|
||||
same thing, where `-F` interprets your pattern as a literal string instead of
|
||||
a regular expression.)
|
||||
|
||||
In this example, we didn't specify a file at all. Instead, ripgrep defaulted
|
||||
to searching your current directory in the absence of a path. In general,
|
||||
`rg foo` is equivalent to `rg foo ./`.
|
||||
|
||||
This particular search showed us results in both the `src` and `termcolor`
|
||||
directories. The `src` directory is the core ripgrep code whereas `termcolor`
|
||||
is a dependency of ripgrep (and is used by other tools). What if we only wanted
|
||||
to search core ripgrep code? Well, that's easy, just specify the directory you
|
||||
want:
|
||||
|
||||
```
|
||||
$ rg 'fn write\(' src
|
||||
src/printer.rs
|
||||
469: fn write(&mut self, buf: &[u8]) {
|
||||
```
|
||||
|
||||
Here, ripgrep limited its search to the `src` directory. Another way of doing
|
||||
this search would be to `cd` into the `src` directory and simply use `rg 'fn
|
||||
write\('` again.
|
||||
|
||||
|
||||
### Automatic filtering
|
||||
|
||||
After recursive search, ripgrep's most important feature is what it *doesn't*
|
||||
search. By default, when you search a directory, ripgrep will ignore all of
|
||||
the following:
|
||||
|
||||
1. Files and directories that match the rules in your `.gitignore` glob
|
||||
pattern.
|
||||
2. Hidden files and directories.
|
||||
3. Binary files. (ripgrep considers any file with a `NUL` byte to be binary.)
|
||||
4. Symbolic links aren't followed.
|
||||
|
||||
All of these things can be toggled using various flags provided by ripgrep:
|
||||
|
||||
1. You can disable `.gitignore` handling with the `--no-ignore` flag.
|
||||
2. Hidden files and directories can be searched with the `--hidden` flag.
|
||||
3. Binary files can be searched via the `--text` (`-a` for short) flag.
|
||||
Be careful with this flag! Binary files may emit control characters to your
|
||||
terminal, which might cause strange behavior.
|
||||
4. ripgrep can follow symlinks with the `--follow` (`-L` for short) flag.
|
||||
|
||||
As a special convenience, ripgrep also provides a flag called `--unrestricted`
|
||||
(`-u` for short). Repeated uses of this flag will cause ripgrep to disable
|
||||
more and more of its filtering. That is, `-u` will disable `.gitignore`
|
||||
handling, `-uu` will search hidden files and directories and `-uuu` will search
|
||||
binary files. This is useful when you're using ripgrep and you aren't sure
|
||||
whether its filtering is hiding results from you. Tacking on a couple `-u`
|
||||
flags is a quick way to find out. (Use the `--debug` flag if you're still
|
||||
perplexed, and if that doesn't help,
|
||||
[file an issue](https://github.com/BurntSushi/ripgrep/issues/new).)
|
||||
|
||||
ripgrep's `.gitignore` handling actually goes a bit beyond just `.gitignore`
|
||||
files. ripgrep will also respect repository specific rules found in
|
||||
`$GIT_DIR/info/exclude`, as well as any global ignore rules in your
|
||||
`core.excludesFile` (which is usually `$XDG_CONFIG_HOME/git/ignore` on
|
||||
Unix-like systems).
|
||||
|
||||
Sometimes you want to search files that are in your `.gitignore`, so it is
|
||||
possible to specify additional ignore rules or overrides in a `.ignore`
|
||||
(application agnostic) or `.rgignore` (ripgrep specific) file.
|
||||
|
||||
For example, let's say you have a `.gitignore` file that looks like this:
|
||||
|
||||
```
|
||||
log/
|
||||
```
|
||||
|
||||
This generally means that any `log` directory won't be tracked by `git`.
|
||||
However, perhaps it contains useful output that you'd like to include in your
|
||||
searches, but you still don't want to track it in `git`. You can achieve this
|
||||
by creating a `.ignore` file in the same directory as the `.gitignore` file
|
||||
with the following contents:
|
||||
|
||||
```
|
||||
!log/
|
||||
```
|
||||
|
||||
ripgrep treats `.ignore` files with higher precedence than `.gitignore` files
|
||||
(and treats `.rgignore` files with higher precedence than `.ignore` files).
|
||||
This means ripgrep will see the `!log/` whitelist rule first and search that
|
||||
directory.
|
||||
|
||||
Like `.gitignore`, a `.ignore` file can be placed in any directory. Its rules
|
||||
will be processed with respect to the directory it resides in, just like
|
||||
`.gitignore`.
|
||||
|
||||
To process `.gitignore` and `.ignore` files case insensitively, use the flag
|
||||
`--ignore-file-case-insensitive`. This is especially useful on case insensitive
|
||||
file systems like those on Windows and macOS. Note though that this can come
|
||||
with a significant performance penalty, and is therefore disabled by default.
|
||||
|
||||
For a more in depth description of how glob patterns in a `.gitignore` file
|
||||
are interpreted, please see `man gitignore`.
|
||||
|
||||
|
||||
### Manual filtering: globs
|
||||
|
||||
In the previous section, we talked about ripgrep's filtering that it does by
|
||||
default. It is "automatic" because it reacts to your environment. That is, it
|
||||
uses already existing `.gitignore` files to produce more relevant search
|
||||
results.
|
||||
|
||||
In addition to automatic filtering, ripgrep also provides more manual or ad hoc
|
||||
filtering. This comes in two varieties: additional glob patterns specified in
|
||||
your ripgrep commands and file type filtering. This section covers glob
|
||||
patterns while the next section covers file type filtering.
|
||||
|
||||
In our ripgrep source code (see [Basics](#basics) for instructions on how to
|
||||
get a source archive to search), let's say we wanted to see which things depend
|
||||
on `clap`, our argument parser.
|
||||
|
||||
We could do this:
|
||||
|
||||
```
|
||||
$ rg clap
|
||||
[lots of results]
|
||||
```
|
||||
|
||||
But this shows us many things, and we're only interested in where we wrote
|
||||
`clap` as a dependency. Instead, we could limit ourselves to TOML files, which
|
||||
is how dependencies are communicated to Rust's build tool, Cargo:
|
||||
|
||||
```
|
||||
$ rg clap -g '*.toml'
|
||||
Cargo.toml
|
||||
35:clap = "2.26"
|
||||
51:clap = "2.26"
|
||||
```
|
||||
|
||||
The `-g '*.toml'` syntax says, "make sure every file searched matches this
|
||||
glob pattern." Note that we put `'*.toml'` in single quotes to prevent our
|
||||
shell from expanding the `*`.
|
||||
|
||||
If we wanted, we could tell ripgrep to search anything *but* `*.toml` files:
|
||||
|
||||
```
|
||||
$ rg clap -g '!*.toml'
|
||||
[lots of results]
|
||||
```
|
||||
|
||||
This will give you a lot of results again as above, but they won't include
|
||||
files ending with `.toml`. Note that the use of a `!` here to mean "negation"
|
||||
is a bit non-standard, but it was chosen to be consistent with how globs in
|
||||
`.gitignore` files are written. (Although, the meaning is reversed. In
|
||||
`.gitignore` files, a `!` prefix means whitelist, and on the command line, a
|
||||
`!` means blacklist.)
|
||||
|
||||
Globs are interpreted in exactly the same way as `.gitignore` patterns. That
|
||||
is, later globs will override earlier globs. For example, the following command
|
||||
will search only `*.toml` files:
|
||||
|
||||
```
|
||||
$ rg clap -g '!*.toml' -g '*.toml'
|
||||
```
|
||||
|
||||
Interestingly, reversing the order of the globs in this case will match
|
||||
nothing, since the presence of at least one non-blacklist glob will institute a
|
||||
requirement that every file searched must match at least one glob. In this
|
||||
case, the blacklist glob takes precedence over the previous glob and prevents
|
||||
any file from being searched at all!
|
||||
|
||||
|
||||
### Manual filtering: file types
|
||||
|
||||
Over time, you might notice that you use the same glob patterns over and over.
|
||||
For example, you might find yourself doing a lot of searches where you only
|
||||
want to see results for Rust files:
|
||||
|
||||
```
|
||||
$ rg 'fn run' -g '*.rs'
|
||||
```
|
||||
|
||||
Instead of writing out the glob every time, you can use ripgrep's support for
|
||||
file types:
|
||||
|
||||
```
|
||||
$ rg 'fn run' --type rust
|
||||
```
|
||||
|
||||
or, more succinctly,
|
||||
|
||||
```
|
||||
$ rg 'fn run' -trust
|
||||
```
|
||||
|
||||
The way the `--type` flag functions is simple. It acts as a name that is
|
||||
assigned to one or more globs that match the relevant files. This lets you
|
||||
write a single type that might encompass a broad range of file extensions. For
|
||||
example, if you wanted to search C files, you'd have to check both C source
|
||||
files and C header files:
|
||||
|
||||
```
|
||||
$ rg 'int main' -g '*.{c,h}'
|
||||
```
|
||||
|
||||
or you could just use the C file type:
|
||||
|
||||
```
|
||||
$ rg 'int main' -tc
|
||||
```
|
||||
|
||||
Just as you can write blacklist globs, you can blacklist file types too:
|
||||
|
||||
```
|
||||
$ rg clap --type-not rust
|
||||
```
|
||||
|
||||
or, more succinctly,
|
||||
|
||||
```
|
||||
$ rg clap -Trust
|
||||
```
|
||||
|
||||
That is, `-t` means "include files of this type" whereas `-T` means "exclude
|
||||
files of this type."
|
||||
|
||||
To see the globs that make up a type, run `rg --type-list`:
|
||||
|
||||
```
|
||||
$ rg --type-list | rg '^make:'
|
||||
make: *.mak, *.mk, GNUmakefile, Gnumakefile, Makefile, gnumakefile, makefile
|
||||
```
|
||||
|
||||
By default, ripgrep comes with a bunch of pre-defined types. Generally, these
|
||||
types correspond to well known public formats. But you can define your own
|
||||
types as well. For example, perhaps you frequently search "web" files, which
|
||||
consist of Javascript, HTML and CSS:
|
||||
|
||||
```
|
||||
$ rg --type-add 'web:*.html' --type-add 'web:*.css' --type-add 'web:*.js' -tweb title
|
||||
```
|
||||
|
||||
or, more succinctly,
|
||||
|
||||
```
|
||||
$ rg --type-add 'web:*.{html,css,js}' -tweb title
|
||||
```
|
||||
|
||||
The above command defines a new type, `web`, corresponding to the glob
|
||||
`*.{html,css,js}`. It then applies the new filter with `-tweb` and searches for
|
||||
the pattern `title`. If you ran
|
||||
|
||||
```
|
||||
$ rg --type-add 'web:*.{html,css,js}' --type-list
|
||||
```
|
||||
|
||||
Then you would see your `web` type show up in the list, even though it is not
|
||||
part of ripgrep's built-in types.
|
||||
|
||||
It is important to stress here that the `--type-add` flag only applies to the
|
||||
current command. It does not add a new file type and save it somewhere in a
|
||||
persistent form. If you want a type to be available in every ripgrep command,
|
||||
then you should either create a shell alias:
|
||||
|
||||
```
|
||||
alias rg="rg --type-add 'web:*.{html,css,js}'"
|
||||
```
|
||||
|
||||
or add `--type-add=web:*.{html,css,js}` to your ripgrep configuration file.
|
||||
([Configuration files](#configuration-file) are covered in more detail later.)
|
||||
|
||||
|
||||
### Replacements
|
||||
|
||||
ripgrep provides a limited ability to modify its output by replacing matched
|
||||
text with some other text. This is easiest to explain with an example. Remember
|
||||
when we searched for the word `fast` in ripgrep's README?
|
||||
|
||||
```
|
||||
$ rg fast README.md
|
||||
75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
|
||||
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
|
||||
119:### Is it really faster than everything else?
|
||||
124:Summarizing, `ripgrep` is fast because:
|
||||
129: optimizations to make searching very fast.
|
||||
```
|
||||
|
||||
What if we wanted to *replace* all occurrences of `fast` with `FAST`? That's
|
||||
easy with ripgrep's `--replace` flag:
|
||||
|
||||
```
|
||||
$ rg fast README.md --replace FAST
|
||||
75: FASTer than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
|
||||
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays FAST while
|
||||
119:### Is it really FASTer than everything else?
|
||||
124:Summarizing, `ripgrep` is FAST because:
|
||||
129: optimizations to make searching very FAST.
|
||||
```
|
||||
|
||||
or, more succinctly,
|
||||
|
||||
```
|
||||
$ rg fast README.md -r FAST
|
||||
[snip]
|
||||
```
|
||||
|
||||
In essence, the `--replace` flag applies *only* to the matching portion of text
|
||||
in the output. If you instead wanted to replace an entire line of text, then
|
||||
you need to include the entire line in your match. For example:
|
||||
|
||||
```
|
||||
$ rg '^.*fast.*$' README.md -r FAST
|
||||
75:FAST
|
||||
88:FAST
|
||||
119:FAST
|
||||
124:FAST
|
||||
129:FAST
|
||||
```
|
||||
|
||||
Alternatively, you can combine the `--only-matching` (or `-o` for short) with
|
||||
the `--replace` flag to achieve the same result:
|
||||
|
||||
```
|
||||
$ rg fast README.md --only-matching --replace FAST
|
||||
75:FAST
|
||||
88:FAST
|
||||
119:FAST
|
||||
124:FAST
|
||||
129:FAST
|
||||
```
|
||||
|
||||
or, more succinctly,
|
||||
|
||||
```
|
||||
$ rg fast README.md -or FAST
|
||||
[snip]
|
||||
```
|
||||
|
||||
Finally, replacements can include capturing groups. For example, let's say
|
||||
we wanted to find all occurrences of `fast` followed by another word and
|
||||
join them together with a dash. The pattern we might use for that is
|
||||
`fast\s+(\w+)`, which matches `fast`, followed by any amount of whitespace,
|
||||
followed by one or more "word" characters. We put the `\w+` in a "capturing
|
||||
group" (indicated by parentheses) so that we can reference it later in our
|
||||
replacement string. For example:
|
||||
|
||||
```
|
||||
$ rg 'fast\s+(\w+)' README.md -r 'fast-$1'
|
||||
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast-while
|
||||
124:Summarizing, `ripgrep` is fast-because:
|
||||
```
|
||||
|
||||
Our replacement string here, `fast-$1`, consists of `fast-` followed by the
|
||||
contents of the capturing group at index `1`. (Capturing groups actually start
|
||||
at index 0, but the `0`th capturing group always corresponds to the entire
|
||||
match. The capturing group at index `1` always corresponds to the first
|
||||
explicit capturing group found in the regex pattern.)
|
||||
|
||||
Capturing groups can also be named, which is sometimes more convenient than
|
||||
using the indices. For example, the following command is equivalent to the
|
||||
above command:
|
||||
|
||||
```
|
||||
$ rg 'fast\s+(?P<word>\w+)' README.md -r 'fast-$word'
|
||||
88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast-while
|
||||
124:Summarizing, `ripgrep` is fast-because:
|
||||
```
|
||||
|
||||
It is important to note that ripgrep **will never modify your files**. The
|
||||
`--replace` flag only controls ripgrep's output. (And there is no flag to let
|
||||
you do a replacement in a file.)
|
||||
|
||||
|
||||
### Configuration file
|
||||
|
||||
It is possible that ripgrep's default options aren't suitable in every case.
|
||||
For that reason, and because shell aliases aren't always convenient, ripgrep
|
||||
supports configuration files.
|
||||
|
||||
Setting up a configuration file is simple. ripgrep will not look in any
|
||||
predetermined directory for a config file automatically. Instead, you need to
|
||||
set the `RIPGREP_CONFIG_PATH` environment variable to the file path of your
|
||||
config file. Once the environment variable is set, open the file and just type
|
||||
in the flags you want set automatically. There are only two rules for
|
||||
describing the format of the config file:
|
||||
|
||||
1. Every line is a shell argument, after trimming whitespace.
|
||||
2. Lines starting with `#` (optionally preceded by any amount of whitespace)
|
||||
are ignored.
|
||||
|
||||
In particular, there is no escaping. Each line is given to ripgrep as a single
|
||||
command line argument verbatim.
|
||||
|
||||
Here's an example of a configuration file, which demonstrates some of the
|
||||
formatting peculiarities:
|
||||
|
||||
```
|
||||
$ cat $HOME/.ripgreprc
|
||||
# Don't let ripgrep vomit really long lines to my terminal, and show a preview.
|
||||
--max-columns=150
|
||||
--max-columns-preview
|
||||
|
||||
# Add my 'web' type.
|
||||
--type-add
|
||||
web:*.{html,css,js}*
|
||||
|
||||
# Using glob patterns to include/exclude files or folders
|
||||
--glob=!git/*
|
||||
|
||||
# or
|
||||
--glob
|
||||
!git/*
|
||||
|
||||
# Set the colors.
|
||||
--colors=line:none
|
||||
--colors=line:style:bold
|
||||
|
||||
# Because who cares about case!?
|
||||
--smart-case
|
||||
```
|
||||
|
||||
When we use a flag that has a value, we either put the flag and the value on
|
||||
the same line but delimited by an `=` sign (e.g., `--max-columns=150`), or we
|
||||
put the flag and the value on two different lines. This is because ripgrep's
|
||||
argument parser knows to treat the single argument `--max-columns=150` as a
|
||||
flag with a value, but if we had written `--max-columns 150` in our
|
||||
configuration file, then ripgrep's argument parser wouldn't know what to do
|
||||
with it.
|
||||
|
||||
Putting the flag and value on different lines is exactly equivalent and is a
|
||||
matter of style.
|
||||
|
||||
Comments are encouraged so that you remember what the config is doing. Empty
|
||||
lines are OK too.
|
||||
|
||||
So let's say you're using the above configuration file, but while you're at a
|
||||
terminal, you really want to be able to see lines longer than 150 columns. What
|
||||
do you do? Thankfully, all you need to do is pass `--max-columns 0` (or `-M0`
|
||||
for short) on the command line, which will override your configuration file's
|
||||
setting. This works because ripgrep's configuration file is *prepended* to the
|
||||
explicit arguments you give it on the command line. Since flags given later
|
||||
override flags given earlier, everything works as expected. This works for most
|
||||
other flags as well, and each flag's documentation states which other flags
|
||||
override it.
|
||||
|
||||
If you're confused about what configuration file ripgrep is reading arguments
|
||||
from, then running ripgrep with the `--debug` flag should help clarify things.
|
||||
The debug output should note what config file is being loaded and the arguments
|
||||
that have been read from the configuration.
|
||||
|
||||
Finally, if you want to make absolutely sure that ripgrep *isn't* reading a
|
||||
configuration file, then you can pass the `--no-config` flag, which will always
|
||||
prevent ripgrep from reading extraneous configuration from the environment,
|
||||
regardless of what other methods of configuration are added to ripgrep in the
|
||||
future.
|
||||
|
||||
|
||||
### File encoding
|
||||
|
||||
[Text encoding](https://en.wikipedia.org/wiki/Character_encoding) is a complex
|
||||
topic, but we can try to summarize its relevancy to ripgrep:
|
||||
|
||||
* Files are generally just a bundle of bytes. There is no reliable way to know
|
||||
their encoding.
|
||||
* Either the encoding of the pattern must match the encoding of the files being
|
||||
searched, or a form of transcoding must be performed that converts either the
|
||||
pattern or the file to the same encoding as the other.
|
||||
* ripgrep tends to work best on plain text files, and among plain text files,
|
||||
the most popular encodings likely consist of ASCII, latin1 or UTF-8. As
|
||||
a special exception, UTF-16 is prevalent in Windows environments.
|
||||
|
||||
In light of the above, here is how ripgrep behaves when `--encoding auto` is
|
||||
given, which is the default:
|
||||
|
||||
* All input is assumed to be ASCII compatible (which means every byte that
|
||||
corresponds to an ASCII codepoint actually is an ASCII codepoint). This
|
||||
includes ASCII itself, latin1 and UTF-8.
|
||||
* ripgrep works best with UTF-8. For example, ripgrep's regular expression
|
||||
engine supports Unicode features. Namely, character classes like `\w` will
|
||||
match all word characters by Unicode's definition and `.` will match any
|
||||
Unicode codepoint instead of any byte. These constructions assume UTF-8,
|
||||
so they simply won't match when they come across bytes in a file that aren't
|
||||
UTF-8.
|
||||
* To handle the UTF-16 case, ripgrep will do something called "BOM sniffing"
|
||||
by default. That is, the first three bytes of a file will be read, and if
|
||||
they correspond to a UTF-16 BOM, then ripgrep will transcode the contents of
|
||||
the file from UTF-16 to UTF-8, and then execute the search on the transcoded
|
||||
version of the file. (This incurs a performance penalty since transcoding
|
||||
is slower than regex searching.) If the file contains invalid UTF-16, then
|
||||
the Unicode replacement codepoint is substituted in place of invalid code
|
||||
units.
|
||||
* To handle other cases, ripgrep provides a `-E/--encoding` flag, which permits
|
||||
you to specify an encoding from the
|
||||
[Encoding Standard](https://encoding.spec.whatwg.org/#concept-encoding-get).
|
||||
ripgrep will assume *all* files searched are the encoding specified (unless
|
||||
the file has a BOM) and will perform a transcoding step just like in the
|
||||
UTF-16 case described above.
|
||||
|
||||
By default, ripgrep will not require its input be valid UTF-8. That is, ripgrep
|
||||
can and will search arbitrary bytes. The key here is that if you're searching
|
||||
content that isn't UTF-8, then the usefulness of your pattern will degrade. If
|
||||
you're searching bytes that aren't ASCII compatible, then it's likely the
|
||||
pattern won't find anything. With all that said, this mode of operation is
|
||||
important, because it lets you find ASCII or UTF-8 *within* files that are
|
||||
otherwise arbitrary bytes.
|
||||
|
||||
As a special case, the `-E/--encoding` flag supports the value `none`, which
|
||||
will completely disable all encoding related logic, including BOM sniffing.
|
||||
When `-E/--encoding` is set to `none`, ripgrep will search the raw bytes of
|
||||
the underlying file with no transcoding step. For example, here's how you might
|
||||
search the raw UTF-16 encoding of the string `Шерлок`:
|
||||
|
||||
```
|
||||
$ rg '(?-u)\(\x045\x04@\x04;\x04>\x04:\x04' -E none -a some-utf16-file
|
||||
```
|
||||
|
||||
Of course, that's just an example meant to show how one can drop down into
|
||||
raw bytes. Namely, the simpler command works as you might expect automatically:
|
||||
|
||||
```
|
||||
$ rg 'Шерлок' some-utf16-file
|
||||
```
|
||||
|
||||
Finally, it is possible to disable ripgrep's Unicode support from within the
|
||||
regular expression. For example, let's say you wanted `.` to match any byte
|
||||
rather than any Unicode codepoint. (You might want this while searching a
|
||||
binary file, since `.` by default will not match invalid UTF-8.) You could do
|
||||
this by disabling Unicode via a regular expression flag:
|
||||
|
||||
```
|
||||
$ rg '(?-u:.)'
|
||||
```
|
||||
|
||||
This works for any part of the pattern. For example, the following will find
|
||||
any Unicode word character followed by any ASCII word character followed by
|
||||
another Unicode word character:
|
||||
|
||||
```
|
||||
$ rg '\w(?-u:\w)\w'
|
||||
```
|
||||
|
||||
|
||||
### Binary data
|
||||
|
||||
In addition to skipping hidden files and files in your `.gitignore` by default,
|
||||
ripgrep also attempts to skip binary files. ripgrep does this by default
|
||||
because binary files (like PDFs or images) are typically not things you want to
|
||||
search when searching for regex matches. Moreover, if content in a binary file
|
||||
did match, then it's possible for undesirable binary data to be printed to your
|
||||
terminal and wreak havoc.
|
||||
|
||||
Unfortunately, unlike skipping hidden files and respecting your `.gitignore`
|
||||
rules, a file cannot as easily be classified as binary. In order to figure out
|
||||
whether a file is binary, the most effective heuristic that balances
|
||||
correctness with performance is to simply look for `NUL` bytes. At that point,
|
||||
the determination is simple: a file is considered "binary" if and only if it
|
||||
contains a `NUL` byte somewhere in its contents.
|
||||
|
||||
The issue is that while most binary files will have a `NUL` byte toward the
|
||||
beginning of their contents, this is not necessarily true. The `NUL` byte might
|
||||
be the very last byte in a large file, but that file is still considered
|
||||
binary. While this leads to a fair amount of complexity inside ripgrep's
|
||||
implementation, it also results in some unintuitive user experiences.
|
||||
|
||||
At a high level, ripgrep operates in three different modes with respect to
|
||||
binary files:
|
||||
|
||||
1. The default mode is to attempt to remove binary files from a search
|
||||
completely. This is meant to mirror how ripgrep removes hidden files and
|
||||
files in your `.gitignore` automatically. That is, as soon as a file is
|
||||
detected as binary, searching stops. If a match was already printed (because
|
||||
it was detected long before a `NUL` byte), then ripgrep will print a warning
|
||||
message indicating that the search stopped prematurely. This default mode
|
||||
**only applies to files searched by ripgrep as a result of recursive
|
||||
directory traversal**, which is consistent with ripgrep's other automatic
|
||||
filtering. For example, `rg foo .file` will search `.file` even though it
|
||||
is hidden. Similarly, `rg foo binary-file` will search `binary-file` in "binary"
|
||||
mode automatically.
|
||||
2. Binary mode is similar to the default mode, except it will not always
|
||||
stop searching after it sees a `NUL` byte. Namely, in this mode, ripgrep
|
||||
will continue searching a file that is known to be binary until the first
|
||||
of two conditions is met: 1) the end of the file has been reached or 2) a
|
||||
match is or has been seen. This means that in binary mode, if ripgrep
|
||||
reports no matches, then there are no matches in the file. When a match does
|
||||
occur, ripgrep prints a message similar to one it prints when in its default
|
||||
mode indicating that the search has stopped prematurely. This mode can be
|
||||
forcefully enabled for all files with the `--binary` flag. The purpose of
|
||||
binary mode is to provide a way to discover matches in all files, but to
|
||||
avoid having binary data dumped into your terminal.
|
||||
3. Text mode completely disables all binary detection and searches all files
|
||||
as if they were text. This is useful when searching a file that is
|
||||
predominantly text but contains a `NUL` byte, or if you are specifically
|
||||
trying to search binary data. This mode can be enabled with the `-a/--text`
|
||||
flag. Note that when using this mode on very large binary files, it is
|
||||
possible for ripgrep to use a lot of memory.
|
||||
|
||||
Unfortunately, there is one additional complexity in ripgrep that can make it
|
||||
difficult to reason about binary files. That is, the way binary detection works
|
||||
depends on the way that ripgrep searches your files. Specifically:
|
||||
|
||||
* When ripgrep uses memory maps, then binary detection is only performed on the
|
||||
first few kilobytes of the file in addition to every matching line.
|
||||
* When ripgrep doesn't use memory maps, then binary detection is performed on
|
||||
all bytes searched.
|
||||
|
||||
This means that whether a file is detected as binary or not can change based
|
||||
on the internal search strategy used by ripgrep. If you prefer to keep
|
||||
ripgrep's binary file detection consistent, then you can disable memory maps
|
||||
via the `--no-mmap` flag. (The cost will be a small performance regression when
|
||||
searching very large files on some platforms.)
|
||||
|
||||
|
||||
### Common options
|
||||
|
||||
ripgrep has a lot of flags. Too many to keep in your head at once. This section
|
||||
is intended to give you a sampling of some of the most important and frequently
|
||||
used options that will likely impact how you use ripgrep on a regular basis.
|
||||
|
||||
* `-h`: Show ripgrep's condensed help output.
|
||||
* `--help`: Show ripgrep's longer form help output. (Nearly what you'd find in
|
||||
ripgrep's man page, so pipe it into a pager!)
|
||||
* `-i/--ignore-case`: When searching for a pattern, ignore case differences.
|
||||
That is, `rg -i fast` matches `fast`, `fASt`, `FAST`, etc.
|
||||
* `-S/--smart-case`: This is similar to `--ignore-case`, but disables itself
|
||||
if the pattern contains any uppercase letters. Usually this flag is put into
|
||||
an alias or a config file.
|
||||
* `-w/--word-regexp`: Require that all matches of the pattern be surrounded
|
||||
by word boundaries. That is, given `pattern`, the `--word-regexp` flag will
|
||||
cause ripgrep to behave as if `pattern` were actually `\b(?:pattern)\b`.
|
||||
* `-c/--count`: Report a count of total matched lines.
|
||||
* `--files`: Print the files that ripgrep *would* search, but don't actually
|
||||
search them.
|
||||
* `-a/--text`: Search binary files as if they were plain text.
|
||||
* `-z/--search-zip`: Search compressed files (gzip, bzip2, lzma, xz, lz4,
|
||||
brotli, zstd). This is disabled by default.
|
||||
* `-C/--context`: Show the lines surrounding a match.
|
||||
* `--sort path`: Force ripgrep to sort its output by file name. (This disables
|
||||
parallelism, so it might be slower.)
|
||||
* `-L/--follow`: Follow symbolic links while recursively searching.
|
||||
* `-M/--max-columns`: Limit the length of lines printed by ripgrep.
|
||||
* `--debug`: Shows ripgrep's debug output. This is useful for understanding
|
||||
why a particular file might be ignored from search, or what kinds of
|
||||
configuration ripgrep is loading from the environment.
|
||||
@@ -1,53 +0,0 @@
|
||||
#### What version of ripgrep are you using?
|
||||
|
||||
Replace this text with the output of `rg --version`.
|
||||
|
||||
#### How did you install ripgrep?
|
||||
|
||||
If you installed ripgrep with snap and are getting strange file permission or
|
||||
file not found errors, then please do not file a bug. Instead, use one of the
|
||||
GitHub binary releases.
|
||||
|
||||
#### What operating system are you using ripgrep on?
|
||||
|
||||
Replace this text with your operating system and version.
|
||||
|
||||
#### Describe your question, feature request, or bug.
|
||||
|
||||
If a question, please describe the problem you're trying to solve and give
|
||||
as much context as possible.
|
||||
|
||||
If a feature request, please describe the behavior you want and the motivation.
|
||||
Please also provide an example of how ripgrep would be used if your feature
|
||||
request were added.
|
||||
|
||||
If a bug, please see below.
|
||||
|
||||
#### If this is a bug, what are the steps to reproduce the behavior?
|
||||
|
||||
If possible, please include both your search patterns and the corpus on which
|
||||
you are searching. Unless the bug is very obvious, it is unlikely that it
|
||||
will be fixed if the ripgrep maintainers cannot reproduce it.
|
||||
|
||||
If the corpus is too big and you cannot decrease its size, file the bug anyway
|
||||
and the ripgrep maintainers will help figure out next steps.
|
||||
|
||||
#### If this is a bug, what is the actual behavior?
|
||||
|
||||
Show the command you ran and the actual output. Include the `--debug` flag in
|
||||
your invocation of ripgrep.
|
||||
|
||||
If the output is large, put it in a gist: https://gist.github.com/
|
||||
|
||||
If the output is small, put it in code fences:
|
||||
|
||||
```
|
||||
your
|
||||
output
|
||||
goes
|
||||
here
|
||||
```
|
||||
|
||||
#### If this is a bug, what is the expected behavior?
|
||||
|
||||
What do you think ripgrep should have done?
|
||||
594
README.md
594
README.md
@@ -1,198 +1,155 @@
|
||||
ripgrep (rg)
|
||||
------------
|
||||
ripgrep is a line-oriented search tool that recursively searches your current
|
||||
directory for a regex pattern. By default, ripgrep will respect your .gitignore
|
||||
and automatically skip hidden files/directories and binary files. ripgrep
|
||||
has first class support on Windows, macOS and Linux, with binary downloads
|
||||
available for [every release](https://github.com/BurntSushi/ripgrep/releases).
|
||||
ripgrep is similar to other popular search tools like The Silver Searcher, ack
|
||||
and grep.
|
||||
`ripgrep` is a line oriented search tool that recursively searches your current
|
||||
directory for a regex pattern while respecting your gitignore rules. To a first
|
||||
approximation, ripgrep combines the usability of The Silver Searcher (similar
|
||||
to `ack`) with the raw speed of GNU grep. `ripgrep` has first class support on
|
||||
Windows, macOS and Linux, with binary downloads available for
|
||||
[every release](https://github.com/BurntSushi/ripgrep/releases).
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/ripgrep)
|
||||
[](https://repology.org/project/ripgrep/badges)
|
||||
[](https://crates.io/crates/ripgrep)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
|
||||
### CHANGELOG
|
||||
|
||||
Please see the [CHANGELOG](CHANGELOG.md) for a release history.
|
||||
|
||||
### Documentation quick links
|
||||
|
||||
* [Installation](#installation)
|
||||
* [User Guide](GUIDE.md)
|
||||
* [Frequently Asked Questions](FAQ.md)
|
||||
* [Regex syntax](https://docs.rs/regex/1/regex/#syntax)
|
||||
* [Configuration files](GUIDE.md#configuration-file)
|
||||
* [Shell completions](FAQ.md#complete)
|
||||
* [Building](#building)
|
||||
|
||||
|
||||
### Screenshot of search results
|
||||
|
||||
[![A screenshot of a sample search with ripgrep](http://burntsushi.net/stuff/ripgrep1.png)](http://burntsushi.net/stuff/ripgrep1.png)
|
||||
|
||||
|
||||
### Quick examples comparing tools
|
||||
|
||||
This example searches the entire Linux kernel source tree (after running
|
||||
`make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where all matches must be
|
||||
words. Timings were collected on a system with an Intel i7-6900K 3.2 GHz, and
|
||||
ripgrep was compiled with SIMD enabled.
|
||||
ripgrep was compiled using the `compile` script in this repo.
|
||||
|
||||
Please remember that a single benchmark is never enough! See my
|
||||
[blog post on ripgrep](http://blog.burntsushi.net/ripgrep/)
|
||||
[blog post on `ripgrep`](http://blog.burntsushi.net/ripgrep/)
|
||||
for a very detailed comparison with more benchmarks and analysis.
|
||||
|
||||
| Tool | Command | Line count | Time |
|
||||
| ---- | ------- | ---------- | ---- |
|
||||
| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 450 | **0.106s** |
|
||||
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 0.553s |
|
||||
| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 450 | 0.589s |
|
||||
| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 2.266s |
|
||||
| [sift](https://github.com/svent/sift) | `sift --git -n -w '[A-Z]+_SUSPEND'` | 450 | 3.505s |
|
||||
| [ack](https://github.com/petdance/ack2) | `ack -w '[A-Z]+_SUSPEND'` | 1878 | 6.823s |
|
||||
| [The Platinum Searcher](https://github.com/monochromegane/the_platinum_searcher) | `pt -w -e '[A-Z]+_SUSPEND'` | 450 | 14.208s |
|
||||
| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 450 | **0.134s** |
|
||||
| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 450 | 0.753s |
|
||||
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 0.823s |
|
||||
| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 2.880s |
|
||||
| [sift](https://github.com/svent/sift) | `sift --git -n -w '[A-Z]+_SUSPEND'` | 450 | 3.656s |
|
||||
| [The Platinum Searcher](https://github.com/monochromegane/the_platinum_searcher) | `pt -w -e '[A-Z]+_SUSPEND'` | 450 | 12.369s |
|
||||
| [ack](https://github.com/petdance/ack2) | `ack -w '[A-Z]+_SUSPEND'` | 1878 | 16.952s |
|
||||
|
||||
(Yes, `ack` [has](https://github.com/petdance/ack2/issues/445) a
|
||||
[bug](https://github.com/petdance/ack2/issues/14).)
|
||||
|
||||
Here's another benchmark that disregards gitignore files and searches with a
|
||||
whitelist instead. The corpus is the same as in the previous benchmark, and the
|
||||
flags passed to each command ensure that they are doing equivalent work:
|
||||
flags passed to each command ensures that they are doing equivalent work:
|
||||
|
||||
| Tool | Command | Line count | Time |
|
||||
| ---- | ------- | ---------- | ---- |
|
||||
| ripgrep | `rg -L -u -tc -n -w '[A-Z]+_SUSPEND'` | 404 | **0.079s** |
|
||||
| [ucg](https://github.com/gvansickle/ucg) | `ucg --type=cc -w '[A-Z]+_SUSPEND'` | 390 | 0.163s |
|
||||
| [GNU grep](https://www.gnu.org/software/grep/) | `egrep -R -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 404 | 0.611s |
|
||||
| ripgrep | `rg -L -u -tc -n -w '[A-Z]+_SUSPEND'` | 404 | **0.108s** |
|
||||
| [ucg](https://github.com/gvansickle/ucg) | `ucg --type=cc -w '[A-Z]+_SUSPEND'` | 392 | 0.219s |
|
||||
| [GNU grep](https://www.gnu.org/software/grep/) | `egrep -R -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 404 | 0.733s |
|
||||
|
||||
(`ucg` [has slightly different behavior in the presence of symbolic links](https://github.com/gvansickle/ucg/issues/106).)
|
||||
|
||||
And finally, a straight-up comparison between ripgrep and GNU grep on a single
|
||||
And finally, a straight up comparison between ripgrep and GNU grep on a single
|
||||
large file (~9.3GB,
|
||||
[`OpenSubtitles2016.raw.en.gz`](http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.en.gz)):
|
||||
|
||||
| Tool | Command | Line count | Time |
|
||||
| ---- | ------- | ---------- | ---- |
|
||||
| ripgrep | `rg -w 'Sherlock [A-Z]\w+'` | 5268 | **2.108s** |
|
||||
| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C egrep -w 'Sherlock [A-Z]\w+'` | 5268 | 7.014s |
|
||||
| ripgrep | `rg -w 'Sherlock [A-Z]\w+'` | 5268 | **2.520s** |
|
||||
| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C egrep -w 'Sherlock [A-Z]\w+'` | 5268 | 7.143s |
|
||||
|
||||
In the above benchmark, passing the `-n` flag (for showing line numbers)
|
||||
increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep.
|
||||
increases the times to `3.081s` for ripgrep and `11.403s` for GNU grep.
|
||||
|
||||
### Why should I use `ripgrep`?
|
||||
|
||||
### Why should I use ripgrep?
|
||||
|
||||
* It can replace many use cases served by other search tools
|
||||
because it contains most of their features and is generally faster. (See
|
||||
[the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly
|
||||
replace grep.)
|
||||
* Like other tools specialized to code search, ripgrep defaults to recursive
|
||||
directory search and won't search files ignored by your `.gitignore` files.
|
||||
It also ignores hidden and binary files by default. ripgrep also implements
|
||||
full support for `.gitignore`, whereas there are many bugs related to that
|
||||
functionality in other code search tools claiming to provide the same
|
||||
functionality.
|
||||
* ripgrep can search specific types of files. For example, `rg -tpy foo`
|
||||
* It can replace both The Silver Searcher and GNU grep because it is generally
|
||||
faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement
|
||||
for both, but the feature sets are far more similar than different.)
|
||||
* Like The Silver Searcher, `ripgrep` defaults to recursive directory search
|
||||
and won't search files ignored by your `.gitignore` files. It also ignores
|
||||
hidden and binary files by default. `ripgrep` also implements full support
|
||||
for `.gitignore`, where as there are many bugs related to that functionality
|
||||
in The Silver Searcher.
|
||||
* `ripgrep` can search specific types of files. For example, `rg -tpy foo`
|
||||
limits your search to Python files and `rg -Tjs foo` excludes JavaScript
|
||||
files from your search. ripgrep can be taught about new file types with
|
||||
files from your search. `ripgrep` can be taught about new file types with
|
||||
custom matching rules.
|
||||
* ripgrep supports many features found in `grep`, such as showing the context
|
||||
* `ripgrep` supports many features found in `grep`, such as showing the context
|
||||
of search results, searching multiple patterns, highlighting matches with
|
||||
color and full Unicode support. Unlike GNU grep, ripgrep stays fast while
|
||||
color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
|
||||
supporting Unicode (which is always on).
|
||||
* ripgrep has optional support for switching its regex engine to use PCRE2.
|
||||
Among other things, this makes it possible to use look-around and
|
||||
backreferences in your patterns, which are not supported in ripgrep's default
|
||||
regex engine. PCRE2 support is enabled with `-P`.
|
||||
* ripgrep supports searching files in text encodings other than UTF-8, such
|
||||
* `ripgrep` supports searching files in text encodings other than UTF-8, such
|
||||
as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for
|
||||
automatically detecting UTF-16 is provided. Other text encodings must be
|
||||
specifically specified with the `-E/--encoding` flag.)
|
||||
* ripgrep supports searching files compressed in a common format (gzip, xz,
|
||||
lzma, bzip2 or lz4) with the `-z/--search-zip` flag.
|
||||
* ripgrep supports arbitrary input preprocessing filters which could be PDF
|
||||
text extraction, less supported decompression, decrypting, automatic encoding
|
||||
detection and so on.
|
||||
|
||||
In other words, use ripgrep if you like speed, filtering by default, fewer
|
||||
In other words, use `ripgrep` if you like speed, filtering by default, fewer
|
||||
bugs and Unicode support.
|
||||
|
||||
### Why shouldn't I use `ripgrep`?
|
||||
|
||||
### Why shouldn't I use ripgrep?
|
||||
I'd like to try to convince you why you *shouldn't* use `ripgrep`. This should
|
||||
give you a glimpse at some important downsides or missing features of
|
||||
`ripgrep`.
|
||||
|
||||
Despite initially not wanting to add every feature under the sun to ripgrep,
|
||||
over time, ripgrep has grown support for most features found in other file
|
||||
searching tools. This includes searching for results spanning across multiple
|
||||
lines, and opt-in support for PCRE2, which provides look-around and
|
||||
backreference support.
|
||||
|
||||
At this point, the primary reasons not to use ripgrep probably consist of one
|
||||
or more of the following:
|
||||
|
||||
* You need a portable and ubiquitous tool. While ripgrep works on Windows,
|
||||
macOS and Linux, it is not ubiquitous and it does not conform to any
|
||||
standard such as POSIX. The best tool for this job is good old grep.
|
||||
* There still exists some other feature (or bug) not listed in this README that
|
||||
you rely on that's in another tool that isn't in ripgrep.
|
||||
* There is a performance edge case where ripgrep doesn't do well where another
|
||||
tool does do well. (Please file a bug report!)
|
||||
* ripgrep can't be installed on your machine or isn't available for your
|
||||
platform. (Please file a bug report!)
|
||||
* `ripgrep` uses a regex engine based on finite automata, so if you want fancy
|
||||
regex features such as backreferences or look around, `ripgrep` won't give
|
||||
them to you. `ripgrep` does support lots of things though, including, but not
|
||||
limited to: lazy quantification (e.g., `a+?`), repetitions (e.g., `a{2,5}`),
|
||||
begin/end assertions (e.g., `^\w+$`), word boundaries (e.g., `\bfoo\b`), and
|
||||
support for Unicode categories (e.g., `\p{Sc}` to match currency symbols or
|
||||
`\p{Lu}` to match any uppercase letter). (Fancier regexes will never be
|
||||
supported.)
|
||||
* `ripgrep` doesn't yet support searching compressed files. (Likely to be
|
||||
supported in the future.)
|
||||
* `ripgrep` doesn't have multiline search. (Unlikely to ever be supported.)
|
||||
|
||||
In other words, if you like fancy regexes, searching compressed files or
|
||||
multiline search, then `ripgrep` may not quite meet your needs (yet).
|
||||
|
||||
### Is it really faster than everything else?
|
||||
|
||||
Generally, yes. A large number of benchmarks with detailed analysis for each is
|
||||
[available on my blog](http://blog.burntsushi.net/ripgrep/).
|
||||
|
||||
Summarizing, ripgrep is fast because:
|
||||
Summarizing, `ripgrep` is fast because:
|
||||
|
||||
* It is built on top of
|
||||
[Rust's regex engine](https://github.com/rust-lang-nursery/regex).
|
||||
Rust's regex engine uses finite automata, SIMD and aggressive literal
|
||||
optimizations to make searching very fast. (PCRE2 support can be opted into
|
||||
with the `-P/--pcre2` flag.)
|
||||
optimizations to make searching very fast.
|
||||
* Rust's regex library maintains performance with full Unicode support by
|
||||
building UTF-8 decoding directly into its deterministic finite automaton
|
||||
engine.
|
||||
* It supports searching with either memory maps or by searching incrementally
|
||||
with an intermediate buffer. The former is better for single files and the
|
||||
latter is better for large directories. ripgrep chooses the best searching
|
||||
latter is better for large directories. `ripgrep` chooses the best searching
|
||||
strategy for you automatically.
|
||||
* Applies your ignore patterns in `.gitignore` files using a
|
||||
[`RegexSet`](https://docs.rs/regex/1/regex/struct.RegexSet.html).
|
||||
[`RegexSet`](https://doc.rust-lang.org/regex/regex/struct.RegexSet.html).
|
||||
That means a single file path can be matched against multiple glob patterns
|
||||
simultaneously.
|
||||
* It uses a lock-free parallel recursive directory iterator, courtesy of
|
||||
[`crossbeam`](https://docs.rs/crossbeam) and
|
||||
[`ignore`](https://docs.rs/ignore).
|
||||
|
||||
|
||||
### Feature comparison
|
||||
|
||||
Andy Lester, author of [ack](https://beyondgrep.com/), has published an
|
||||
excellent table comparing the features of ack, ag, git-grep, GNU grep and
|
||||
ripgrep: https://beyondgrep.com/feature-comparison/
|
||||
|
||||
Note that ripgrep has grown a few significant new features recently that
|
||||
are not yet present in Andy's table. This includes, but is not limited to,
|
||||
configuration files, passthru, support for searching compressed files,
|
||||
multiline search and opt-in fancy regex support via PCRE2.
|
||||
|
||||
|
||||
### Installation
|
||||
|
||||
The binary name for ripgrep is `rg`.
|
||||
The binary name for `ripgrep` is `rg`.
|
||||
|
||||
**[Archives of precompiled binaries for ripgrep are available for Windows,
|
||||
**[Archives of precompiled binaries for `ripgrep` are available for Windows,
|
||||
macOS and Linux.](https://github.com/BurntSushi/ripgrep/releases)** Users of
|
||||
platforms not explicitly mentioned below are advised to download one of these
|
||||
archives.
|
||||
platforms not explicitly mentioned below (such as Debian and Ubuntu) are advised
|
||||
to download one of these archives.
|
||||
|
||||
Linux binaries are static executables. Windows binaries are available either as
|
||||
built with MinGW (GNU) or with Microsoft Visual C++ (MSVC). When possible,
|
||||
@@ -213,134 +170,51 @@ optimizations) by utilizing a custom tap:
|
||||
|
||||
```
|
||||
$ brew tap burntsushi/ripgrep https://github.com/BurntSushi/ripgrep.git
|
||||
$ brew install ripgrep-bin
|
||||
$ brew install burntsushi/ripgrep/ripgrep-bin
|
||||
```
|
||||
|
||||
If you're a **MacPorts** user, then you can install ripgrep from the
|
||||
[official ports](https://www.macports.org/ports.php?by=name&substr=ripgrep):
|
||||
|
||||
```
|
||||
$ sudo port install ripgrep
|
||||
```
|
||||
|
||||
If you're a **Windows Chocolatey** user, then you can install ripgrep from the
|
||||
[official repo](https://chocolatey.org/packages/ripgrep):
|
||||
If you're a **Windows Chocolatey** user, then you can install `ripgrep` from the [official repo](https://chocolatey.org/packages/ripgrep):
|
||||
|
||||
```
|
||||
$ choco install ripgrep
|
||||
```
|
||||
|
||||
If you're a **Windows Scoop** user, then you can install ripgrep from the
|
||||
[official bucket](https://github.com/lukesampson/scoop/blob/master/bucket/ripgrep.json):
|
||||
|
||||
```
|
||||
$ scoop install ripgrep
|
||||
```
|
||||
|
||||
If you're an **Arch Linux** user, then you can install ripgrep from the official repos:
|
||||
If you're an **Arch Linux** user, then you can install `ripgrep` from the official repos:
|
||||
|
||||
```
|
||||
$ pacman -S ripgrep
|
||||
```
|
||||
|
||||
If you're a **Gentoo** user, you can install ripgrep from the
|
||||
[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep):
|
||||
If you're a **Gentoo** user, you can install `ripgrep` from the [official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep):
|
||||
|
||||
```
|
||||
$ emerge sys-apps/ripgrep
|
||||
$ emerge ripgrep
|
||||
```
|
||||
|
||||
If you're a **Fedora** user, you can install ripgrep from official
|
||||
repositories.
|
||||
If you're a **Fedora 24+** user, you can install `ripgrep` from [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
|
||||
|
||||
```
|
||||
$ sudo dnf install ripgrep
|
||||
$ dnf copr enable carlwgeorge/ripgrep
|
||||
$ dnf install ripgrep
|
||||
```
|
||||
|
||||
If you're an **openSUSE Leap 15.0** user, you can install ripgrep from the
|
||||
[utilities repo](https://build.opensuse.org/package/show/utilities/ripgrep):
|
||||
If you're a **RHEL/CentOS 7** user, you can install `ripgrep` from [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
|
||||
|
||||
```
|
||||
$ sudo zypper ar https://download.opensuse.org/repositories/utilities/openSUSE_Leap_15.0/utilities.repo
|
||||
$ sudo zypper install ripgrep
|
||||
$ yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo
|
||||
$ yum install ripgrep
|
||||
```
|
||||
|
||||
|
||||
If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the
|
||||
[official repo](http://software.opensuse.org/package/ripgrep):
|
||||
|
||||
```
|
||||
$ sudo zypper install ripgrep
|
||||
```
|
||||
|
||||
If you're a **RHEL/CentOS 7** user, you can install ripgrep from
|
||||
[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
|
||||
|
||||
```
|
||||
$ sudo yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo
|
||||
$ sudo yum install ripgrep
|
||||
```
|
||||
|
||||
If you're a **Nix** user, you can install ripgrep from
|
||||
If you're a **Nix** user, you can install `ripgrep` from
|
||||
[nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/text/ripgrep/default.nix):
|
||||
|
||||
```
|
||||
$ nix-env --install ripgrep
|
||||
$ # (Or using the attribute name, which is also ripgrep.)
|
||||
$ # (Or using the attribute name, which is also `ripgrep`.)
|
||||
```
|
||||
|
||||
If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**),
|
||||
then ripgrep can be installed using a binary `.deb` file provided in each
|
||||
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases).
|
||||
|
||||
```
|
||||
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/0.10.0/ripgrep_0.10.0_amd64.deb
|
||||
$ sudo dpkg -i ripgrep_0.10.0_amd64.deb
|
||||
```
|
||||
|
||||
If you run Debian Buster (currently Debian testing) or Debian sid, ripgrep is
|
||||
[officially maintained by Debian](https://tracker.debian.org/pkg/rust-ripgrep).
|
||||
```
|
||||
$ sudo apt-get install ripgrep
|
||||
```
|
||||
|
||||
If you're an **Ubuntu Cosmic (18.10)** (or newer) user, ripgrep is
|
||||
[available](https://launchpad.net/ubuntu/+source/rust-ripgrep) using the same
|
||||
packaging as Debian:
|
||||
|
||||
```
|
||||
$ sudo apt-get install ripgrep
|
||||
```
|
||||
|
||||
(N.B. Various snaps for ripgrep on Ubuntu are also available, but none of them
|
||||
seem to work right and generate a number of very strange bug reports that I
|
||||
don't know how to fix and don't have the time to fix. Therefore, it is no
|
||||
longer a recommended installation option.)
|
||||
|
||||
If you're a **FreeBSD** user, then you can install ripgrep from the
|
||||
[official ports](https://www.freshports.org/textproc/ripgrep/):
|
||||
|
||||
```
|
||||
# pkg install ripgrep
|
||||
```
|
||||
|
||||
If you're an **OpenBSD** user, then you can install ripgrep from the
|
||||
[official ports](http://openports.se/textproc/ripgrep):
|
||||
|
||||
```
|
||||
$ doas pkg_add ripgrep
|
||||
```
|
||||
|
||||
If you're a **NetBSD** user, then you can install ripgrep from
|
||||
[pkgsrc](http://pkgsrc.se/textproc/ripgrep):
|
||||
|
||||
```
|
||||
# pkgin install ripgrep
|
||||
```
|
||||
|
||||
If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
|
||||
|
||||
* Note that the minimum supported version of Rust for ripgrep is **1.34.0**,
|
||||
If you're a **Rust programmer**, `ripgrep` can be installed with `cargo`.
|
||||
* Note that the minimum supported version of Rust for ripgrep is **1.17**,
|
||||
although ripgrep may work with older versions.
|
||||
* Note that the binary may be bigger than expected because it contains debug
|
||||
symbols. This is intentional. To remove debug symbols and therefore reduce
|
||||
@@ -350,18 +224,145 @@ If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
|
||||
$ cargo install ripgrep
|
||||
```
|
||||
|
||||
ripgrep isn't currently in any other package repositories.
|
||||
`ripgrep` isn't currently in any other package repositories.
|
||||
[I'd like to change that](https://github.com/BurntSushi/ripgrep/issues/10).
|
||||
|
||||
### Whirlwind tour
|
||||
|
||||
The command line usage of `ripgrep` doesn't differ much from other tools that
|
||||
perform a similar function, so you probably already know how to use `ripgrep`.
|
||||
The full details can be found in `rg --help`, but let's go on a whirlwind tour.
|
||||
|
||||
`ripgrep` detects when it's printing to a terminal, and will automatically
|
||||
colorize your output and show line numbers, just like The Silver Searcher.
|
||||
Coloring works on Windows too! Colors can be controlled more granularly with
|
||||
the `--color` flag.
|
||||
|
||||
One last thing before we get started: generally speaking, `ripgrep` assumes the
|
||||
input it is reading is UTF-8. However, if ripgrep notices a file is encoded as
|
||||
UTF-16, then it will know how to search it. For other encodings, you'll need to
|
||||
explicitly specify them with the `-E/--encoding` flag.
|
||||
|
||||
To recursively search the current directory, while respecting all `.gitignore`
|
||||
files, ignore hidden files and directories and skip binary files:
|
||||
|
||||
```
|
||||
$ rg foobar
|
||||
```
|
||||
|
||||
The above command also respects all `.ignore` files, including in parent
|
||||
directories. `.ignore` files can be used when `.gitignore` files are
|
||||
insufficient. In all cases, `.ignore` patterns take precedence over
|
||||
`.gitignore`.
|
||||
|
||||
To ignore all ignore files, use `-u`. To additionally search hidden files
|
||||
and directories, use `-uu`. To additionally search binary files, use `-uuu`.
|
||||
(In other words, "search everything, dammit!") In particular, `rg -uuu` is
|
||||
similar to `grep -a -r`.
|
||||
|
||||
```
|
||||
$ rg -uu foobar # similar to `grep -r`
|
||||
$ rg -uuu foobar # similar to `grep -a -r`
|
||||
```
|
||||
|
||||
(Tip: If your ignore files aren't being adhered to like you expect, run your
|
||||
search with the `--debug` flag.)
|
||||
|
||||
Make the search case insensitive with `-i`, invert the search with `-v` or
|
||||
show the 2 lines before and after every search result with `-C2`.
|
||||
|
||||
Force all matches to be surrounded by word boundaries with `-w`.
|
||||
|
||||
Search and replace (find first and last names and swap them):
|
||||
|
||||
```
|
||||
$ rg '([A-Z][a-z]+)\s+([A-Z][a-z]+)' --replace '$2, $1'
|
||||
```
|
||||
|
||||
Named groups are supported:
|
||||
|
||||
```
|
||||
$ rg '(?P<first>[A-Z][a-z]+)\s+(?P<last>[A-Z][a-z]+)' --replace '$last, $first'
|
||||
```
|
||||
|
||||
Up the ante with full Unicode support, by matching any uppercase Unicode letter
|
||||
followed by any sequence of lowercase Unicode letters (good luck doing this
|
||||
with other search tools!):
|
||||
|
||||
```
|
||||
$ rg '(\p{Lu}\p{Ll}+)\s+(\p{Lu}\p{Ll}+)' --replace '$2, $1'
|
||||
```
|
||||
|
||||
Search only files matching a particular glob:
|
||||
|
||||
```
|
||||
$ rg foo -g 'README.*'
|
||||
```
|
||||
|
||||
<!--*-->
|
||||
|
||||
Or exclude files matching a particular glob:
|
||||
|
||||
```
|
||||
$ rg foo -g '!*.min.js'
|
||||
```
|
||||
|
||||
Search and return paths matching a particular glob (i.e., `-g` flag in ag/ack):
|
||||
|
||||
```
|
||||
$ rg -g 'doc*' --files
|
||||
```
|
||||
|
||||
Search only HTML and CSS files:
|
||||
|
||||
```
|
||||
$ rg -thtml -tcss foobar
|
||||
```
|
||||
|
||||
Search everything except for Javascript files:
|
||||
|
||||
```
|
||||
$ rg -Tjs foobar
|
||||
```
|
||||
|
||||
To see a list of types supported, run `rg --type-list`. To add a new type, use
|
||||
`--type-add`, which must be accompanied by a pattern for searching (`rg` won't
|
||||
persist your type settings):
|
||||
|
||||
```
|
||||
$ rg --type-add 'foo:*.{foo,foobar}' -tfoo bar
|
||||
```
|
||||
|
||||
The type `foo` will now match any file ending with the `.foo` or `.foobar`
|
||||
extensions.
|
||||
|
||||
### Regex syntax
|
||||
|
||||
The syntax supported is
|
||||
[documented as part of Rust's regex library](https://doc.rust-lang.org/regex/regex/index.html#syntax).
|
||||
|
||||
### Shell completions
|
||||
|
||||
Shell completion files are included in the release tarball for Bash, Fish, Zsh
|
||||
and PowerShell.
|
||||
|
||||
For **bash**, move `complete/rg.bash-completion` to `$XDG_CONFIG_HOME/bash_completion`
|
||||
or `/etc/bash_completion.d/`.
|
||||
|
||||
For **fish**, move `complete/rg.fish` to `$HOME/.config/fish/completions/`.
|
||||
|
||||
For **PowerShell**, add `. _rg.ps1` to your PowerShell
|
||||
[profile](https://technet.microsoft.com/en-us/library/bb613488(v=vs.85).aspx)
|
||||
(note the leading period). If the `_rg.ps1` file is not on your `PATH`, do
|
||||
`. /path/to/_rg.ps1` instead.
|
||||
|
||||
For **zsh**, move `complete/_rg` to one of your `$fpath` directories.
|
||||
|
||||
### Building
|
||||
|
||||
ripgrep is written in Rust, so you'll need to grab a
|
||||
`ripgrep` is written in Rust, so you'll need to grab a
|
||||
[Rust installation](https://www.rust-lang.org/) in order to compile it.
|
||||
ripgrep compiles with Rust 1.34.0 (stable) or newer. In general, ripgrep tracks
|
||||
the latest stable release of the Rust compiler.
|
||||
|
||||
To build ripgrep:
|
||||
`ripgrep` compiles with Rust 1.17 (stable) or newer. Building is easy:
|
||||
|
||||
```
|
||||
$ git clone https://github.com/BurntSushi/ripgrep
|
||||
@@ -371,60 +372,141 @@ $ ./target/release/rg --version
|
||||
0.1.3
|
||||
```
|
||||
|
||||
If you have a Rust nightly compiler and a recent Intel CPU, then you can enable
|
||||
additional optional SIMD acceleration like so:
|
||||
If you have a Rust nightly compiler, then you can enable optional SIMD
|
||||
acceleration like so:
|
||||
|
||||
```
|
||||
RUSTFLAGS="-C target-cpu=native" cargo build --release --features 'simd-accel'
|
||||
RUSTFLAGS="-C target-cpu=native" cargo build --release --features 'simd-accel avx-accel'
|
||||
```
|
||||
|
||||
The `simd-accel` feature enables SIMD support in certain ripgrep dependencies
|
||||
(responsible for transcoding). They are not necessary to get SIMD optimizations
|
||||
for search; those are enabled automatically. Hopefully, some day, the
|
||||
`simd-accel` feature will similarly become unnecessary. **WARNING:** Currently,
|
||||
enabling this option can increase compilation times dramatically.
|
||||
|
||||
Finally, optional PCRE2 support can be built with ripgrep by enabling the
|
||||
`pcre2` feature:
|
||||
|
||||
```
|
||||
$ cargo build --release --features 'pcre2'
|
||||
```
|
||||
|
||||
(Tip: use `--features 'pcre2 simd-accel'` to also include compile time SIMD
|
||||
optimizations, which will only work with a nightly compiler.)
|
||||
|
||||
Enabling the PCRE2 feature works with a stable Rust compiler and will
|
||||
attempt to automatically find and link with your system's PCRE2 library via
|
||||
`pkg-config`. If one doesn't exist, then ripgrep will build PCRE2 from source
|
||||
using your system's C compiler and then statically link it into the final
|
||||
executable. Static linking can be forced even when there is an available PCRE2
|
||||
system library by either building ripgrep with the MUSL target or by setting
|
||||
`PCRE2_SYS_STATIC=1`.
|
||||
|
||||
ripgrep can be built with the MUSL target on Linux by first installing the MUSL
|
||||
library on your system (consult your friendly neighborhood package manager).
|
||||
Then you just need to add MUSL support to your Rust toolchain and rebuild
|
||||
ripgrep, which yields a fully static executable:
|
||||
|
||||
```
|
||||
$ rustup target add x86_64-unknown-linux-musl
|
||||
$ cargo build --release --target x86_64-unknown-linux-musl
|
||||
```
|
||||
|
||||
Applying the `--features` flag from above works as expected. If you want to
|
||||
build a static executable with MUSL and with PCRE2, then you will need to have
|
||||
`musl-gcc` installed, which might be in a separate package from the actual
|
||||
MUSL library, depending on your Linux distribution.
|
||||
|
||||
If your machine doesn't support AVX instructions, then simply remove
|
||||
`avx-accel` from the features list. Similarly for SIMD.
|
||||
|
||||
### Running tests
|
||||
|
||||
ripgrep is relatively well-tested, including both unit tests and integration
|
||||
`ripgrep` is relatively well tested, including both unit tests and integration
|
||||
tests. To run the full test suite, use:
|
||||
|
||||
```
|
||||
$ cargo test --all
|
||||
$ cargo test
|
||||
```
|
||||
|
||||
from the repository root.
|
||||
|
||||
### Tips
|
||||
|
||||
#### Windows Powershell
|
||||
|
||||
##### Powershell Profile
|
||||
|
||||
To customize PowerShell on start-up, a special PowerShell profile script has to be created.
|
||||
To find its location, type `$profile`.
|
||||
See [more](https://technet.microsoft.com/en-us/library/bb613488(v=vs.85).aspx) for profile details.
|
||||
|
||||
Any PowerShell code in this file is evaluated when the console starts.
|
||||
This way, you can have your own aliases created at start-up.
|
||||
|
||||
##### Setup function alias
|
||||
|
||||
You will often want to create an alias for a favourite utility.
|
||||
|
||||
But PowerShell function aliases do not behave like a typical Linux shell alias.
|
||||
|
||||
You always need to propagate arguments and **Stdin** input.
|
||||
But it cannot be done simply as `function grep() { $input | rg.exe --hidden $args }`
|
||||
|
||||
Use the example below as a reference for how to set up an alias in PowerShell.
|
||||
|
||||
```powershell
|
||||
function grep {
|
||||
$count = @($input).Count
|
||||
$input.Reset()
|
||||
|
||||
if ($count) {
|
||||
$input | rg.exe --hidden $args
|
||||
}
|
||||
else {
|
||||
rg.exe --hidden $args
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Powershell special variables:
|
||||
* input - the PowerShell **Stdin** object, which allows you to access its content.
|
||||
* args - the array of arguments passed to this function.
|
||||
|
||||
This alias checks whether there is **Stdin** input and propagates it only if there are some lines.
|
||||
Otherwise, an empty `$input` will cause PowerShell to make `rg` search an empty **Stdin**.
|
||||
|
||||
##### Piping non-ASCII content to ripgrep
|
||||
|
||||
When piping input into native executables in PowerShell, the encoding of the
|
||||
input is controlled by the `$OutputEncoding` variable. By default, this is set
|
||||
to US-ASCII, and any characters in the pipeline that don't have encodings in
|
||||
US-ASCII are converted to `?` (question mark) characters.
|
||||
|
||||
To change this setting, set `$OutputEncoding` to a different encoding, as
|
||||
represented by a .NET encoding object. Some common examples are below. The
|
||||
value of this variable is reset when PowerShell restarts, so to make this
|
||||
change take effect every time PowerShell is started add a line setting the
|
||||
variable into your PowerShell profile.
|
||||
|
||||
Example `$OutputEncoding` settings:
|
||||
* UTF-8 without BOM: `$OutputEncoding = [System.Text.UTF8Encoding]::new()`
|
||||
* The console's output encoding:
|
||||
`$OutputEncoding = [System.Console]::OutputEncoding`
|
||||
|
||||
If you continue to have encoding problems, you can also force the encoding
|
||||
that the console will use for printing to UTF-8 with
|
||||
`[System.Console]::OutputEncoding = [System.Text.Encoding]::UTF8`. This
|
||||
will also reset when PowerShell is restarted, so you can add that line
|
||||
to your profile as well if you want to make the setting permanent.
|
||||
|
||||
### Known issues
|
||||
|
||||
#### I just hit Ctrl+C in the middle of ripgrep's output and now my terminal's foreground color is wrong!
|
||||
|
||||
Type in `color` in cmd.exe (Command Prompt) and `echo -ne "\033[0m"` on Unix
|
||||
to restore your original foreground color.
|
||||
|
||||
In PowerShell, you can add the following code to your profile which will
|
||||
restore the original foreground color when `Reset-ForegroundColor` is called.
|
||||
Including the `Set-Alias` line will allow you to call it with simply `color`.
|
||||
|
||||
```powershell
|
||||
$OrigFgColor = $Host.UI.RawUI.ForegroundColor
|
||||
function Reset-ForegroundColor {
|
||||
$Host.UI.RawUI.ForegroundColor = $OrigFgColor
|
||||
}
|
||||
Set-Alias -Name color -Value Reset-ForegroundColor
|
||||
```
|
||||
|
||||
PR [#187](https://github.com/BurntSushi/ripgrep/pull/187) fixed this, and it
|
||||
was later deprecated in
|
||||
[#281](https://github.com/BurntSushi/ripgrep/issues/281). A full explanation is
|
||||
available [here][msys issue explanation].
|
||||
|
||||
[msys issue explanation]: https://github.com/BurntSushi/ripgrep/issues/281#issuecomment-269093893
|
||||
|
||||
#### When I run `rg` it executes some other command!
|
||||
|
||||
It's likely that you have a shell alias or even another tool called `rg` which
|
||||
is interfering with `ripgrep` — run `which rg` to see what it is.
|
||||
|
||||
(Notably, the `rails` plug-in for
|
||||
[Oh My Zsh](https://github.com/robbyrussell/oh-my-zsh/wiki/Plugins#rails) sets
|
||||
up an `rg` alias for `rails generate`.)
|
||||
|
||||
Problems like this can be resolved in one of several ways:
|
||||
|
||||
* If you're using the OMZ `rails` plug-in, disable it by editing the `plugins`
|
||||
array in your zsh configuration.
|
||||
* Temporarily bypass an existing `rg` alias by calling `ripgrep` as
|
||||
`command rg`, `\rg`, or `'rg'`.
|
||||
* Temporarily bypass an existing alias or another tool named `rg` by calling
|
||||
`ripgrep` by its full path (e.g., `/usr/bin/rg` or `/usr/local/bin/rg`).
|
||||
* Permanently disable an existing `rg` alias by adding `unalias rg` to the
|
||||
bottom of your shell configuration file (e.g., `.bash_profile` or `.zshrc`).
|
||||
* Give `ripgrep` its own alias that doesn't conflict with other tools/aliases by
|
||||
adding a line like the following to the bottom of your shell configuration
|
||||
file: `alias ripgrep='command rg'`
|
||||
|
||||
57
appveyor.yml
57
appveyor.yml
@@ -1,39 +1,16 @@
|
||||
cache:
|
||||
- c:\cargo\registry
|
||||
- c:\cargo\git
|
||||
|
||||
init:
|
||||
- mkdir c:\cargo
|
||||
- mkdir c:\rustup
|
||||
- SET PATH=c:\cargo\bin;%PATH%
|
||||
|
||||
clone_folder: c:\projects\ripgrep
|
||||
|
||||
environment:
|
||||
CARGO_HOME: "c:\\cargo"
|
||||
RUSTUP_HOME: "c:\\rustup"
|
||||
CARGO_TARGET_DIR: "c:\\projects\\ripgrep\\target"
|
||||
global:
|
||||
PROJECT_NAME: ripgrep
|
||||
RUST_BACKTRACE: full
|
||||
matrix:
|
||||
- TARGET: x86_64-pc-windows-gnu
|
||||
CHANNEL: stable
|
||||
BITS: 64
|
||||
MSYS2: 1
|
||||
- TARGET: x86_64-pc-windows-msvc
|
||||
CHANNEL: stable
|
||||
BITS: 64
|
||||
- TARGET: i686-pc-windows-gnu
|
||||
CHANNEL: stable
|
||||
BITS: 32
|
||||
MSYS2: 1
|
||||
- TARGET: i686-pc-windows-msvc
|
||||
CHANNEL: stable
|
||||
BITS: 32
|
||||
|
||||
matrix:
|
||||
fast_finish: true
|
||||
- TARGET: x86_64-pc-windows-gnu
|
||||
CHANNEL: stable
|
||||
- TARGET: x86_64-pc-windows-msvc
|
||||
CHANNEL: stable
|
||||
|
||||
# Install Rust and Cargo
|
||||
# (Based on https://github.com/rust-lang/libc/blob/master/appveyor.yml)
|
||||
@@ -41,25 +18,32 @@ install:
|
||||
- curl -sSf -o rustup-init.exe https://win.rustup.rs/
|
||||
- rustup-init.exe -y --default-host %TARGET%
|
||||
- set PATH=%PATH%;C:\Users\appveyor\.cargo\bin
|
||||
- if defined MSYS2 set PATH=C:\msys64\mingw%BITS%\bin;%PATH%
|
||||
- if defined MSYS2_BITS set PATH=%PATH%;C:\msys64\mingw%MSYS2_BITS%\bin
|
||||
- rustc -V
|
||||
- cargo -V
|
||||
|
||||
# Hack to work around a harmless warning in Appveyor builds?
|
||||
# ???
|
||||
build: false
|
||||
|
||||
# Equivalent to Travis' `script` phase
|
||||
# TODO modify this phase as you see fit
|
||||
test_script:
|
||||
- cargo test --verbose --all --features pcre2
|
||||
- cargo test --verbose
|
||||
- cargo test --verbose --manifest-path grep/Cargo.toml
|
||||
- cargo test --verbose --manifest-path globset/Cargo.toml
|
||||
- cargo test --verbose --manifest-path ignore/Cargo.toml
|
||||
- cargo test --verbose --manifest-path wincolor/Cargo.toml
|
||||
- cargo test --verbose --manifest-path termcolor/Cargo.toml
|
||||
|
||||
before_deploy:
|
||||
# Generate artifacts for release
|
||||
- cargo build --release --features pcre2
|
||||
# TODO(burntsushi): How can we enable SSSE3 on Windows?
|
||||
- cargo build --release
|
||||
- mkdir staging
|
||||
- copy target\release\rg.exe staging
|
||||
- ps: copy target\release\build\ripgrep-*\out\_rg.ps1 staging
|
||||
- cd staging
|
||||
# release zipfile will look like 'ripgrep-1.2.3-x86_64-pc-windows-msvc'
|
||||
# release zipfile will look like 'rust-everywhere-v1.2.3-x86_64-pc-windows-msvc'
|
||||
- 7z a ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip *
|
||||
- appveyor PushArtifact ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip
|
||||
|
||||
@@ -72,10 +56,17 @@ deploy:
|
||||
provider: GitHub
|
||||
# deploy when a new tag is pushed and only on the stable channel
|
||||
on:
|
||||
# channel to use to produce the release artifacts
|
||||
# NOTE make sure you only release *once* per target
|
||||
# TODO you may want to pick a different channel
|
||||
CHANNEL: stable
|
||||
appveyor_repo_tag: true
|
||||
|
||||
branches:
|
||||
only:
|
||||
- /^\d+\.\d+\.\d+$/
|
||||
- /\d+\.\d+\.\d+/
|
||||
- master
|
||||
# - appveyor
|
||||
# - /\d+\.\d+\.\d+/
|
||||
# except:
|
||||
# - master
|
||||
|
||||
@@ -1,59 +0,0 @@
|
||||
This directory contains updated benchmarks as of 2018-01-08. They were captured
|
||||
via the benchsuite script at `benchsuite/benchsuite` from the root of this
|
||||
repository. The command that was run:
|
||||
|
||||
$ ./benchsuite \
|
||||
--dir /tmp/benchsuite \
|
||||
--raw runs/2018-01-08-archlinux-cheetah/raw.csv \
|
||||
--warmup-iter 1 \
|
||||
--bench-iter 5
|
||||
|
||||
These results are most directly comparable to the
|
||||
`2016-09-22-archlinux-cheetah` run in the parent directory.
|
||||
|
||||
The versions of each tool are as follows:
|
||||
|
||||
$ grep -V
|
||||
grep (GNU grep) 3.1
|
||||
|
||||
$ ag -V
|
||||
ag version 2.1.0
|
||||
Features:
|
||||
+jit +lzma +zlib
|
||||
|
||||
$ sift -V
|
||||
sift 0.8.0 (linux/amd64)
|
||||
built from commit 2ca94717 (which seems to be 0.9.0)
|
||||
|
||||
$ pt --version
|
||||
pt version 2.1.4
|
||||
|
||||
$ ucg -V
|
||||
UniversalCodeGrep 0.3.3
|
||||
[...]
|
||||
Build info
|
||||
|
||||
Repo version: 0.3.3-251-g9b5a3e3
|
||||
|
||||
Compiler info:
|
||||
Name ($(CXX)): "g++ -std=gnu++1z"
|
||||
Version string: "g++ (GCC) 7.2.1 20171224"
|
||||
|
||||
ISA extensions in use:
|
||||
sse4.2: yes
|
||||
popcnt: yes
|
||||
|
||||
libpcre info:
|
||||
Not linked against libpcre.
|
||||
|
||||
libpcre2-8 info:
|
||||
Version: 10.30 2017-08-14
|
||||
JIT support built in?: yes
|
||||
JIT target architecture: x86 64bit (little endian + unaligned)
|
||||
Newline style: LF
|
||||
|
||||
The version of ripgrep was compiled from source on commit 85d463c0, with the
|
||||
simd-accel and avx-accel features enabled:
|
||||
|
||||
$ export RUSTFLAGS="-C target-cpu=native"
|
||||
$ cargo build --release --features 'simd-accel avx-accel'
|
||||
@@ -1,806 +0,0 @@
|
||||
benchmark,warmup_iter,iter,name,command,duration,lines,env
|
||||
linux_alternates,1,5,rg (ignore),rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.10186767578125,68,
|
||||
linux_alternates,1,5,rg (ignore),rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.10199356079101562,68,
|
||||
linux_alternates,1,5,rg (ignore),rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09750819206237793,68,
|
||||
linux_alternates,1,5,rg (ignore),rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09634733200073242,68,
|
||||
linux_alternates,1,5,rg (ignore),rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.10117292404174805,68,
|
||||
linux_alternates,1,5,ag (ignore),ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.49642109870910645,68,
|
||||
linux_alternates,1,5,ag (ignore),ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.48993706703186035,68,
|
||||
linux_alternates,1,5,ag (ignore),ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.4837028980255127,68,
|
||||
linux_alternates,1,5,ag (ignore),ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.4773833751678467,68,
|
||||
linux_alternates,1,5,ag (ignore),ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.558436393737793,68,
|
||||
linux_alternates,1,5,git grep (ignore),git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.2605454921722412,68,LC_ALL=C
|
||||
linux_alternates,1,5,git grep (ignore),git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.26748204231262207,68,LC_ALL=C
|
||||
linux_alternates,1,5,git grep (ignore),git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.26719212532043457,68,LC_ALL=C
|
||||
linux_alternates,1,5,git grep (ignore),git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.2719383239746094,68,LC_ALL=C
|
||||
linux_alternates,1,5,git grep (ignore),git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.26963257789611816,68,LC_ALL=C
|
||||
linux_alternates,1,5,rg (whitelist),rg --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.08797001838684082,68,
|
||||
linux_alternates,1,5,rg (whitelist),rg --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09073781967163086,68,
|
||||
linux_alternates,1,5,rg (whitelist),rg --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.0914468765258789,68,
|
||||
linux_alternates,1,5,rg (whitelist),rg --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09071612358093262,68,
|
||||
linux_alternates,1,5,rg (whitelist),rg --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.0914316177368164,68,
|
||||
linux_alternates,1,5,ucg (whitelist),ucg --nosmart-case ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.1372535228729248,68,
|
||||
linux_alternates,1,5,ucg (whitelist),ucg --nosmart-case ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.13880419731140137,68,
|
||||
linux_alternates,1,5,ucg (whitelist),ucg --nosmart-case ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.13315439224243164,68,
|
||||
linux_alternates,1,5,ucg (whitelist),ucg --nosmart-case ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.1367807388305664,68,
|
||||
linux_alternates,1,5,ucg (whitelist),ucg --nosmart-case ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.13135552406311035,68,
|
||||
linux_alternates_casei,1,5,rg (ignore),rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.12781810760498047,160,
|
||||
linux_alternates_casei,1,5,rg (ignore),rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.11988544464111328,160,
|
||||
linux_alternates_casei,1,5,rg (ignore),rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.1205439567565918,160,
|
||||
linux_alternates_casei,1,5,rg (ignore),rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.12867259979248047,160,
|
||||
linux_alternates_casei,1,5,rg (ignore),rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.1215970516204834,160,
|
||||
linux_alternates_casei,1,5,ag (ignore),ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5444357395172119,160,
|
||||
linux_alternates_casei,1,5,ag (ignore),ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5511739253997803,160,
|
||||
linux_alternates_casei,1,5,ag (ignore),ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5382294654846191,160,
|
||||
linux_alternates_casei,1,5,ag (ignore),ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5499558448791504,160,
|
||||
linux_alternates_casei,1,5,ag (ignore),ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.6376545429229736,160,
|
||||
linux_alternates_casei,1,5,git grep (ignore),git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.9767155647277832,160,LC_ALL=C
|
||||
linux_alternates_casei,1,5,git grep (ignore),git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.920574426651001,160,LC_ALL=C
|
||||
linux_alternates_casei,1,5,git grep (ignore),git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.9352290630340576,160,LC_ALL=C
|
||||
linux_alternates_casei,1,5,git grep (ignore),git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.8866012096405029,160,LC_ALL=C
|
||||
linux_alternates_casei,1,5,git grep (ignore),git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.9189445972442627,160,LC_ALL=C
|
||||
linux_alternates_casei,1,5,rg (whitelist),rg --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09351730346679688,160,
|
||||
linux_alternates_casei,1,5,rg (whitelist),rg --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09393739700317383,160,
|
||||
linux_alternates_casei,1,5,rg (whitelist),rg --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09986448287963867,160,
|
||||
linux_alternates_casei,1,5,rg (whitelist),rg --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09596824645996094,160,
|
||||
linux_alternates_casei,1,5,rg (whitelist),rg --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.09604883193969727,160,
|
||||
linux_alternates_casei,1,5,ucg (whitelist),ucg -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.23943114280700684,160,
|
||||
linux_alternates_casei,1,5,ucg (whitelist),ucg -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.2587015628814697,160,
|
||||
linux_alternates_casei,1,5,ucg (whitelist),ucg -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.2543606758117676,160,
|
||||
linux_alternates_casei,1,5,ucg (whitelist),ucg -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.2490406036376953,160,
|
||||
linux_alternates_casei,1,5,ucg (whitelist),ucg -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.24046540260314941,160,
|
||||
linux_literal,1,5,rg (ignore),rg -n PM_RESUME,0.08253765106201172,16,
|
||||
linux_literal,1,5,rg (ignore),rg -n PM_RESUME,0.08176755905151367,16,
|
||||
linux_literal,1,5,rg (ignore),rg -n PM_RESUME,0.08141684532165527,16,
|
||||
linux_literal,1,5,rg (ignore),rg -n PM_RESUME,0.08108830451965332,16,
|
||||
linux_literal,1,5,rg (ignore),rg -n PM_RESUME,0.08082938194274902,16,
|
||||
linux_literal,1,5,rg (ignore) (mmap),rg -n --mmap PM_RESUME,0.6870582103729248,16,
|
||||
linux_literal,1,5,rg (ignore) (mmap),rg -n --mmap PM_RESUME,0.807842493057251,16,
|
||||
linux_literal,1,5,rg (ignore) (mmap),rg -n --mmap PM_RESUME,0.8129942417144775,16,
|
||||
linux_literal,1,5,rg (ignore) (mmap),rg -n --mmap PM_RESUME,0.7582321166992188,16,
|
||||
linux_literal,1,5,rg (ignore) (mmap),rg -n --mmap PM_RESUME,0.6869800090789795,16,
|
||||
linux_literal,1,5,ag (ignore) (mmap),ag -s PM_RESUME,0.6534101963043213,16,
|
||||
linux_literal,1,5,ag (ignore) (mmap),ag -s PM_RESUME,0.6020612716674805,16,
|
||||
linux_literal,1,5,ag (ignore) (mmap),ag -s PM_RESUME,0.6712157726287842,16,
|
||||
linux_literal,1,5,ag (ignore) (mmap),ag -s PM_RESUME,0.6267571449279785,16,
|
||||
linux_literal,1,5,ag (ignore) (mmap),ag -s PM_RESUME,0.505136251449585,16,
|
||||
linux_literal,1,5,pt (ignore),pt PM_RESUME,0.21415948867797852,16,
|
||||
linux_literal,1,5,pt (ignore),pt PM_RESUME,0.19318318367004395,16,
|
||||
linux_literal,1,5,pt (ignore),pt PM_RESUME,0.21352124214172363,16,
|
||||
linux_literal,1,5,pt (ignore),pt PM_RESUME,0.18979454040527344,16,
|
||||
linux_literal,1,5,pt (ignore),pt PM_RESUME,0.16629600524902344,16,
|
||||
linux_literal,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git PM_RESUME,0.46967077255249023,16,
|
||||
linux_literal,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git PM_RESUME,0.46343088150024414,16,
|
||||
linux_literal,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git PM_RESUME,0.4723978042602539,16,
|
||||
linux_literal,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git PM_RESUME,0.4741063117980957,16,
|
||||
linux_literal,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git PM_RESUME,0.4613051414489746,16,
|
||||
linux_literal,1,5,git grep (ignore),git grep -I -n PM_RESUME,0.20196986198425293,16,LC_ALL=C
|
||||
linux_literal,1,5,git grep (ignore),git grep -I -n PM_RESUME,0.18932533264160156,16,LC_ALL=C
|
||||
linux_literal,1,5,git grep (ignore),git grep -I -n PM_RESUME,0.19396305084228516,16,LC_ALL=C
|
||||
linux_literal,1,5,git grep (ignore),git grep -I -n PM_RESUME,0.1952073574066162,16,LC_ALL=C
|
||||
linux_literal,1,5,git grep (ignore),git grep -I -n PM_RESUME,0.20149731636047363,16,LC_ALL=C
|
||||
linux_literal,1,5,rg (whitelist),rg -n --no-ignore -tall PM_RESUME,0.08270478248596191,16,
|
||||
linux_literal,1,5,rg (whitelist),rg -n --no-ignore -tall PM_RESUME,0.08414745330810547,16,
|
||||
linux_literal,1,5,rg (whitelist),rg -n --no-ignore -tall PM_RESUME,0.08627724647521973,16,
|
||||
linux_literal,1,5,rg (whitelist),rg -n --no-ignore -tall PM_RESUME,0.08978700637817383,16,
|
||||
linux_literal,1,5,rg (whitelist),rg -n --no-ignore -tall PM_RESUME,0.0836489200592041,16,
|
||||
linux_literal,1,5,ucg (whitelist),ucg --nosmart-case PM_RESUME,0.15774202346801758,16,
|
||||
linux_literal,1,5,ucg (whitelist),ucg --nosmart-case PM_RESUME,0.16005396842956543,16,
|
||||
linux_literal,1,5,ucg (whitelist),ucg --nosmart-case PM_RESUME,0.15743708610534668,16,
|
||||
linux_literal,1,5,ucg (whitelist),ucg --nosmart-case PM_RESUME,0.16156601905822754,16,
|
||||
linux_literal,1,5,ucg (whitelist),ucg --nosmart-case PM_RESUME,0.1557624340057373,16,
|
||||
linux_literal_casei,1,5,rg (ignore),rg -n -i PM_RESUME,0.1028127670288086,374,
|
||||
linux_literal_casei,1,5,rg (ignore),rg -n -i PM_RESUME,0.10258054733276367,374,
|
||||
linux_literal_casei,1,5,rg (ignore),rg -n -i PM_RESUME,0.10902261734008789,374,
|
||||
linux_literal_casei,1,5,rg (ignore),rg -n -i PM_RESUME,0.10802555084228516,374,
|
||||
linux_literal_casei,1,5,rg (ignore),rg -n -i PM_RESUME,0.10153412818908691,374,
|
||||
linux_literal_casei,1,5,rg (ignore) (mmap),rg -n -i --mmap PM_RESUME,0.7902817726135254,374,
|
||||
linux_literal_casei,1,5,rg (ignore) (mmap),rg -n -i --mmap PM_RESUME,0.7985179424285889,374,
|
||||
linux_literal_casei,1,5,rg (ignore) (mmap),rg -n -i --mmap PM_RESUME,0.8208649158477783,374,
|
||||
linux_literal_casei,1,5,rg (ignore) (mmap),rg -n -i --mmap PM_RESUME,0.7937076091766357,374,
|
||||
linux_literal_casei,1,5,rg (ignore) (mmap),rg -n -i --mmap PM_RESUME,0.7936429977416992,374,
|
||||
linux_literal_casei,1,5,ag (ignore) (mmap),ag -i PM_RESUME,0.5215470790863037,374,
|
||||
linux_literal_casei,1,5,ag (ignore) (mmap),ag -i PM_RESUME,0.46518707275390625,374,
|
||||
linux_literal_casei,1,5,ag (ignore) (mmap),ag -i PM_RESUME,0.4467353820800781,374,
|
||||
linux_literal_casei,1,5,ag (ignore) (mmap),ag -i PM_RESUME,0.4595184326171875,374,
|
||||
linux_literal_casei,1,5,ag (ignore) (mmap),ag -i PM_RESUME,0.4531285762786865,374,
|
||||
linux_literal_casei,1,5,pt (ignore),pt -i PM_RESUME,14.187762022018433,374,
|
||||
linux_literal_casei,1,5,pt (ignore),pt -i PM_RESUME,14.178058385848999,374,
|
||||
linux_literal_casei,1,5,pt (ignore),pt -i PM_RESUME,14.096448421478271,374,
|
||||
linux_literal_casei,1,5,pt (ignore),pt -i PM_RESUME,14.190524339675903,374,
|
||||
linux_literal_casei,1,5,pt (ignore),pt -i PM_RESUME,14.231573343276978,374,
|
||||
linux_literal_casei,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git PM_RESUME,0.4668574333190918,374,
|
||||
linux_literal_casei,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git PM_RESUME,0.46050214767456055,374,
|
||||
linux_literal_casei,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git PM_RESUME,0.46228861808776855,374,
|
||||
linux_literal_casei,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git PM_RESUME,0.44957947731018066,374,
|
||||
linux_literal_casei,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git PM_RESUME,0.4612581729888916,374,
|
||||
linux_literal_casei,1,5,git grep (ignore),git grep -I -n -i PM_RESUME,0.1932981014251709,370,LC_ALL=C
|
||||
linux_literal_casei,1,5,git grep (ignore),git grep -I -n -i PM_RESUME,0.20561552047729492,370,LC_ALL=C
|
||||
linux_literal_casei,1,5,git grep (ignore),git grep -I -n -i PM_RESUME,0.19516706466674805,370,LC_ALL=C
|
||||
linux_literal_casei,1,5,git grep (ignore),git grep -I -n -i PM_RESUME,0.20196247100830078,370,LC_ALL=C
|
||||
linux_literal_casei,1,5,git grep (ignore),git grep -I -n -i PM_RESUME,0.19236421585083008,370,LC_ALL=C
|
||||
linux_literal_casei,1,5,rg (whitelist),rg -n -i --no-ignore -tall PM_RESUME,0.09555959701538086,370,
|
||||
linux_literal_casei,1,5,rg (whitelist),rg -n -i --no-ignore -tall PM_RESUME,0.09589338302612305,370,
|
||||
linux_literal_casei,1,5,rg (whitelist),rg -n -i --no-ignore -tall PM_RESUME,0.09479856491088867,370,
|
||||
linux_literal_casei,1,5,rg (whitelist),rg -n -i --no-ignore -tall PM_RESUME,0.09741568565368652,370,
|
||||
linux_literal_casei,1,5,rg (whitelist),rg -n -i --no-ignore -tall PM_RESUME,0.10127615928649902,370,
|
||||
linux_literal_casei,1,5,ucg (whitelist),ucg -i PM_RESUME,0.15514039993286133,370,
|
||||
linux_literal_casei,1,5,ucg (whitelist),ucg -i PM_RESUME,0.15668940544128418,370,
|
||||
linux_literal_casei,1,5,ucg (whitelist),ucg -i PM_RESUME,0.15429425239562988,370,
|
||||
linux_literal_casei,1,5,ucg (whitelist),ucg -i PM_RESUME,0.15332818031311035,370,
|
||||
linux_literal_casei,1,5,ucg (whitelist),ucg -i PM_RESUME,0.14861536026000977,370,
|
||||
linux_literal_default,1,5,rg,rg PM_RESUME,0.08931398391723633,16,
|
||||
linux_literal_default,1,5,rg,rg PM_RESUME,0.08717465400695801,16,
|
||||
linux_literal_default,1,5,rg,rg PM_RESUME,0.0879361629486084,16,
|
||||
linux_literal_default,1,5,rg,rg PM_RESUME,0.08688950538635254,16,
|
||||
linux_literal_default,1,5,rg,rg PM_RESUME,0.09138607978820801,16,
|
||||
linux_literal_default,1,5,ag,ag PM_RESUME,0.5342838764190674,16,
|
||||
linux_literal_default,1,5,ag,ag PM_RESUME,0.47187042236328125,16,
|
||||
linux_literal_default,1,5,ag,ag PM_RESUME,0.4456596374511719,16,
|
||||
linux_literal_default,1,5,ag,ag PM_RESUME,0.4507424831390381,16,
|
||||
linux_literal_default,1,5,ag,ag PM_RESUME,0.44472575187683105,16,
|
||||
linux_literal_default,1,5,ucg,ucg PM_RESUME,0.15556907653808594,16,
|
||||
linux_literal_default,1,5,ucg,ucg PM_RESUME,0.1533644199371338,16,
|
||||
linux_literal_default,1,5,ucg,ucg PM_RESUME,0.15392351150512695,16,
|
||||
linux_literal_default,1,5,ucg,ucg PM_RESUME,0.1535196304321289,16,
|
||||
linux_literal_default,1,5,ucg,ucg PM_RESUME,0.15589547157287598,16,
|
||||
linux_literal_default,1,5,pt,pt PM_RESUME,0.2261514663696289,16,
|
||||
linux_literal_default,1,5,pt,pt PM_RESUME,0.2731902599334717,16,
|
||||
linux_literal_default,1,5,pt,pt PM_RESUME,0.2563004493713379,16,
|
||||
linux_literal_default,1,5,pt,pt PM_RESUME,0.2575085163116455,16,
|
||||
linux_literal_default,1,5,pt,pt PM_RESUME,0.1724245548248291,16,
|
||||
linux_literal_default,1,5,sift,sift PM_RESUME,0.13233542442321777,16,
|
||||
linux_literal_default,1,5,sift,sift PM_RESUME,0.1256580352783203,16,
|
||||
linux_literal_default,1,5,sift,sift PM_RESUME,0.12435102462768555,16,
|
||||
linux_literal_default,1,5,sift,sift PM_RESUME,0.1259307861328125,16,
|
||||
linux_literal_default,1,5,sift,sift PM_RESUME,0.12412142753601074,16,
|
||||
linux_literal_default,1,5,git grep,git grep PM_RESUME,0.1742086410522461,16,LC_ALL=en_US.UTF-8
|
||||
linux_literal_default,1,5,git grep,git grep PM_RESUME,0.16890597343444824,16,LC_ALL=en_US.UTF-8
|
||||
linux_literal_default,1,5,git grep,git grep PM_RESUME,0.16680669784545898,16,LC_ALL=en_US.UTF-8
|
||||
linux_literal_default,1,5,git grep,git grep PM_RESUME,0.16899871826171875,16,LC_ALL=en_US.UTF-8
|
||||
linux_literal_default,1,5,git grep,git grep PM_RESUME,0.19794917106628418,16,LC_ALL=en_US.UTF-8
|
||||
linux_no_literal,1,5,rg (ignore),rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.33940672874450684,490,
|
||||
linux_no_literal,1,5,rg (ignore),rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.3274960517883301,490,
|
||||
linux_no_literal,1,5,rg (ignore),rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.32681775093078613,490,
|
||||
linux_no_literal,1,5,rg (ignore),rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.32865071296691895,490,
|
||||
linux_no_literal,1,5,rg (ignore),rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.3240926265716553,490,
|
||||
linux_no_literal,1,5,rg (ignore) (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.17426586151123047,490,
|
||||
linux_no_literal,1,5,rg (ignore) (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.17265701293945312,490,
|
||||
linux_no_literal,1,5,rg (ignore) (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.1703634262084961,490,
|
||||
linux_no_literal,1,5,rg (ignore) (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.17192435264587402,490,
|
||||
linux_no_literal,1,5,rg (ignore) (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.1704559326171875,490,
|
||||
linux_no_literal,1,5,ag (ignore) (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.8443403244018555,766,
|
||||
linux_no_literal,1,5,ag (ignore) (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.6956703662872314,766,
|
||||
linux_no_literal,1,5,ag (ignore) (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.6938261985778809,766,
|
||||
linux_no_literal,1,5,ag (ignore) (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.695967435836792,766,
|
||||
linux_no_literal,1,5,ag (ignore) (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.6945271492004395,766,
|
||||
linux_no_literal,1,5,pt (ignore) (ASCII),pt -e \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},12.645716428756714,490,
|
||||
linux_no_literal,1,5,pt (ignore) (ASCII),pt -e \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},12.441533088684082,490,
|
||||
linux_no_literal,1,5,pt (ignore) (ASCII),pt -e \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},12.472522735595703,490,
|
||||
linux_no_literal,1,5,pt (ignore) (ASCII),pt -e \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},12.42497444152832,490,
|
||||
linux_no_literal,1,5,pt (ignore) (ASCII),pt -e \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},12.407486200332642,490,
|
||||
linux_no_literal,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},9.091489553451538,490,
|
||||
linux_no_literal,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},9.049214124679565,490,
|
||||
linux_no_literal,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.879419803619385,490,
|
||||
linux_no_literal,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},9.07261848449707,490,
|
||||
linux_no_literal,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.918747901916504,490,
|
||||
linux_no_literal,1,5,git grep (ignore),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.334321975708008,490,LC_ALL=en_US.UTF-8
|
||||
linux_no_literal,1,5,git grep (ignore),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.993232727050781,490,LC_ALL=en_US.UTF-8
|
||||
linux_no_literal,1,5,git grep (ignore),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.622304916381836,490,LC_ALL=en_US.UTF-8
|
||||
linux_no_literal,1,5,git grep (ignore),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.35973048210144,490,LC_ALL=en_US.UTF-8
|
||||
linux_no_literal,1,5,git grep (ignore),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.39980435371399,490,LC_ALL=en_US.UTF-8
|
||||
linux_no_literal,1,5,git grep (ignore) (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},2.0318400859832764,490,LC_ALL=C
|
||||
linux_no_literal,1,5,git grep (ignore) (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},1.8587837219238281,490,LC_ALL=C
|
||||
linux_no_literal,1,5,git grep (ignore) (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},1.873384714126587,490,LC_ALL=C
|
||||
linux_no_literal,1,5,git grep (ignore) (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},1.8111364841461182,490,LC_ALL=C
|
||||
linux_no_literal,1,5,git grep (ignore) (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},1.8385357856750488,490,LC_ALL=C
|
||||
linux_no_literal,1,5,rg (whitelist),rg -n --no-ignore -tall \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.28792643547058105,458,
|
||||
linux_no_literal,1,5,rg (whitelist),rg -n --no-ignore -tall \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.28545212745666504,458,
|
||||
linux_no_literal,1,5,rg (whitelist),rg -n --no-ignore -tall \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.28576135635375977,458,
|
||||
linux_no_literal,1,5,rg (whitelist),rg -n --no-ignore -tall \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.29883813858032227,458,
|
||||
linux_no_literal,1,5,rg (whitelist),rg -n --no-ignore -tall \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.28493285179138184,458,
|
||||
linux_no_literal,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.15974783897399902,458,
|
||||
linux_no_literal,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.15943312644958496,458,
|
||||
linux_no_literal,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.160233736038208,458,
|
||||
linux_no_literal,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.16201996803283691,458,
|
||||
linux_no_literal,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.16033530235290527,458,
|
||||
linux_no_literal,1,5,ucg (whitelist) (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.4639148712158203,416,
|
||||
linux_no_literal,1,5,ucg (whitelist) (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.46042823791503906,416,
|
||||
linux_no_literal,1,5,ucg (whitelist) (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.45925426483154297,416,
|
||||
linux_no_literal,1,5,ucg (whitelist) (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.477064847946167,416,
|
||||
linux_no_literal,1,5,ucg (whitelist) (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.507554292678833,416,
|
||||
linux_re_literal_suffix,1,5,rg (ignore),rg -n [A-Z]+_RESUME,0.08520364761352539,1652,
|
||||
linux_re_literal_suffix,1,5,rg (ignore),rg -n [A-Z]+_RESUME,0.08203816413879395,1652,
|
||||
linux_re_literal_suffix,1,5,rg (ignore),rg -n [A-Z]+_RESUME,0.08355021476745605,1652,
|
||||
linux_re_literal_suffix,1,5,rg (ignore),rg -n [A-Z]+_RESUME,0.0865166187286377,1652,
|
||||
linux_re_literal_suffix,1,5,rg (ignore),rg -n [A-Z]+_RESUME,0.08125448226928711,1652,
|
||||
linux_re_literal_suffix,1,5,ag (ignore),ag -s [A-Z]+_RESUME,0.4846627712249756,1652,
|
||||
linux_re_literal_suffix,1,5,ag (ignore),ag -s [A-Z]+_RESUME,0.48070311546325684,1652,
|
||||
linux_re_literal_suffix,1,5,ag (ignore),ag -s [A-Z]+_RESUME,0.4813041687011719,1652,
|
||||
linux_re_literal_suffix,1,5,ag (ignore),ag -s [A-Z]+_RESUME,0.4755582809448242,1652,
|
||||
linux_re_literal_suffix,1,5,ag (ignore),ag -s [A-Z]+_RESUME,0.4926290512084961,1652,
|
||||
linux_re_literal_suffix,1,5,pt (ignore),pt -e [A-Z]+_RESUME,14.124520540237427,1652,
|
||||
linux_re_literal_suffix,1,5,pt (ignore),pt -e [A-Z]+_RESUME,14.151537656784058,1652,
|
||||
linux_re_literal_suffix,1,5,pt (ignore),pt -e [A-Z]+_RESUME,14.157994270324707,1652,
|
||||
linux_re_literal_suffix,1,5,pt (ignore),pt -e [A-Z]+_RESUME,14.102291822433472,1652,
|
||||
linux_re_literal_suffix,1,5,pt (ignore),pt -e [A-Z]+_RESUME,14.103861093521118,1652,
|
||||
linux_re_literal_suffix,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git [A-Z]+_RESUME,4.182392835617065,1652,
|
||||
linux_re_literal_suffix,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git [A-Z]+_RESUME,4.190829277038574,1652,
|
||||
linux_re_literal_suffix,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git [A-Z]+_RESUME,3.9770240783691406,1652,
|
||||
linux_re_literal_suffix,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git [A-Z]+_RESUME,3.9978606700897217,1652,
|
||||
linux_re_literal_suffix,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git [A-Z]+_RESUME,4.146454572677612,1652,
|
||||
linux_re_literal_suffix,1,5,git grep (ignore),git grep -E -I -n [A-Z]+_RESUME,0.5080702304840088,1652,LC_ALL=C
|
||||
linux_re_literal_suffix,1,5,git grep (ignore),git grep -E -I -n [A-Z]+_RESUME,0.5281260013580322,1652,LC_ALL=C
|
||||
linux_re_literal_suffix,1,5,git grep (ignore),git grep -E -I -n [A-Z]+_RESUME,0.5350546836853027,1652,LC_ALL=C
|
||||
linux_re_literal_suffix,1,5,git grep (ignore),git grep -E -I -n [A-Z]+_RESUME,0.5474245548248291,1652,LC_ALL=C
|
||||
linux_re_literal_suffix,1,5,git grep (ignore),git grep -E -I -n [A-Z]+_RESUME,0.5256762504577637,1652,LC_ALL=C
|
||||
linux_re_literal_suffix,1,5,rg (whitelist),rg -n --no-ignore -tall [A-Z]+_RESUME,0.07924222946166992,1630,
|
||||
linux_re_literal_suffix,1,5,rg (whitelist),rg -n --no-ignore -tall [A-Z]+_RESUME,0.0767812728881836,1630,
|
||||
linux_re_literal_suffix,1,5,rg (whitelist),rg -n --no-ignore -tall [A-Z]+_RESUME,0.07874488830566406,1630,
|
||||
linux_re_literal_suffix,1,5,rg (whitelist),rg -n --no-ignore -tall [A-Z]+_RESUME,0.0804905891418457,1630,
|
||||
linux_re_literal_suffix,1,5,rg (whitelist),rg -n --no-ignore -tall [A-Z]+_RESUME,0.07479119300842285,1630,
|
||||
linux_re_literal_suffix,1,5,ucg (whitelist),ucg --nosmart-case [A-Z]+_RESUME,0.13643193244934082,1630,
|
||||
linux_re_literal_suffix,1,5,ucg (whitelist),ucg --nosmart-case [A-Z]+_RESUME,0.13543128967285156,1630,
|
||||
linux_re_literal_suffix,1,5,ucg (whitelist),ucg --nosmart-case [A-Z]+_RESUME,0.13312768936157227,1630,
|
||||
linux_re_literal_suffix,1,5,ucg (whitelist),ucg --nosmart-case [A-Z]+_RESUME,0.13562273979187012,1630,
|
||||
linux_re_literal_suffix,1,5,ucg (whitelist),ucg --nosmart-case [A-Z]+_RESUME,0.13236212730407715,1630,
|
||||
linux_unicode_greek,1,5,rg,rg -n \p{Greek},0.17355775833129883,23,
|
||||
linux_unicode_greek,1,5,rg,rg -n \p{Greek},0.1676032543182373,23,
|
||||
linux_unicode_greek,1,5,rg,rg -n \p{Greek},0.1727275848388672,23,
|
||||
linux_unicode_greek,1,5,rg,rg -n \p{Greek},0.17095375061035156,23,
|
||||
linux_unicode_greek,1,5,rg,rg -n \p{Greek},0.17271947860717773,23,
|
||||
linux_unicode_greek,1,5,pt,pt -e \p{Greek},14.14364218711853,23,
|
||||
linux_unicode_greek,1,5,pt,pt -e \p{Greek},14.137334108352661,23,
|
||||
linux_unicode_greek,1,5,pt,pt -e \p{Greek},14.083475351333618,23,
|
||||
linux_unicode_greek,1,5,pt,pt -e \p{Greek},14.095231056213379,23,
|
||||
linux_unicode_greek,1,5,pt,pt -e \p{Greek},14.151906490325928,23,
|
||||
linux_unicode_greek,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \p{Greek},2.8376963138580322,23,
|
||||
linux_unicode_greek,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \p{Greek},2.8271427154541016,23,
|
||||
linux_unicode_greek,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \p{Greek},2.8310961723327637,23,
|
||||
linux_unicode_greek,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \p{Greek},2.826141595840454,23,
|
||||
linux_unicode_greek,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \p{Greek},2.805818796157837,23,
|
||||
linux_unicode_greek_casei,1,5,rg,rg -n -i \p{Greek},0.16843819618225098,103,
|
||||
linux_unicode_greek_casei,1,5,rg,rg -n -i \p{Greek},0.1704998016357422,103,
|
||||
linux_unicode_greek_casei,1,5,rg,rg -n -i \p{Greek},0.17055058479309082,103,
|
||||
linux_unicode_greek_casei,1,5,rg,rg -n -i \p{Greek},0.17064881324768066,103,
|
||||
linux_unicode_greek_casei,1,5,rg,rg -n -i \p{Greek},0.1699228286743164,103,
|
||||
linux_unicode_greek_casei,1,5,pt,pt -i -e \p{Greek},14.164355993270874,23,
|
||||
linux_unicode_greek_casei,1,5,pt,pt -i -e \p{Greek},14.099931478500366,23,
|
||||
linux_unicode_greek_casei,1,5,pt,pt -i -e \p{Greek},14.155095338821411,23,
|
||||
linux_unicode_greek_casei,1,5,pt,pt -i -e \p{Greek},14.109308004379272,23,
|
||||
linux_unicode_greek_casei,1,5,pt,pt -i -e \p{Greek},14.072362422943115,23,
|
||||
linux_unicode_greek_casei,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git \p{Greek},0.003945589065551758,,
|
||||
linux_unicode_greek_casei,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git \p{Greek},0.004189729690551758,,
|
||||
linux_unicode_greek_casei,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git \p{Greek},0.0034589767456054688,,
|
||||
linux_unicode_greek_casei,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git \p{Greek},0.003614187240600586,,
|
||||
linux_unicode_greek_casei,1,5,sift,sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -i --git \p{Greek},0.003975629806518555,,
|
||||
linux_unicode_word,1,5,rg (ignore),rg -n \wAh,0.09798526763916016,186,
|
||||
linux_unicode_word,1,5,rg (ignore),rg -n \wAh,0.09575009346008301,186,
|
||||
linux_unicode_word,1,5,rg (ignore),rg -n \wAh,0.10181760787963867,186,
|
||||
linux_unicode_word,1,5,rg (ignore),rg -n \wAh,0.09650158882141113,186,
|
||||
linux_unicode_word,1,5,rg (ignore),rg -n \wAh,0.09717488288879395,186,
|
||||
linux_unicode_word,1,5,rg (ignore) (ASCII),rg -n (?-u)\wAh,0.09417867660522461,174,
|
||||
linux_unicode_word,1,5,rg (ignore) (ASCII),rg -n (?-u)\wAh,0.09903812408447266,174,
|
||||
linux_unicode_word,1,5,rg (ignore) (ASCII),rg -n (?-u)\wAh,0.09407877922058105,174,
|
||||
linux_unicode_word,1,5,rg (ignore) (ASCII),rg -n (?-u)\wAh,0.09681963920593262,174,
|
||||
linux_unicode_word,1,5,rg (ignore) (ASCII),rg -n (?-u)\wAh,0.09762454032897949,174,
|
||||
linux_unicode_word,1,5,ag (ignore) (ASCII),ag -s \wAh,0.5779609680175781,174,
|
||||
linux_unicode_word,1,5,ag (ignore) (ASCII),ag -s \wAh,0.635645866394043,174,
|
||||
linux_unicode_word,1,5,ag (ignore) (ASCII),ag -s \wAh,0.6109263896942139,174,
|
||||
linux_unicode_word,1,5,ag (ignore) (ASCII),ag -s \wAh,0.6260912418365479,174,
|
||||
linux_unicode_word,1,5,ag (ignore) (ASCII),ag -s \wAh,0.6823546886444092,174,
|
||||
linux_unicode_word,1,5,pt (ignore) (ASCII),pt -e \wAh,14.178487062454224,174,
|
||||
linux_unicode_word,1,5,pt (ignore) (ASCII),pt -e \wAh,14.190000057220459,174,
|
||||
linux_unicode_word,1,5,pt (ignore) (ASCII),pt -e \wAh,14.16363000869751,174,
|
||||
linux_unicode_word,1,5,pt (ignore) (ASCII),pt -e \wAh,14.160430431365967,174,
|
||||
linux_unicode_word,1,5,pt (ignore) (ASCII),pt -e \wAh,14.2189621925354,174,
|
||||
linux_unicode_word,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \wAh,4.17629337310791,174,
|
||||
linux_unicode_word,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \wAh,4.051238059997559,174,
|
||||
linux_unicode_word,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \wAh,4.323853015899658,174,
|
||||
linux_unicode_word,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \wAh,4.085661172866821,174,
|
||||
linux_unicode_word,1,5,sift (ignore) (ASCII),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n --git \wAh,4.036486625671387,174,
|
||||
linux_unicode_word,1,5,git grep (ignore),git grep -E -I -n \wAh,4.620476961135864,186,LC_ALL=en_US.UTF-8
|
||||
linux_unicode_word,1,5,git grep (ignore),git grep -E -I -n \wAh,4.536192417144775,186,LC_ALL=en_US.UTF-8
|
||||
linux_unicode_word,1,5,git grep (ignore),git grep -E -I -n \wAh,4.510494232177734,186,LC_ALL=en_US.UTF-8
|
||||
linux_unicode_word,1,5,git grep (ignore),git grep -E -I -n \wAh,6.001620769500732,186,LC_ALL=en_US.UTF-8
|
||||
linux_unicode_word,1,5,git grep (ignore),git grep -E -I -n \wAh,4.602652311325073,186,LC_ALL=en_US.UTF-8
|
||||
linux_unicode_word,1,5,git grep (ignore) (ASCII),git grep -E -I -n \wAh,1.3785994052886963,174,LC_ALL=C
|
||||
linux_unicode_word,1,5,git grep (ignore) (ASCII),git grep -E -I -n \wAh,1.4163663387298584,174,LC_ALL=C
|
||||
linux_unicode_word,1,5,git grep (ignore) (ASCII),git grep -E -I -n \wAh,1.402677297592163,174,LC_ALL=C
|
||||
linux_unicode_word,1,5,git grep (ignore) (ASCII),git grep -E -I -n \wAh,1.3327512741088867,174,LC_ALL=C
|
||||
linux_unicode_word,1,5,git grep (ignore) (ASCII),git grep -E -I -n \wAh,1.3501760959625244,174,LC_ALL=C
|
||||
linux_unicode_word,1,5,rg (whitelist),rg -n --no-ignore -tall \wAh,0.07958698272705078,180,
|
||||
linux_unicode_word,1,5,rg (whitelist),rg -n --no-ignore -tall \wAh,0.0798649787902832,180,
|
||||
linux_unicode_word,1,5,rg (whitelist),rg -n --no-ignore -tall \wAh,0.08086204528808594,180,
|
||||
linux_unicode_word,1,5,rg (whitelist),rg -n --no-ignore -tall \wAh,0.0814356803894043,180,
|
||||
linux_unicode_word,1,5,rg (whitelist),rg -n --no-ignore -tall \wAh,0.08273720741271973,180,
|
||||
linux_unicode_word,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\wAh,0.08280825614929199,168,
|
||||
linux_unicode_word,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\wAh,0.08074021339416504,168,
|
||||
linux_unicode_word,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\wAh,0.0821676254272461,168,
|
||||
linux_unicode_word,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\wAh,0.07926368713378906,168,
|
||||
linux_unicode_word,1,5,rg (whitelist) (ASCII),rg -n --no-ignore -tall (?-u)\wAh,0.08405280113220215,168,
|
||||
linux_unicode_word,1,5,ucg (ASCII),ucg --nosmart-case \wAh,0.1545090675354004,168,
|
||||
linux_unicode_word,1,5,ucg (ASCII),ucg --nosmart-case \wAh,0.1517190933227539,168,
|
||||
linux_unicode_word,1,5,ucg (ASCII),ucg --nosmart-case \wAh,0.15704965591430664,168,
|
||||
linux_unicode_word,1,5,ucg (ASCII),ucg --nosmart-case \wAh,0.15523767471313477,168,
|
||||
linux_unicode_word,1,5,ucg (ASCII),ucg --nosmart-case \wAh,0.1582942008972168,168,
|
||||
linux_word,1,5,rg (ignore),rg -n -w PM_RESUME,0.09102368354797363,6,
|
||||
linux_word,1,5,rg (ignore),rg -n -w PM_RESUME,0.08986210823059082,6,
|
||||
linux_word,1,5,rg (ignore),rg -n -w PM_RESUME,0.08989477157592773,6,
|
||||
linux_word,1,5,rg (ignore),rg -n -w PM_RESUME,0.0895695686340332,6,
|
||||
linux_word,1,5,rg (ignore),rg -n -w PM_RESUME,0.09547114372253418,6,
|
||||
linux_word,1,5,ag (ignore),ag -s -w PM_RESUME,0.4948008060455322,6,
|
||||
linux_word,1,5,ag (ignore),ag -s -w PM_RESUME,0.45710110664367676,6,
|
||||
linux_word,1,5,ag (ignore),ag -s -w PM_RESUME,0.44803452491760254,6,
|
||||
linux_word,1,5,ag (ignore),ag -s -w PM_RESUME,0.44779396057128906,6,
|
||||
linux_word,1,5,ag (ignore),ag -s -w PM_RESUME,0.4563112258911133,6,
|
||||
linux_word,1,5,pt (ignore),pt -w PM_RESUME,14.233235597610474,6,
|
||||
linux_word,1,5,pt (ignore),pt -w PM_RESUME,14.277648687362671,6,
|
||||
linux_word,1,5,pt (ignore),pt -w PM_RESUME,14.218127727508545,6,
|
||||
linux_word,1,5,pt (ignore),pt -w PM_RESUME,14.171622037887573,6,
|
||||
linux_word,1,5,pt (ignore),pt -w PM_RESUME,14.214240312576294,6,
|
||||
linux_word,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -w --git PM_RESUME,3.1536731719970703,6,
|
||||
linux_word,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -w --git PM_RESUME,3.2415099143981934,6,
|
||||
linux_word,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -w --git PM_RESUME,3.2526626586914062,6,
|
||||
linux_word,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -w --git PM_RESUME,3.2590816020965576,6,
|
||||
linux_word,1,5,sift (ignore),sift --binary-skip --exclude-files .* --exclude-files *.pdf -n -w --git PM_RESUME,3.222473621368408,6,
|
||||
linux_word,1,5,git grep (ignore),git grep -E -I -n -w PM_RESUME,0.16982412338256836,6,LC_ALL=C
|
||||
linux_word,1,5,git grep (ignore),git grep -E -I -n -w PM_RESUME,0.16739583015441895,6,LC_ALL=C
|
||||
linux_word,1,5,git grep (ignore),git grep -E -I -n -w PM_RESUME,0.16866540908813477,6,LC_ALL=C
|
||||
linux_word,1,5,git grep (ignore),git grep -E -I -n -w PM_RESUME,0.18207120895385742,6,LC_ALL=C
|
||||
linux_word,1,5,git grep (ignore),git grep -E -I -n -w PM_RESUME,0.17716264724731445,6,LC_ALL=C
|
||||
linux_word,1,5,rg (whitelist),rg -n -w --no-ignore -tall PM_RESUME,0.07490420341491699,6,
|
||||
linux_word,1,5,rg (whitelist),rg -n -w --no-ignore -tall PM_RESUME,0.07714152336120605,6,
|
||||
linux_word,1,5,rg (whitelist),rg -n -w --no-ignore -tall PM_RESUME,0.07552146911621094,6,
|
||||
linux_word,1,5,rg (whitelist),rg -n -w --no-ignore -tall PM_RESUME,0.07651710510253906,6,
|
||||
linux_word,1,5,rg (whitelist),rg -n -w --no-ignore -tall PM_RESUME,0.0757131576538086,6,
|
||||
linux_word,1,5,ucg (whitelist),ucg --nosmart-case -w PM_RESUME,0.1530015468597412,6,
|
||||
linux_word,1,5,ucg (whitelist),ucg --nosmart-case -w PM_RESUME,0.15152239799499512,6,
|
||||
linux_word,1,5,ucg (whitelist),ucg --nosmart-case -w PM_RESUME,0.1571195125579834,6,
|
||||
linux_word,1,5,ucg (whitelist),ucg --nosmart-case -w PM_RESUME,0.15993595123291016,6,
|
||||
linux_word,1,5,ucg (whitelist),ucg --nosmart-case -w PM_RESUME,0.15633797645568848,6,
|
||||
subtitles_en_alternate,1,5,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.33371877670288086,848,
|
||||
subtitles_en_alternate,1,5,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3207988739013672,848,
|
||||
subtitles_en_alternate,1,5,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3301675319671631,848,
|
||||
subtitles_en_alternate,1,5,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.29731154441833496,848,
|
||||
subtitles_en_alternate,1,5,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2711911201477051,848,
|
||||
subtitles_en_alternate,1,5,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.186570405960083,848,
|
||||
subtitles_en_alternate,1,5,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.1659939289093018,848,
|
||||
subtitles_en_alternate,1,5,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.187847137451172,848,
|
||||
subtitles_en_alternate,1,5,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.3522064685821533,848,
|
||||
subtitles_en_alternate,1,5,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.316105842590332,848,
|
||||
subtitles_en_alternate,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.1400718688964844,848,
|
||||
subtitles_en_alternate,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.1492774486541748,848,
|
||||
subtitles_en_alternate,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.1337254047393799,848,
|
||||
subtitles_en_alternate,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.1037378311157227,848,
|
||||
subtitles_en_alternate,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.1312851905822754,848,
|
||||
subtitles_en_alternate,1,5,grep (lines),grep -E -an Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.8294000625610352,848,LC_ALL=C
|
||||
subtitles_en_alternate,1,5,grep (lines),grep -E -an Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.808884620666504,848,LC_ALL=C
|
||||
subtitles_en_alternate,1,5,grep (lines),grep -E -an Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.8134734630584717,848,LC_ALL=C
|
||||
subtitles_en_alternate,1,5,grep (lines),grep -E -an Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.8405649662017822,848,LC_ALL=C
|
||||
subtitles_en_alternate,1,5,grep (lines),grep -E -an Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.8500289916992188,848,LC_ALL=C
|
||||
subtitles_en_alternate,1,5,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.21175312995910645,848,
|
||||
subtitles_en_alternate,1,5,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2118232250213623,848,
|
||||
subtitles_en_alternate,1,5,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.21287035942077637,848,
|
||||
subtitles_en_alternate,1,5,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.21167230606079102,848,
|
||||
subtitles_en_alternate,1,5,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.28102636337280273,848,
|
||||
subtitles_en_alternate,1,5,grep,grep -E -a Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5029187202453613,848,LC_ALL=C
|
||||
subtitles_en_alternate,1,5,grep,grep -E -a Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.49977445602417,848,LC_ALL=C
|
||||
subtitles_en_alternate,1,5,grep,grep -E -a Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.508340835571289,848,LC_ALL=C
|
||||
subtitles_en_alternate,1,5,grep,grep -E -a Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5002548694610596,848,LC_ALL=C
|
||||
subtitles_en_alternate,1,5,grep,grep -E -a Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.629526138305664,848,LC_ALL=C
|
||||
subtitles_en_alternate_casei,1,5,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.730497360229492,862,
|
||||
subtitles_en_alternate_casei,1,5,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.781018018722534,862,
|
||||
subtitles_en_alternate_casei,1,5,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.7858059406280518,862,
|
||||
subtitles_en_alternate_casei,1,5,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.7127914428710938,862,
|
||||
subtitles_en_alternate_casei,1,5,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.717308759689331,862,
|
||||
subtitles_en_alternate_casei,1,5,ucg (ASCII),ucg -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.428208351135254,862,
|
||||
subtitles_en_alternate_casei,1,5,ucg (ASCII),ucg -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.389420509338379,862,
|
||||
subtitles_en_alternate_casei,1,5,ucg (ASCII),ucg -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.403301954269409,862,
|
||||
subtitles_en_alternate_casei,1,5,ucg (ASCII),ucg -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.4691550731658936,862,
|
||||
subtitles_en_alternate_casei,1,5,ucg (ASCII),ucg -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.4245004653930664,862,
|
||||
subtitles_en_alternate_casei,1,5,grep (ASCII),grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.978189706802368,862,LC_ALL=C
|
||||
subtitles_en_alternate_casei,1,5,grep (ASCII),grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.974303722381592,862,LC_ALL=C
|
||||
subtitles_en_alternate_casei,1,5,grep (ASCII),grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.982886552810669,862,LC_ALL=C
|
||||
subtitles_en_alternate_casei,1,5,grep (ASCII),grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.90018630027771,862,LC_ALL=C
|
||||
subtitles_en_alternate_casei,1,5,grep (ASCII),grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.0078439712524414,862,LC_ALL=C
|
||||
subtitles_en_alternate_casei,1,5,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.9129142761230469,862,
|
||||
subtitles_en_alternate_casei,1,5,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.9066660404205322,862,
|
||||
subtitles_en_alternate_casei,1,5,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.946380615234375,862,
|
||||
subtitles_en_alternate_casei,1,5,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.9672930240631104,862,
|
||||
subtitles_en_alternate_casei,1,5,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.028451919555664,862,
|
||||
subtitles_en_alternate_casei,1,5,grep,grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.9427030086517334,862,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_alternate_casei,1,5,grep,grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.938739061355591,862,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_alternate_casei,1,5,grep,grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.921248435974121,862,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_alternate_casei,1,5,grep,grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.9194068908691406,862,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_alternate_casei,1,5,grep,grep -E -ani Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.917184829711914,862,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal,1,5,rg,rg Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.12293672561645508,629,
|
||||
subtitles_en_literal,1,5,rg,rg Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.1259000301361084,629,
|
||||
subtitles_en_literal,1,5,rg,rg Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.12285709381103516,629,
|
||||
subtitles_en_literal,1,5,rg,rg Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.12280964851379395,629,
|
||||
subtitles_en_literal,1,5,rg,rg Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.1547396183013916,629,
|
||||
subtitles_en_literal,1,5,rg (no mmap),rg --no-mmap Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.22011375427246094,629,
|
||||
subtitles_en_literal,1,5,rg (no mmap),rg --no-mmap Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.23095202445983887,629,
|
||||
subtitles_en_literal,1,5,rg (no mmap),rg --no-mmap Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2577846050262451,629,
|
||||
subtitles_en_literal,1,5,rg (no mmap),rg --no-mmap Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2563819885253906,629,
|
||||
subtitles_en_literal,1,5,rg (no mmap),rg --no-mmap Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.24869346618652344,629,
|
||||
subtitles_en_literal,1,5,pt,pt -N Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.415337324142456,629,
|
||||
subtitles_en_literal,1,5,pt,pt -N Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.4208543300628662,629,
|
||||
subtitles_en_literal,1,5,pt,pt -N Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.416351079940796,629,
|
||||
subtitles_en_literal,1,5,pt,pt -N Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.4270708560943604,629,
|
||||
subtitles_en_literal,1,5,pt,pt -N Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.4243996143341064,629,
|
||||
subtitles_en_literal,1,5,sift,sift Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2245020866394043,629,
|
||||
subtitles_en_literal,1,5,sift,sift Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2382345199584961,629,
|
||||
subtitles_en_literal,1,5,sift,sift Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.23533034324645996,629,
|
||||
subtitles_en_literal,1,5,sift,sift Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2577829360961914,629,
|
||||
subtitles_en_literal,1,5,sift,sift Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2599349021911621,629,
|
||||
subtitles_en_literal,1,5,grep,grep -a Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.4733700752258301,629,LC_ALL=C
|
||||
subtitles_en_literal,1,5,grep,grep -a Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.4598572254180908,629,LC_ALL=C
|
||||
subtitles_en_literal,1,5,grep,grep -a Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5303301811218262,629,LC_ALL=C
|
||||
subtitles_en_literal,1,5,grep,grep -a Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.4775106906890869,629,LC_ALL=C
|
||||
subtitles_en_literal,1,5,grep,grep -a Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.4881136417388916,629,LC_ALL=C
|
||||
subtitles_en_literal,1,5,rg (lines),rg -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.20051789283752441,629,
|
||||
subtitles_en_literal,1,5,rg (lines),rg -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.17326998710632324,629,
|
||||
subtitles_en_literal,1,5,rg (lines),rg -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.20733428001403809,629,
|
||||
subtitles_en_literal,1,5,rg (lines),rg -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.189713716506958,629,
|
||||
subtitles_en_literal,1,5,rg (lines),rg -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.17817258834838867,629,
|
||||
subtitles_en_literal,1,5,ag (lines),ag -s Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5327835083007812,629,
|
||||
subtitles_en_literal,1,5,ag (lines),ag -s Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5411181449890137,629,
|
||||
subtitles_en_literal,1,5,ag (lines),ag -s Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.600783109664917,629,
|
||||
subtitles_en_literal,1,5,ag (lines),ag -s Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5838911533355713,629,
|
||||
subtitles_en_literal,1,5,ag (lines),ag -s Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.6051928997039795,629,
|
||||
subtitles_en_literal,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.4090385437011719,629,
|
||||
subtitles_en_literal,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3816399574279785,629,
|
||||
subtitles_en_literal,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.38033008575439453,629,
|
||||
subtitles_en_literal,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3731727600097656,629,
|
||||
subtitles_en_literal,1,5,ucg (lines),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.38796329498291016,629,
|
||||
subtitles_en_literal,1,5,pt (lines),pt Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.4102630615234375,629,
|
||||
subtitles_en_literal,1,5,pt (lines),pt Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.4137451648712158,629,
|
||||
subtitles_en_literal,1,5,pt (lines),pt Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.4649333953857422,629,
|
||||
subtitles_en_literal,1,5,pt (lines),pt Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.430387258529663,629,
|
||||
subtitles_en_literal,1,5,pt (lines),pt Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.541991949081421,629,
|
||||
subtitles_en_literal,1,5,sift (lines),sift -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.6231405735015869,629,
|
||||
subtitles_en_literal,1,5,sift (lines),sift -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5986526012420654,629,
|
||||
subtitles_en_literal,1,5,sift (lines),sift -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5821917057037354,629,
|
||||
subtitles_en_literal,1,5,sift (lines),sift -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.6045489311218262,629,
|
||||
subtitles_en_literal,1,5,sift (lines),sift -n Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5986905097961426,629,
|
||||
subtitles_en_literal,1,5,grep (lines),grep -an Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.8278565406799316,629,LC_ALL=C
|
||||
subtitles_en_literal,1,5,grep (lines),grep -an Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.777052640914917,629,LC_ALL=C
|
||||
subtitles_en_literal,1,5,grep (lines),grep -an Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.7619414329528809,629,LC_ALL=C
|
||||
subtitles_en_literal,1,5,grep (lines),grep -an Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.8248744010925293,629,LC_ALL=C
|
||||
subtitles_en_literal,1,5,grep (lines),grep -an Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.824932336807251,629,LC_ALL=C
|
||||
subtitles_en_literal_casei,1,5,rg,rg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2718961238861084,642,
|
||||
subtitles_en_literal_casei,1,5,rg,rg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.27082157135009766,642,
|
||||
subtitles_en_literal_casei,1,5,rg,rg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.27086758613586426,642,
|
||||
subtitles_en_literal_casei,1,5,rg,rg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.274705171585083,642,
|
||||
subtitles_en_literal_casei,1,5,rg,rg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3337059020996094,642,
|
||||
subtitles_en_literal_casei,1,5,grep,grep -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.9112112522125244,642,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_casei,1,5,grep,grep -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.907888650894165,642,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_casei,1,5,grep,grep -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.912668228149414,642,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_casei,1,5,grep,grep -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.9082865715026855,642,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_casei,1,5,grep,grep -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.9177796840667725,642,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_casei,1,5,grep (ASCII),grep -E -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.6020669937133789,642,LC_ALL=C
|
||||
subtitles_en_literal_casei,1,5,grep (ASCII),grep -E -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.568228006362915,642,LC_ALL=C
|
||||
subtitles_en_literal_casei,1,5,grep (ASCII),grep -E -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5648214817047119,642,LC_ALL=C
|
||||
subtitles_en_literal_casei,1,5,grep (ASCII),grep -E -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5568234920501709,642,LC_ALL=C
|
||||
subtitles_en_literal_casei,1,5,grep (ASCII),grep -E -ai Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5588953495025635,642,LC_ALL=C
|
||||
subtitles_en_literal_casei,1,5,rg (lines),rg -n -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3486766815185547,642,
|
||||
subtitles_en_literal_casei,1,5,rg (lines),rg -n -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.34010815620422363,642,
|
||||
subtitles_en_literal_casei,1,5,rg (lines),rg -n -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.33849263191223145,642,
|
||||
subtitles_en_literal_casei,1,5,rg (lines),rg -n -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3917088508605957,642,
|
||||
subtitles_en_literal_casei,1,5,rg (lines),rg -n -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.39266490936279297,642,
|
||||
subtitles_en_literal_casei,1,5,ag (lines) (ASCII),ag -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5564041137695312,642,
|
||||
subtitles_en_literal_casei,1,5,ag (lines) (ASCII),ag -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5533506870269775,642,
|
||||
subtitles_en_literal_casei,1,5,ag (lines) (ASCII),ag -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.6205368041992188,642,
|
||||
subtitles_en_literal_casei,1,5,ag (lines) (ASCII),ag -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5530028343200684,642,
|
||||
subtitles_en_literal_casei,1,5,ag (lines) (ASCII),ag -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.6189889907836914,642,
|
||||
subtitles_en_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3834850788116455,642,
|
||||
subtitles_en_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.41916346549987793,642,
|
||||
subtitles_en_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3895289897918701,642,
|
||||
subtitles_en_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.4278140068054199,642,
|
||||
subtitles_en_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.4013493061065674,642,
|
||||
subtitles_en_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.17953085899353027,629,
|
||||
subtitles_en_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.17679834365844727,629,
|
||||
subtitles_en_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.17448186874389648,629,
|
||||
subtitles_en_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.21117281913757324,629,
|
||||
subtitles_en_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.1848156452178955,629,
|
||||
subtitles_en_literal_word,1,5,ag (ASCII),ag -sw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5236153602600098,629,
|
||||
subtitles_en_literal_word,1,5,ag (ASCII),ag -sw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.52512526512146,629,
|
||||
subtitles_en_literal_word,1,5,ag (ASCII),ag -sw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5218794345855713,629,
|
||||
subtitles_en_literal_word,1,5,ag (ASCII),ag -sw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5384306907653809,629,
|
||||
subtitles_en_literal_word,1,5,ag (ASCII),ag -sw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5150353908538818,629,
|
||||
subtitles_en_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3757903575897217,629,
|
||||
subtitles_en_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3744041919708252,629,
|
||||
subtitles_en_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.37261366844177246,629,
|
||||
subtitles_en_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.40795230865478516,629,
|
||||
subtitles_en_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.3868849277496338,629,
|
||||
subtitles_en_literal_word,1,5,grep (ASCII),grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.8265349864959717,629,LC_ALL=C
|
||||
subtitles_en_literal_word,1,5,grep (ASCII),grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.8123743534088135,629,LC_ALL=C
|
||||
subtitles_en_literal_word,1,5,grep (ASCII),grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.7669925689697266,629,LC_ALL=C
|
||||
subtitles_en_literal_word,1,5,grep (ASCII),grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.766636848449707,629,LC_ALL=C
|
||||
subtitles_en_literal_word,1,5,grep (ASCII),grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.7665839195251465,629,LC_ALL=C
|
||||
subtitles_en_literal_word,1,5,rg,rg -nw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.1879115104675293,629,
|
||||
subtitles_en_literal_word,1,5,rg,rg -nw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18082356452941895,629,
|
||||
subtitles_en_literal_word,1,5,rg,rg -nw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18497347831726074,629,
|
||||
subtitles_en_literal_word,1,5,rg,rg -nw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.1769394874572754,629,
|
||||
subtitles_en_literal_word,1,5,rg,rg -nw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.1917715072631836,629,
|
||||
subtitles_en_literal_word,1,5,grep,grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.8192996978759766,629,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_word,1,5,grep,grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.8193323612213135,629,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_word,1,5,grep,grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.7837738990783691,629,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_word,1,5,grep,grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.7639024257659912,629,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_word,1,5,grep,grep -anw Sherlock Holmes /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.7634689807891846,629,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.7922985553741455,13,
|
||||
subtitles_en_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.7885758876800537,13,
|
||||
subtitles_en_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.802325963973999,13,
|
||||
subtitles_en_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.792595386505127,13,
|
||||
subtitles_en_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.7909605503082275,13,
|
||||
subtitles_en_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5903098583221436,13,
|
||||
subtitles_en_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5982813835144043,13,
|
||||
subtitles_en_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5926671028137207,13,
|
||||
subtitles_en_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.5976767539978027,13,
|
||||
subtitles_en_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.593153953552246,13,
|
||||
subtitles_en_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,6.614634275436401,48,
|
||||
subtitles_en_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,6.574857473373413,48,
|
||||
subtitles_en_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,6.54079270362854,48,
|
||||
subtitles_en_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,6.600660800933838,48,
|
||||
subtitles_en_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,6.531627178192139,48,
|
||||
subtitles_en_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,5.361133337020874,13,
|
||||
subtitles_en_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,5.456786870956421,13,
|
||||
subtitles_en_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,5.403071403503418,13,
|
||||
subtitles_en_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,5.398236274719238,13,
|
||||
subtitles_en_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,5.348573923110962,13,
|
||||
subtitles_en_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.5057969093322754,13,LC_ALL=C
|
||||
subtitles_en_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.4157862663269043,13,LC_ALL=C
|
||||
subtitles_en_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.471182346343994,13,LC_ALL=C
|
||||
subtitles_en_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.4590909481048584,13,LC_ALL=C
|
||||
subtitles_en_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.3759689331054688,13,LC_ALL=C
|
||||
subtitles_en_surrounding_words,1,5,rg,rg -n \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18518710136413574,317,
|
||||
subtitles_en_surrounding_words,1,5,rg,rg -n \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18791556358337402,317,
|
||||
subtitles_en_surrounding_words,1,5,rg,rg -n \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18598675727844238,317,
|
||||
subtitles_en_surrounding_words,1,5,rg,rg -n \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18552684783935547,317,
|
||||
subtitles_en_surrounding_words,1,5,rg,rg -n \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.19262075424194336,317,
|
||||
subtitles_en_surrounding_words,1,5,grep,grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.1321008205413818,317,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_surrounding_words,1,5,grep,grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.0709969997406006,317,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_surrounding_words,1,5,grep,grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.1117346286773682,317,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_surrounding_words,1,5,grep,grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.0880234241485596,317,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_surrounding_words,1,5,grep,grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.0745558738708496,317,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_surrounding_words,1,5,rg (ASCII),rg -n (?-u)\w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.1827528476715088,317,
|
||||
subtitles_en_surrounding_words,1,5,rg (ASCII),rg -n (?-u)\w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18874144554138184,317,
|
||||
subtitles_en_surrounding_words,1,5,rg (ASCII),rg -n (?-u)\w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.17983436584472656,317,
|
||||
subtitles_en_surrounding_words,1,5,rg (ASCII),rg -n (?-u)\w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.18831133842468262,317,
|
||||
subtitles_en_surrounding_words,1,5,rg (ASCII),rg -n (?-u)\w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.17810606956481934,317,
|
||||
subtitles_en_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,4.5957207679748535,323,
|
||||
subtitles_en_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,4.627211570739746,323,
|
||||
subtitles_en_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,4.554431200027466,323,
|
||||
subtitles_en_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,4.492656469345093,323,
|
||||
subtitles_en_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,4.443558216094971,323,
|
||||
subtitles_en_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.522758722305298,317,
|
||||
subtitles_en_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.502918004989624,317,
|
||||
subtitles_en_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.6503307819366455,317,
|
||||
subtitles_en_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.58940052986145,317,
|
||||
subtitles_en_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,3.569624423980713,317,
|
||||
subtitles_en_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.0672054290771484,317,LC_ALL=C
|
||||
subtitles_en_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.0729331970214844,317,LC_ALL=C
|
||||
subtitles_en_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.052501916885376,317,LC_ALL=C
|
||||
subtitles_en_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.0711696147918701,317,LC_ALL=C
|
||||
subtitles_en_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Holmes\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,1.0863316059112549,317,LC_ALL=C
|
||||
subtitles_ru_alternate,1,5,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0312588214874268,691,
|
||||
subtitles_ru_alternate,1,5,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.063939094543457,691,
|
||||
subtitles_ru_alternate,1,5,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0000121593475342,691,
|
||||
subtitles_ru_alternate,1,5,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9842438697814941,691,
|
||||
subtitles_ru_alternate,1,5,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.95733642578125,691,
|
||||
subtitles_ru_alternate,1,5,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.7781903743743896,691,
|
||||
subtitles_ru_alternate,1,5,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.861164093017578,691,
|
||||
subtitles_ru_alternate,1,5,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.8268885612487793,691,
|
||||
subtitles_ru_alternate,1,5,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.8621268272399902,691,
|
||||
subtitles_ru_alternate,1,5,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.8216166496276855,691,
|
||||
subtitles_ru_alternate,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0069098472595215,691,
|
||||
subtitles_ru_alternate,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.025178909301758,691,
|
||||
subtitles_ru_alternate,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0631070137023926,691,
|
||||
subtitles_ru_alternate,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0902633666992188,691,
|
||||
subtitles_ru_alternate,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0272655487060547,691,
|
||||
subtitles_ru_alternate,1,5,grep (lines),grep -E -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.510146617889404,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,5,grep (lines),grep -E -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.541701793670654,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,5,grep (lines),grep -E -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.506088733673096,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,5,grep (lines),grep -E -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.51838755607605,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,5,grep (lines),grep -E -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.486810684204102,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,5,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9679937362670898,691,
|
||||
subtitles_ru_alternate,1,5,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9942011833190918,691,
|
||||
subtitles_ru_alternate,1,5,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9233448505401611,691,
|
||||
subtitles_ru_alternate,1,5,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9294781684875488,691,
|
||||
subtitles_ru_alternate,1,5,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.8729774951934814,691,
|
||||
subtitles_ru_alternate,1,5,grep,grep -E -a Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.100147485733032,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,5,grep,grep -E -a Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.075790166854858,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,5,grep,grep -E -a Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.069685220718384,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,5,grep,grep -E -a Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.0526063442230225,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,5,grep,grep -E -a Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.129194498062134,691,LC_ALL=C
|
||||
subtitles_ru_alternate_casei,1,5,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.7894201278686523,691,
|
||||
subtitles_ru_alternate_casei,1,5,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.7878782749176025,691,
|
||||
subtitles_ru_alternate_casei,1,5,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.796328544616699,691,
|
||||
subtitles_ru_alternate_casei,1,5,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.8249149322509766,691,
|
||||
subtitles_ru_alternate_casei,1,5,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.7949724197387695,691,
|
||||
subtitles_ru_alternate_casei,1,5,ucg (ASCII),ucg -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.075739622116089,691,
|
||||
subtitles_ru_alternate_casei,1,5,ucg (ASCII),ucg -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.013590097427368,691,
|
||||
subtitles_ru_alternate_casei,1,5,ucg (ASCII),ucg -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.012375593185425,691,
|
||||
subtitles_ru_alternate_casei,1,5,ucg (ASCII),ucg -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.023118495941162,691,
|
||||
subtitles_ru_alternate_casei,1,5,ucg (ASCII),ucg -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0641982555389404,691,
|
||||
subtitles_ru_alternate_casei,1,5,grep (ASCII),grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.467320442199707,691,LC_ALL=C
|
||||
subtitles_ru_alternate_casei,1,5,grep (ASCII),grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.486851692199707,691,LC_ALL=C
|
||||
subtitles_ru_alternate_casei,1,5,grep (ASCII),grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.479818344116211,691,LC_ALL=C
|
||||
subtitles_ru_alternate_casei,1,5,grep (ASCII),grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.516186475753784,691,LC_ALL=C
|
||||
subtitles_ru_alternate_casei,1,5,grep (ASCII),grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,7.471773862838745,691,LC_ALL=C
|
||||
subtitles_ru_alternate_casei,1,5,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,11.026185274124146,735,
|
||||
subtitles_ru_alternate_casei,1,5,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,11.168465614318848,735,
|
||||
subtitles_ru_alternate_casei,1,5,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,11.039950370788574,735,
|
||||
subtitles_ru_alternate_casei,1,5,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,11.089850425720215,735,
|
||||
subtitles_ru_alternate_casei,1,5,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,11.112446546554565,735,
|
||||
subtitles_ru_alternate_casei,1,5,grep,grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.822641849517822,735,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_alternate_casei,1,5,grep,grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.808355331420898,735,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_alternate_casei,1,5,grep,grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.80171275138855,735,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_alternate_casei,1,5,grep,grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.794351577758789,735,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_alternate_casei,1,5,grep,grep -E -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.844403266906738,735,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal,1,5,rg,rg Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.20681476593017578,583,
|
||||
subtitles_ru_literal,1,5,rg,rg Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.190568208694458,583,
|
||||
subtitles_ru_literal,1,5,rg,rg Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.18462657928466797,583,
|
||||
subtitles_ru_literal,1,5,rg,rg Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.1873643398284912,583,
|
||||
subtitles_ru_literal,1,5,rg,rg Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.20382428169250488,583,
|
||||
subtitles_ru_literal,1,5,rg (no mmap),rg --no-mmap Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.3085510730743408,583,
|
||||
subtitles_ru_literal,1,5,rg (no mmap),rg --no-mmap Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.318758487701416,583,
|
||||
subtitles_ru_literal,1,5,rg (no mmap),rg --no-mmap Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.3177149295806885,583,
|
||||
subtitles_ru_literal,1,5,rg (no mmap),rg --no-mmap Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.31236958503723145,583,
|
||||
subtitles_ru_literal,1,5,rg (no mmap),rg --no-mmap Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.31880998611450195,583,
|
||||
subtitles_ru_literal,1,5,pt,pt -N Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.152938365936279,583,
|
||||
subtitles_ru_literal,1,5,pt,pt -N Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.124867677688599,583,
|
||||
subtitles_ru_literal,1,5,pt,pt -N Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.132290363311768,583,
|
||||
subtitles_ru_literal,1,5,pt,pt -N Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.158328056335449,583,
|
||||
subtitles_ru_literal,1,5,pt,pt -N Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.1022467613220215,583,
|
||||
subtitles_ru_literal,1,5,sift,sift Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.807113409042358,583,
|
||||
subtitles_ru_literal,1,5,sift,sift Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.8178558349609375,583,
|
||||
subtitles_ru_literal,1,5,sift,sift Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.925220012664795,583,
|
||||
subtitles_ru_literal,1,5,sift,sift Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.861236333847046,583,
|
||||
subtitles_ru_literal,1,5,sift,sift Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.763278484344482,583,
|
||||
subtitles_ru_literal,1,5,grep,grep -a Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.704503059387207,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,5,grep,grep -a Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6887199878692627,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,5,grep,grep -a Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7092702388763428,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,5,grep,grep -a Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6964359283447266,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,5,grep,grep -a Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6928379535675049,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,5,rg (lines),rg -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2646975517272949,583,
|
||||
subtitles_ru_literal,1,5,rg (lines),rg -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.26806163787841797,583,
|
||||
subtitles_ru_literal,1,5,rg (lines),rg -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2700214385986328,583,
|
||||
subtitles_ru_literal,1,5,rg (lines),rg -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2669072151184082,583,
|
||||
subtitles_ru_literal,1,5,rg (lines),rg -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2656106948852539,583,
|
||||
subtitles_ru_literal,1,5,ag (lines),ag -s Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.9972407817840576,583,
|
||||
subtitles_ru_literal,1,5,ag (lines),ag -s Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.906053066253662,583,
|
||||
subtitles_ru_literal,1,5,ag (lines),ag -s Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.864766836166382,583,
|
||||
subtitles_ru_literal,1,5,ag (lines),ag -s Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.7820546627044678,583,
|
||||
subtitles_ru_literal,1,5,ag (lines),ag -s Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.7599871158599854,583,
|
||||
subtitles_ru_literal,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.411653995513916,583,
|
||||
subtitles_ru_literal,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.394604206085205,583,
|
||||
subtitles_ru_literal,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.362853765487671,583,
|
||||
subtitles_ru_literal,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.4795477390289307,583,
|
||||
subtitles_ru_literal,1,5,ucg (lines),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.4428844451904297,583,
|
||||
subtitles_ru_literal,1,5,pt (lines),pt Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.122563123703003,583,
|
||||
subtitles_ru_literal,1,5,pt (lines),pt Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.17008900642395,583,
|
||||
subtitles_ru_literal,1,5,pt (lines),pt Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.1965367794036865,583,
|
||||
subtitles_ru_literal,1,5,pt (lines),pt Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.152370929718018,583,
|
||||
subtitles_ru_literal,1,5,pt (lines),pt Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,5.106513738632202,583,
|
||||
subtitles_ru_literal,1,5,sift (lines),sift -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.408761978149414,583,
|
||||
subtitles_ru_literal,1,5,sift (lines),sift -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.423579454421997,583,
|
||||
subtitles_ru_literal,1,5,sift (lines),sift -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.2807464599609375,583,
|
||||
subtitles_ru_literal,1,5,sift (lines),sift -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.3771467208862305,583,
|
||||
subtitles_ru_literal,1,5,sift (lines),sift -n Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.378506422042847,583,
|
||||
subtitles_ru_literal,1,5,grep (lines),grep -an Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.121800422668457,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,5,grep (lines),grep -an Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.1189923286437988,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,5,grep (lines),grep -an Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0678138732910156,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,5,grep (lines),grep -an Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0668041706085205,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,5,grep (lines),grep -an Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0713574886322021,583,LC_ALL=C
|
||||
subtitles_ru_literal_casei,1,5,rg,rg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9427816867828369,604,
|
||||
subtitles_ru_literal_casei,1,5,rg,rg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0397350788116455,604,
|
||||
subtitles_ru_literal_casei,1,5,rg,rg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9732518196105957,604,
|
||||
subtitles_ru_literal_casei,1,5,rg,rg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9387776851654053,604,
|
||||
subtitles_ru_literal_casei,1,5,rg,rg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.9536802768707275,604,
|
||||
subtitles_ru_literal_casei,1,5,grep,grep -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.338641405105591,604,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_casei,1,5,grep,grep -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.280565023422241,604,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_casei,1,5,grep,grep -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.241750240325928,604,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_casei,1,5,grep,grep -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.316105604171753,604,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_casei,1,5,grep,grep -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,6.307560205459595,604,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_casei,1,5,grep (ASCII),grep -E -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7379302978515625,583,LC_ALL=C
|
||||
subtitles_ru_literal_casei,1,5,grep (ASCII),grep -E -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7226619720458984,583,LC_ALL=C
|
||||
subtitles_ru_literal_casei,1,5,grep (ASCII),grep -E -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.683293342590332,583,LC_ALL=C
|
||||
subtitles_ru_literal_casei,1,5,grep (ASCII),grep -E -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.714146614074707,583,LC_ALL=C
|
||||
subtitles_ru_literal_casei,1,5,grep (ASCII),grep -E -ai Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7654330730438232,583,LC_ALL=C
|
||||
subtitles_ru_literal_casei,1,5,rg (lines),rg -n -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0237820148468018,604,
|
||||
subtitles_ru_literal_casei,1,5,rg (lines),rg -n -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0194151401519775,604,
|
||||
subtitles_ru_literal_casei,1,5,rg (lines),rg -n -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0364336967468262,604,
|
||||
subtitles_ru_literal_casei,1,5,rg (lines),rg -n -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.035005807876587,604,
|
||||
subtitles_ru_literal_casei,1,5,rg (lines),rg -n -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0438766479492188,604,
|
||||
subtitles_ru_literal_casei,1,5,ag (lines) (ASCII),ag -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.619025468826294,,
|
||||
subtitles_ru_literal_casei,1,5,ag (lines) (ASCII),ag -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.647244930267334,,
|
||||
subtitles_ru_literal_casei,1,5,ag (lines) (ASCII),ag -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6785612106323242,,
|
||||
subtitles_ru_literal_casei,1,5,ag (lines) (ASCII),ag -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6503715515136719,,
|
||||
subtitles_ru_literal_casei,1,5,ag (lines) (ASCII),ag -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6314499378204346,,
|
||||
subtitles_ru_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.8302316665649414,583,
|
||||
subtitles_ru_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7719593048095703,583,
|
||||
subtitles_ru_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7697594165802002,583,
|
||||
subtitles_ru_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7312629222869873,583,
|
||||
subtitles_ru_literal_casei,1,5,ucg (lines) (ASCII),ucg -i Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.767866849899292,583,
|
||||
subtitles_ru_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Шерлок Холмс(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.19411826133728027,,
|
||||
subtitles_ru_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Шерлок Холмс(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.18651676177978516,,
|
||||
subtitles_ru_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Шерлок Холмс(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.19614577293395996,,
|
||||
subtitles_ru_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Шерлок Холмс(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.18459081649780273,,
|
||||
subtitles_ru_literal_word,1,5,rg (ASCII),rg -n (?-u:\b)Шерлок Холмс(?-u:\b) /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.1797487735748291,,
|
||||
subtitles_ru_literal_word,1,5,ag (ASCII),ag -sw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6507105827331543,,
|
||||
subtitles_ru_literal_word,1,5,ag (ASCII),ag -sw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6480035781860352,,
|
||||
subtitles_ru_literal_word,1,5,ag (ASCII),ag -sw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.7138750553131104,,
|
||||
subtitles_ru_literal_word,1,5,ag (ASCII),ag -sw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6521759033203125,,
|
||||
subtitles_ru_literal_word,1,5,ag (ASCII),ag -sw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.6728894710540771,,
|
||||
subtitles_ru_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.3646819591522217,583,
|
||||
subtitles_ru_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.3836848735809326,583,
|
||||
subtitles_ru_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.419490337371826,583,
|
||||
subtitles_ru_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.363335609436035,583,
|
||||
subtitles_ru_literal_word,1,5,ucg (ASCII),ucg --nosmart-case Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.488351345062256,583,
|
||||
subtitles_ru_literal_word,1,5,grep (ASCII),grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.171506643295288,583,LC_ALL=C
|
||||
subtitles_ru_literal_word,1,5,grep (ASCII),grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.1602776050567627,583,LC_ALL=C
|
||||
subtitles_ru_literal_word,1,5,grep (ASCII),grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.084787368774414,583,LC_ALL=C
|
||||
subtitles_ru_literal_word,1,5,grep (ASCII),grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0714166164398193,583,LC_ALL=C
|
||||
subtitles_ru_literal_word,1,5,grep (ASCII),grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.083632469177246,583,LC_ALL=C
|
||||
subtitles_ru_literal_word,1,5,rg,rg -nw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2769143581390381,579,
|
||||
subtitles_ru_literal_word,1,5,rg,rg -nw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2694058418273926,579,
|
||||
subtitles_ru_literal_word,1,5,rg,rg -nw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.26763367652893066,579,
|
||||
subtitles_ru_literal_word,1,5,rg,rg -nw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2671318054199219,579,
|
||||
subtitles_ru_literal_word,1,5,rg,rg -nw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2922348976135254,579,
|
||||
subtitles_ru_literal_word,1,5,grep,grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.083528757095337,579,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_word,1,5,grep,grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0857081413269043,579,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_word,1,5,grep,grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.07025146484375,579,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_word,1,5,grep,grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.071930170059204,579,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_word,1,5,grep,grep -anw Шерлок Холмс /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.0709245204925537,579,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.1552906036376953,41,
|
||||
subtitles_ru_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.164951801300049,41,
|
||||
subtitles_ru_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.175389289855957,41,
|
||||
subtitles_ru_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.1861774921417236,41,
|
||||
subtitles_ru_no_literal,1,5,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,3.153625011444092,41,
|
||||
subtitles_ru_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.7353317737579346,,
|
||||
subtitles_ru_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.7592883110046387,,
|
||||
subtitles_ru_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.7242491245269775,,
|
||||
subtitles_ru_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.747089385986328,,
|
||||
subtitles_ru_no_literal,1,5,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.732586145401001,,
|
||||
subtitles_ru_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0796375274658203,,
|
||||
subtitles_ru_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.9670393466949463,,
|
||||
subtitles_ru_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.9413447380065918,,
|
||||
subtitles_ru_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.916764497756958,,
|
||||
subtitles_ru_no_literal,1,5,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.9110031127929688,,
|
||||
subtitles_ru_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0622072219848633,,
|
||||
subtitles_ru_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0975682735443115,,
|
||||
subtitles_ru_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0741493701934814,,
|
||||
subtitles_ru_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0423810482025146,,
|
||||
subtitles_ru_no_literal,1,5,ucg (ASCII),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.000764846801758,,
|
||||
subtitles_ru_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.6251120567321777,,LC_ALL=C
|
||||
subtitles_ru_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.644089698791504,,LC_ALL=C
|
||||
subtitles_ru_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.6416165828704834,,LC_ALL=C
|
||||
subtitles_ru_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.6321892738342285,,LC_ALL=C
|
||||
subtitles_ru_no_literal,1,5,grep (ASCII),grep -E -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.6264762878417969,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,5,rg,rg -n \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.29879307746887207,278,
|
||||
subtitles_ru_surrounding_words,1,5,rg,rg -n \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.3226010799407959,278,
|
||||
subtitles_ru_surrounding_words,1,5,rg,rg -n \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.32187771797180176,278,
|
||||
subtitles_ru_surrounding_words,1,5,rg,rg -n \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.2825047969818115,278,
|
||||
subtitles_ru_surrounding_words,1,5,rg,rg -n \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,0.283217191696167,278,
|
||||
subtitles_ru_surrounding_words,1,5,grep,grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.3977878093719482,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,5,grep,grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.4288139343261719,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,5,grep,grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.4054889678955078,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,5,grep,grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.4003441333770752,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,5,grep,grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.5269148349761963,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.8912529945373535,,
|
||||
subtitles_ru_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.9221522808074951,,
|
||||
subtitles_ru_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.9416618347167969,,
|
||||
subtitles_ru_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.893650770187378,,
|
||||
subtitles_ru_surrounding_words,1,5,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.8895554542541504,,
|
||||
subtitles_ru_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0110745429992676,,
|
||||
subtitles_ru_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.9790067672729492,,
|
||||
subtitles_ru_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.0426392555236816,,
|
||||
subtitles_ru_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.121723175048828,,
|
||||
subtitles_ru_surrounding_words,1,5,ucg (ASCII),ucg --nosmart-case \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,2.1247596740722656,,
|
||||
subtitles_ru_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.3579976558685303,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.382859468460083,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.393401861190796,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.474374532699585,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,5,grep (ASCII),grep -E -an \w+\s+Холмс\s+\w+ /tmp/benchsuite/subtitles/OpenSubtitles2016.raw.ru,1.3835601806640625,,LC_ALL=C
|
||||
|
@@ -1,235 +0,0 @@
|
||||
linux_alternates (pattern: ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT)
|
||||
-------------------------------------------------------------------------
|
||||
rg (ignore) 0.100 +/- 0.003 (lines: 68)
|
||||
ag (ignore) 0.501 +/- 0.033 (lines: 68)
|
||||
git grep (ignore) 0.267 +/- 0.004 (lines: 68)
|
||||
rg (whitelist)* 0.090 +/- 0.001 (lines: 68)*
|
||||
ucg (whitelist) 0.135 +/- 0.003 (lines: 68)
|
||||
|
||||
linux_alternates_casei (pattern: ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT)
|
||||
-------------------------------------------------------------------------------
|
||||
rg (ignore) 0.124 +/- 0.004 (lines: 160)
|
||||
ag (ignore) 0.564 +/- 0.041 (lines: 160)
|
||||
git grep (ignore) 0.928 +/- 0.033 (lines: 160)
|
||||
rg (whitelist)* 0.096 +/- 0.003 (lines: 160)*
|
||||
ucg (whitelist) 0.248 +/- 0.008 (lines: 160)
|
||||
|
||||
linux_literal (pattern: PM_RESUME)
|
||||
----------------------------------
|
||||
rg (ignore)* 0.082 +/- 0.001 (lines: 16)*
|
||||
rg (ignore) (mmap) 0.751 +/- 0.062 (lines: 16)
|
||||
ag (ignore) (mmap) 0.612 +/- 0.065 (lines: 16)
|
||||
pt (ignore) 0.195 +/- 0.020 (lines: 16)
|
||||
sift (ignore) 0.468 +/- 0.006 (lines: 16)
|
||||
git grep (ignore) 0.196 +/- 0.005 (lines: 16)
|
||||
rg (whitelist) 0.085 +/- 0.003 (lines: 16)
|
||||
ucg (whitelist) 0.159 +/- 0.002 (lines: 16)
|
||||
|
||||
linux_literal_casei (pattern: PM_RESUME)
|
||||
----------------------------------------
|
||||
rg (ignore) 0.105 +/- 0.003 (lines: 374)
|
||||
rg (ignore) (mmap) 0.799 +/- 0.012 (lines: 374)
|
||||
ag (ignore) (mmap) 0.469 +/- 0.030 (lines: 374)
|
||||
pt (ignore) 14.177 +/- 0.049 (lines: 374)
|
||||
sift (ignore) 0.460 +/- 0.006 (lines: 374)
|
||||
git grep (ignore) 0.198 +/- 0.006 (lines: 370)
|
||||
rg (whitelist)* 0.097 +/- 0.003 (lines: 370)*
|
||||
ucg (whitelist) 0.154 +/- 0.003 (lines: 370)
|
||||
|
||||
linux_literal_default (pattern: PM_RESUME)
|
||||
------------------------------------------
|
||||
rg* 0.089 +/- 0.002 (lines: 16)*
|
||||
ag 0.469 +/- 0.038 (lines: 16)
|
||||
ucg 0.154 +/- 0.001 (lines: 16)
|
||||
pt 0.237 +/- 0.040 (lines: 16)
|
||||
sift 0.126 +/- 0.003 (lines: 16)
|
||||
git grep 0.175 +/- 0.013 (lines: 16)
|
||||
|
||||
linux_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||
-----------------------------------------------------------------
|
||||
rg (ignore) 0.329 +/- 0.006 (lines: 490)
|
||||
rg (ignore) (ASCII) 0.172 +/- 0.002 (lines: 490)
|
||||
ag (ignore) (ASCII) 0.725 +/- 0.067 (lines: 766)
|
||||
pt (ignore) (ASCII) 12.478 +/- 0.097 (lines: 490)
|
||||
sift (ignore) (ASCII) 9.002 +/- 0.096 (lines: 490)
|
||||
git grep (ignore) 8.542 +/- 0.277 (lines: 490)
|
||||
git grep (ignore) (ASCII) 1.883 +/- 0.087 (lines: 490)
|
||||
rg (whitelist) 0.289 +/- 0.006 (lines: 458)
|
||||
rg (whitelist) (ASCII)* 0.160 +/- 0.001 (lines: 458)*
|
||||
ucg (whitelist) (ASCII) 0.474 +/- 0.020 (lines: 416)
|
||||
|
||||
linux_re_literal_suffix (pattern: [A-Z]+_RESUME)
|
||||
------------------------------------------------
|
||||
rg (ignore) 0.084 +/- 0.002 (lines: 1652)
|
||||
ag (ignore) 0.483 +/- 0.006 (lines: 1652)
|
||||
pt (ignore) 14.128 +/- 0.026 (lines: 1652)
|
||||
sift (ignore) 4.099 +/- 0.103 (lines: 1652)
|
||||
git grep (ignore) 0.529 +/- 0.014 (lines: 1652)
|
||||
rg (whitelist)* 0.078 +/- 0.002 (lines: 1630)*
|
||||
ucg (whitelist) 0.135 +/- 0.002 (lines: 1630)
|
||||
|
||||
linux_unicode_greek (pattern: \p{Greek})
|
||||
----------------------------------------
|
||||
rg* 0.172 +/- 0.002 (lines: 23)*
|
||||
pt 14.122 +/- 0.031 (lines: 23)
|
||||
sift 2.826 +/- 0.012 (lines: 23)
|
||||
|
||||
linux_unicode_greek_casei (pattern: \p{Greek})
|
||||
----------------------------------------------
|
||||
rg 0.170 +/- 0.001 (lines: 103)
|
||||
pt 14.120 +/- 0.039 (lines: 23)
|
||||
sift* 0.004 +/- 0.000 (lines: 0)*
|
||||
|
||||
linux_unicode_word (pattern: \wAh)
|
||||
----------------------------------
|
||||
rg (ignore) 0.098 +/- 0.002 (lines: 186)
|
||||
rg (ignore) (ASCII) 0.096 +/- 0.002 (lines: 174)
|
||||
ag (ignore) (ASCII) 0.627 +/- 0.038 (lines: 174)
|
||||
pt (ignore) (ASCII) 14.182 +/- 0.024 (lines: 174)
|
||||
sift (ignore) (ASCII) 4.135 +/- 0.119 (lines: 174)
|
||||
git grep (ignore) 4.854 +/- 0.643 (lines: 186)
|
||||
git grep (ignore) (ASCII) 1.376 +/- 0.035 (lines: 174)
|
||||
rg (whitelist) 0.081 +/- 0.001 (lines: 180)*
|
||||
rg (whitelist) (ASCII)* 0.082 +/- 0.002 (lines: 168)
|
||||
ucg (ASCII) 0.155 +/- 0.003 (lines: 168)
|
||||
|
||||
linux_word (pattern: PM_RESUME)
|
||||
-------------------------------
|
||||
rg (ignore) 0.091 +/- 0.002 (lines: 6)
|
||||
ag (ignore) 0.461 +/- 0.020 (lines: 6)
|
||||
pt (ignore) 14.223 +/- 0.038 (lines: 6)
|
||||
sift (ignore) 3.226 +/- 0.043 (lines: 6)
|
||||
git grep (ignore) 0.173 +/- 0.006 (lines: 6)
|
||||
rg (whitelist)* 0.076 +/- 0.001 (lines: 6)*
|
||||
ucg (whitelist) 0.156 +/- 0.003 (lines: 6)
|
||||
|
||||
subtitles_en_alternate (pattern: Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty)
|
||||
---------------------------------------------------------------------------------------------------------------
|
||||
rg (lines) 0.311 +/- 0.026 (lines: 848)
|
||||
ag (lines) 2.242 +/- 0.086 (lines: 848)
|
||||
ucg (lines) 1.132 +/- 0.017 (lines: 848)
|
||||
grep (lines) 1.828 +/- 0.017 (lines: 848)
|
||||
rg* 0.226 +/- 0.031 (lines: 848)*
|
||||
grep 1.528 +/- 0.057 (lines: 848)
|
||||
|
||||
subtitles_en_alternate_casei (pattern: Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty)
|
||||
---------------------------------------------------------------------------------------------------------------------
|
||||
ag (ASCII) 3.745 +/- 0.035 (lines: 862)
|
||||
ucg (ASCII) 2.423 +/- 0.030 (lines: 862)
|
||||
grep (ASCII) 2.969 +/- 0.040 (lines: 862)
|
||||
rg* 1.952 +/- 0.049 (lines: 862)*
|
||||
grep 2.928 +/- 0.012 (lines: 862)
|
||||
|
||||
subtitles_en_literal (pattern: Sherlock Holmes)
|
||||
-----------------------------------------------
|
||||
rg* 0.130 +/- 0.014 (lines: 629)*
|
||||
rg (no mmap) 0.243 +/- 0.017 (lines: 629)
|
||||
pt 1.421 +/- 0.005 (lines: 629)
|
||||
sift 0.243 +/- 0.015 (lines: 629)
|
||||
grep 0.486 +/- 0.027 (lines: 629)
|
||||
rg (lines) 0.190 +/- 0.014 (lines: 629)
|
||||
ag (lines) 1.573 +/- 0.034 (lines: 629)
|
||||
ucg (lines) 0.386 +/- 0.014 (lines: 629)
|
||||
pt (lines) 1.452 +/- 0.055 (lines: 629)
|
||||
sift (lines) 0.601 +/- 0.015 (lines: 629)
|
||||
grep (lines) 0.803 +/- 0.031 (lines: 629)
|
||||
|
||||
subtitles_en_literal_casei (pattern: Sherlock Holmes)
|
||||
-----------------------------------------------------
|
||||
rg* 0.284 +/- 0.028 (lines: 642)*
|
||||
grep 1.912 +/- 0.004 (lines: 642)
|
||||
grep (ASCII) 0.570 +/- 0.018 (lines: 642)
|
||||
rg (lines) 0.362 +/- 0.028 (lines: 642)
|
||||
ag (lines) (ASCII) 1.580 +/- 0.036 (lines: 642)
|
||||
ucg (lines) (ASCII) 0.404 +/- 0.019 (lines: 642)
|
||||
|
||||
subtitles_en_literal_word (pattern: Sherlock Holmes)
|
||||
----------------------------------------------------
|
||||
rg (ASCII)* 0.185 +/- 0.015 (lines: 629)
|
||||
ag (ASCII) 1.525 +/- 0.009 (lines: 629)
|
||||
ucg (ASCII) 0.384 +/- 0.015 (lines: 629)
|
||||
grep (ASCII) 0.788 +/- 0.029 (lines: 629)
|
||||
rg 0.184 +/- 0.006 (lines: 629)*
|
||||
grep 0.790 +/- 0.028 (lines: 629)
|
||||
|
||||
subtitles_en_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||
----------------------------------------------------------------------------------------
|
||||
rg 1.793 +/- 0.005 (lines: 13)
|
||||
rg (ASCII)* 1.594 +/- 0.003 (lines: 13)*
|
||||
ag (ASCII) 6.573 +/- 0.036 (lines: 48)
|
||||
ucg (ASCII) 5.394 +/- 0.042 (lines: 13)
|
||||
grep (ASCII) 3.446 +/- 0.050 (lines: 13)
|
||||
|
||||
subtitles_en_surrounding_words (pattern: \w+\s+Holmes\s+\w+)
|
||||
------------------------------------------------------------
|
||||
rg 0.187 +/- 0.003 (lines: 317)
|
||||
grep 1.095 +/- 0.026 (lines: 317)
|
||||
rg (ASCII)* 0.184 +/- 0.005 (lines: 317)*
|
||||
ag (ASCII) 4.543 +/- 0.075 (lines: 323)
|
||||
ucg (ASCII) 3.567 +/- 0.058 (lines: 317)
|
||||
grep (ASCII) 1.070 +/- 0.012 (lines: 317)
|
||||
|
||||
subtitles_ru_alternate (pattern: Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти)
|
||||
-----------------------------------------------------------------------------------------------------------
|
||||
rg (lines) 1.007 +/- 0.041 (lines: 691)
|
||||
ag (lines) 3.830 +/- 0.035 (lines: 691)
|
||||
ucg (lines) 2.043 +/- 0.034 (lines: 691)
|
||||
grep (lines) 7.513 +/- 0.020 (lines: 691)
|
||||
rg* 0.938 +/- 0.046 (lines: 691)*
|
||||
grep 7.085 +/- 0.030 (lines: 691)
|
||||
|
||||
subtitles_ru_alternate_casei (pattern: Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти)
|
||||
-----------------------------------------------------------------------------------------------------------------
|
||||
ag (ASCII) 3.799 +/- 0.015 (lines: 691)
|
||||
ucg (ASCII)* 2.038 +/- 0.030 (lines: 691)*
|
||||
grep (ASCII) 7.484 +/- 0.019 (lines: 691)
|
||||
rg 11.087 +/- 0.057 (lines: 735)
|
||||
grep 6.814 +/- 0.020 (lines: 735)
|
||||
|
||||
subtitles_ru_literal (pattern: Шерлок Холмс)
|
||||
--------------------------------------------
|
||||
rg* 0.195 +/- 0.010 (lines: 583)*
|
||||
rg (no mmap) 0.315 +/- 0.005 (lines: 583)
|
||||
pt 5.134 +/- 0.023 (lines: 583)
|
||||
sift 5.835 +/- 0.061 (lines: 583)
|
||||
grep 0.698 +/- 0.008 (lines: 583)
|
||||
rg (lines) 0.267 +/- 0.002 (lines: 583)
|
||||
ag (lines) 2.862 +/- 0.096 (lines: 583)
|
||||
ucg (lines) 2.418 +/- 0.045 (lines: 583)
|
||||
pt (lines) 5.150 +/- 0.036 (lines: 583)
|
||||
sift (lines) 6.374 +/- 0.056 (lines: 583)
|
||||
grep (lines) 1.089 +/- 0.028 (lines: 583)
|
||||
|
||||
subtitles_ru_literal_casei (pattern: Шерлок Холмс)
|
||||
--------------------------------------------------
|
||||
rg 0.970 +/- 0.041 (lines: 604)
|
||||
grep 6.297 +/- 0.037 (lines: 604)
|
||||
grep (ASCII) 0.725 +/- 0.030 (lines: 583)
|
||||
rg (lines) 1.032 +/- 0.010 (lines: 604)
|
||||
ag (lines) (ASCII)* 0.645 +/- 0.022 (lines: 0)*
|
||||
ucg (lines) (ASCII) 0.774 +/- 0.036 (lines: 583)
|
||||
|
||||
subtitles_ru_literal_word (pattern: Шерлок Холмс)
|
||||
-------------------------------------------------
|
||||
rg (ASCII)* 0.188 +/- 0.007 (lines: 0)*
|
||||
ag (ASCII) 0.668 +/- 0.028 (lines: 0)
|
||||
ucg (ASCII) 2.404 +/- 0.052 (lines: 583)
|
||||
grep (ASCII) 1.114 +/- 0.048 (lines: 583)
|
||||
rg 0.275 +/- 0.011 (lines: 579)
|
||||
grep 1.076 +/- 0.008 (lines: 579)
|
||||
|
||||
subtitles_ru_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||
----------------------------------------------------------------------------------------
|
||||
rg 3.167 +/- 0.014 (lines: 41)
|
||||
rg (ASCII) 2.740 +/- 0.014 (lines: 0)
|
||||
ag (ASCII) 1.963 +/- 0.069 (lines: 0)
|
||||
ucg (ASCII) 2.055 +/- 0.037 (lines: 0)
|
||||
grep (ASCII)* 1.634 +/- 0.009 (lines: 0)*
|
||||
|
||||
subtitles_ru_surrounding_words (pattern: \w+\s+Холмс\s+\w+)
|
||||
-----------------------------------------------------------
|
||||
rg* 0.302 +/- 0.020 (lines: 278)*
|
||||
grep 1.432 +/- 0.055 (lines: 278)
|
||||
ag (ASCII) 1.908 +/- 0.023 (lines: 0)
|
||||
ucg (ASCII) 2.056 +/- 0.066 (lines: 0)
|
||||
grep (ASCII) 1.398 +/- 0.044 (lines: 0)
|
||||
172
build.rs
172
build.rs
@@ -1,184 +1,26 @@
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
use std::env;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{self, Read, Write};
|
||||
use std::path::Path;
|
||||
use std::process;
|
||||
use std::fs;
|
||||
|
||||
use clap::Shell;
|
||||
|
||||
use app::{RGArg, RGArgKind};
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[path = "src/app.rs"]
|
||||
mod app;
|
||||
|
||||
fn main() {
|
||||
// OUT_DIR is set by Cargo and it's where any additional build artifacts
|
||||
// are written.
|
||||
let outdir = match env::var_os("OUT_DIR") {
|
||||
None => return,
|
||||
Some(outdir) => outdir,
|
||||
None => {
|
||||
eprintln!(
|
||||
"OUT_DIR environment variable not defined. \
|
||||
Please file a bug: \
|
||||
https://github.com/BurntSushi/ripgrep/issues/new");
|
||||
process::exit(1);
|
||||
}
|
||||
};
|
||||
fs::create_dir_all(&outdir).unwrap();
|
||||
|
||||
let stamp_path = Path::new(&outdir).join("ripgrep-stamp");
|
||||
if let Err(err) = File::create(&stamp_path) {
|
||||
panic!("failed to write {}: {}", stamp_path.display(), err);
|
||||
}
|
||||
if let Err(err) = generate_man_page(&outdir) {
|
||||
eprintln!("failed to generate man page: {}", err);
|
||||
}
|
||||
|
||||
// Use clap to build completion files.
|
||||
let mut app = app::app();
|
||||
app.gen_completions("rg", Shell::Bash, &outdir);
|
||||
app.gen_completions("rg", Shell::Fish, &outdir);
|
||||
app.gen_completions("rg", Shell::PowerShell, &outdir);
|
||||
// Note that we do not use clap's support for zsh. Instead, zsh completions
|
||||
// are manually maintained in `complete/_rg`.
|
||||
|
||||
// Make the current git hash available to the build.
|
||||
if let Some(rev) = git_revision_hash() {
|
||||
println!("cargo:rustc-env=RIPGREP_BUILD_GIT_HASH={}", rev);
|
||||
}
|
||||
}
|
||||
|
||||
fn git_revision_hash() -> Option<String> {
|
||||
let result = process::Command::new("git")
|
||||
.args(&["rev-parse", "--short=10", "HEAD"])
|
||||
.output();
|
||||
result.ok().and_then(|output| {
|
||||
let v = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
||||
if v.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(v)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn generate_man_page<P: AsRef<Path>>(outdir: P) -> io::Result<()> {
|
||||
// If asciidoc isn't installed, then don't do anything.
|
||||
if let Err(err) = process::Command::new("a2x").output() {
|
||||
eprintln!("Could not run 'a2x' binary, skipping man page generation.");
|
||||
eprintln!("Error from running 'a2x': {}", err);
|
||||
return Ok(());
|
||||
}
|
||||
// 1. Read asciidoc template.
|
||||
// 2. Interpolate template with auto-generated docs.
|
||||
// 3. Save interpolation to disk.
|
||||
// 4. Use a2x (part of asciidoc) to convert to man page.
|
||||
let outdir = outdir.as_ref();
|
||||
let cwd = env::current_dir()?;
|
||||
let tpl_path = cwd.join("doc").join("rg.1.txt.tpl");
|
||||
let txt_path = outdir.join("rg.1.txt");
|
||||
|
||||
let mut tpl = String::new();
|
||||
File::open(&tpl_path)?.read_to_string(&mut tpl)?;
|
||||
tpl = tpl.replace("{OPTIONS}", &formatted_options()?);
|
||||
|
||||
let githash = git_revision_hash();
|
||||
let githash = githash.as_ref().map(|x| &**x);
|
||||
tpl = tpl.replace("{VERSION}", &app::long_version(githash));
|
||||
|
||||
File::create(&txt_path)?.write_all(tpl.as_bytes())?;
|
||||
let result = process::Command::new("a2x")
|
||||
.arg("--no-xmllint")
|
||||
.arg("--doctype").arg("manpage")
|
||||
.arg("--format").arg("manpage")
|
||||
.arg(&txt_path)
|
||||
.spawn()?
|
||||
.wait()?;
|
||||
if !result.success() {
|
||||
let msg = format!("'a2x' failed with exit code {:?}", result.code());
|
||||
return Err(ioerr(msg));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn formatted_options() -> io::Result<String> {
|
||||
let mut args = app::all_args_and_flags();
|
||||
args.sort_by(|x1, x2| x1.name.cmp(&x2.name));
|
||||
|
||||
let mut formatted = vec![];
|
||||
for arg in args {
|
||||
if arg.hidden {
|
||||
continue;
|
||||
}
|
||||
// ripgrep only has two positional arguments, and probably will only
|
||||
// ever have two positional arguments, so we just hardcode them into
|
||||
// the template.
|
||||
if let app::RGArgKind::Positional{..} = arg.kind {
|
||||
continue;
|
||||
}
|
||||
formatted.push(formatted_arg(&arg)?);
|
||||
}
|
||||
Ok(formatted.join("\n\n"))
|
||||
}
|
||||
|
||||
fn formatted_arg(arg: &RGArg) -> io::Result<String> {
|
||||
match arg.kind {
|
||||
RGArgKind::Positional{..} => panic!("unexpected positional argument"),
|
||||
RGArgKind::Switch { long, short, multiple } => {
|
||||
let mut out = vec![];
|
||||
|
||||
let mut header = format!("--{}", long);
|
||||
if let Some(short) = short {
|
||||
header = format!("-{}, {}", short, header);
|
||||
}
|
||||
if multiple {
|
||||
header = format!("*{}* ...::", header);
|
||||
} else {
|
||||
header = format!("*{}*::", header);
|
||||
}
|
||||
writeln!(out, "{}", header)?;
|
||||
writeln!(out, "{}", formatted_doc_txt(arg)?)?;
|
||||
|
||||
Ok(String::from_utf8(out).unwrap())
|
||||
}
|
||||
RGArgKind::Flag { long, short, value_name, multiple, .. } => {
|
||||
let mut out = vec![];
|
||||
|
||||
let mut header = format!("--{}", long);
|
||||
if let Some(short) = short {
|
||||
header = format!("-{}, {}", short, header);
|
||||
}
|
||||
if multiple {
|
||||
header = format!("*{}* _{}_ ...::", header, value_name);
|
||||
} else {
|
||||
header = format!("*{}* _{}_::", header, value_name);
|
||||
}
|
||||
writeln!(out, "{}", header)?;
|
||||
writeln!(out, "{}", formatted_doc_txt(arg)?)?;
|
||||
|
||||
Ok(String::from_utf8(out).unwrap())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn formatted_doc_txt(arg: &RGArg) -> io::Result<String> {
|
||||
let paragraphs: Vec<String> = arg.doc_long
|
||||
.replace("{", "{")
|
||||
.replace("}", r"}")
|
||||
.split("\n\n")
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
if paragraphs.is_empty() {
|
||||
return Err(ioerr(format!("missing docs for --{}", arg.name)));
|
||||
}
|
||||
let first = format!(" {}", paragraphs[0].replace("\n", "\n "));
|
||||
if paragraphs.len() == 1 {
|
||||
return Ok(first);
|
||||
}
|
||||
Ok(format!("{}\n+\n{}", first, paragraphs[1..].join("\n+\n")))
|
||||
}
|
||||
|
||||
fn ioerr(msg: String) -> io::Error {
|
||||
io::Error::new(io::ErrorKind::Other, msg)
|
||||
}
|
||||
|
||||
61
ci/before_deploy.sh
Executable file → Normal file
61
ci/before_deploy.sh
Executable file → Normal file
@@ -1,56 +1,35 @@
|
||||
#!/bin/bash
|
||||
|
||||
# package the build artifacts
|
||||
# `before_deploy` phase: here we package the build artifacts
|
||||
|
||||
set -ex
|
||||
|
||||
. "$(dirname $0)/utils.sh"
|
||||
. $(dirname $0)/utils.sh
|
||||
|
||||
# Generate artifacts for release
|
||||
mk_artifacts() {
|
||||
if is_arm; then
|
||||
cargo build --target "$TARGET" --release
|
||||
else
|
||||
# Technically, MUSL builds will force PCRE2 to get statically compiled,
|
||||
# but we also want PCRE2 statically build for macOS binaries.
|
||||
PCRE2_SYS_STATIC=1 cargo build --target "$TARGET" --release --features 'pcre2'
|
||||
fi
|
||||
RUSTFLAGS="-C target-feature=+ssse3" \
|
||||
cargo build --target $TARGET --release --features simd-accel
|
||||
}
|
||||
|
||||
mk_tarball() {
|
||||
# When cross-compiling, use the right `strip` tool on the binary.
|
||||
local gcc_prefix="$(gcc_prefix)"
|
||||
# Create a temporary dir that contains our staging area.
|
||||
# $tmpdir/$name is what eventually ends up as the deployed archive.
|
||||
local tmpdir="$(mktemp -d)"
|
||||
# create a "staging" directory
|
||||
local td=$(mktempd)
|
||||
local out_dir=$(pwd)
|
||||
local name="${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}"
|
||||
local staging="$tmpdir/$name"
|
||||
mkdir -p "$staging"/{complete,doc}
|
||||
# The deployment directory is where the final archive will reside.
|
||||
# This path is known by the .travis.yml configuration.
|
||||
local out_dir="$(pwd)/deployment"
|
||||
mkdir -p "$out_dir"
|
||||
# Find the correct (most recent) Cargo "out" directory. The out directory
|
||||
# contains shell completion files and the man page.
|
||||
local cargo_out_dir="$(cargo_out_dir "target/$TARGET")"
|
||||
mkdir "$td/$name"
|
||||
mkdir "$td/$name/complete"
|
||||
|
||||
# Copy the ripgrep binary and strip it.
|
||||
cp "target/$TARGET/release/rg" "$staging/rg"
|
||||
"${gcc_prefix}strip" "$staging/rg"
|
||||
# Copy the licenses and README.
|
||||
cp {README.md,UNLICENSE,COPYING,LICENSE-MIT} "$staging/"
|
||||
# Copy documentation and man page.
|
||||
cp {CHANGELOG.md,FAQ.md,GUIDE.md} "$staging/doc/"
|
||||
if command -V a2x 2>&1 > /dev/null; then
|
||||
# The man page should only exist if we have asciidoc installed.
|
||||
cp "$cargo_out_dir/rg.1" "$staging/doc/"
|
||||
fi
|
||||
# Copy shell completion files.
|
||||
cp "$cargo_out_dir"/{rg.bash,rg.fish,_rg.ps1} "$staging/complete/"
|
||||
cp complete/_rg "$staging/complete/"
|
||||
cp target/$TARGET/release/rg "$td/$name/rg"
|
||||
strip "$td/$name/rg"
|
||||
cp {doc/rg.1,README.md,UNLICENSE,COPYING,LICENSE-MIT} "$td/$name/"
|
||||
cp \
|
||||
target/$TARGET/release/build/ripgrep-*/out/{rg.bash-completion,rg.fish,_rg.ps1} \
|
||||
"$td/$name/complete/"
|
||||
cp complete/_rg "$td/$name/complete/"
|
||||
|
||||
(cd "$tmpdir" && tar czf "$out_dir/$name.tar.gz" "$name")
|
||||
rm -rf "$tmpdir"
|
||||
pushd $td
|
||||
tar czf "$out_dir/$name.tar.gz" *
|
||||
popd
|
||||
rm -r $td
|
||||
}
|
||||
|
||||
main() {
|
||||
|
||||
@@ -1,43 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
# This script builds a binary dpkg for Debian based distros. It does not
|
||||
# currently run in CI, and is instead run manually and the resulting dpkg is
|
||||
# uploaded to GitHub via the web UI.
|
||||
#
|
||||
# Note that this requires 'cargo deb', which can be installed with
|
||||
# 'cargo install cargo-deb'.
|
||||
#
|
||||
# This should be run from the root of the ripgrep repo.
|
||||
|
||||
if ! command -V cargo-deb > /dev/null 2>&1; then
|
||||
echo "cargo-deb command missing" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 'cargo deb' does not seem to provide a way to specify an asset that is
|
||||
# created at build time, such as ripgrep's man page. To work around this,
|
||||
# we force a debug build, copy out the man page (and shell completions)
|
||||
# produced from that build, put it into a predictable location and then build
|
||||
# the deb, which knows where to look.
|
||||
|
||||
DEPLOY_DIR=deployment/deb
|
||||
mkdir -p "$DEPLOY_DIR"
|
||||
cargo build
|
||||
|
||||
# Find and copy man page.
|
||||
manpage="$(find ./target/debug -name rg.1 -print0 | xargs -0 ls -t | head -n1)"
|
||||
cp "$manpage" "$DEPLOY_DIR/"
|
||||
|
||||
# Do the same for shell completions.
|
||||
compbash="$(find ./target/debug -name rg.bash -print0 | xargs -0 ls -t | head -n1)"
|
||||
cp "$compbash" "$DEPLOY_DIR/"
|
||||
compfish="$(find ./target/debug -name rg.fish -print0 | xargs -0 ls -t | head -n1)"
|
||||
cp "$compfish" "$DEPLOY_DIR/"
|
||||
compzsh="complete/_rg"
|
||||
cp "$compzsh" "$DEPLOY_DIR/"
|
||||
|
||||
# Since we're distributing the dpkg, we don't know whether the user will have
|
||||
# PCRE2 installed, so just do a static build.
|
||||
PCRE2_SYS_STATIC=1 cargo deb
|
||||
52
ci/install.sh
Executable file → Normal file
52
ci/install.sh
Executable file → Normal file
@@ -1,61 +1,57 @@
|
||||
#!/bin/bash
|
||||
|
||||
# install stuff needed for the `script` phase
|
||||
|
||||
# Where rustup gets installed.
|
||||
export PATH="$PATH:$HOME/.cargo/bin"
|
||||
# `install` phase: install stuff needed for the `script` phase
|
||||
|
||||
set -ex
|
||||
|
||||
. "$(dirname $0)/utils.sh"
|
||||
. $(dirname $0)/utils.sh
|
||||
|
||||
install_c_toolchain() {
|
||||
case $TARGET in
|
||||
aarch64-unknown-linux-gnu)
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
gcc-aarch64-linux-gnu libc6-arm64-cross libc6-dev-arm64-cross
|
||||
;;
|
||||
*)
|
||||
# For other targets, this is handled by addons.apt.packages in .travis.yml
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
install_rustup() {
|
||||
curl https://sh.rustup.rs -sSf \
|
||||
| sh -s -- -y --default-toolchain="$TRAVIS_RUST_VERSION"
|
||||
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain=$TRAVIS_RUST_VERSION
|
||||
|
||||
rustc -V
|
||||
cargo -V
|
||||
}
|
||||
|
||||
install_targets() {
|
||||
install_standard_crates() {
|
||||
if [ $(host) != "$TARGET" ]; then
|
||||
rustup target add $TARGET
|
||||
fi
|
||||
}
|
||||
|
||||
install_osx_dependencies() {
|
||||
if ! is_osx; then
|
||||
return
|
||||
fi
|
||||
|
||||
brew install asciidoc docbook-xsl
|
||||
}
|
||||
|
||||
configure_cargo() {
|
||||
local prefix=$(gcc_prefix)
|
||||
if [ -n "${prefix}" ]; then
|
||||
local gcc_suffix=
|
||||
if [ -n "$GCC_VERSION" ]; then
|
||||
gcc_suffix="-$GCC_VERSION"
|
||||
fi
|
||||
local gcc="${prefix}gcc${gcc_suffix}"
|
||||
|
||||
if [ ! -z $prefix ]; then
|
||||
# information about the cross compiler
|
||||
"${gcc}" -v
|
||||
${prefix}gcc -v
|
||||
|
||||
# tell cargo which linker to use for cross compilation
|
||||
mkdir -p .cargo
|
||||
cat >>.cargo/config <<EOF
|
||||
[target.$TARGET]
|
||||
linker = "${gcc}"
|
||||
linker = "${prefix}gcc"
|
||||
EOF
|
||||
fi
|
||||
}
|
||||
|
||||
main() {
|
||||
install_osx_dependencies
|
||||
install_c_toolchain
|
||||
install_rustup
|
||||
install_targets
|
||||
install_standard_crates
|
||||
configure_cargo
|
||||
|
||||
# TODO if you need to install extra stuff add it here
|
||||
}
|
||||
|
||||
main
|
||||
|
||||
68
ci/script.sh
Executable file → Normal file
68
ci/script.sh
Executable file → Normal file
@@ -1,50 +1,42 @@
|
||||
#!/bin/bash
|
||||
|
||||
# build, test and generate docs in this phase
|
||||
# `script` phase: you usually build, test and generate docs in this phase
|
||||
|
||||
set -ex
|
||||
|
||||
. "$(dirname $0)/utils.sh"
|
||||
. $(dirname $0)/utils.sh
|
||||
|
||||
main() {
|
||||
# Test a normal debug build.
|
||||
if is_arm; then
|
||||
cargo build --target "$TARGET" --verbose
|
||||
else
|
||||
cargo build --target "$TARGET" --verbose --all --features 'pcre2'
|
||||
# NOTE Workaround for rust-lang/rust#31907 - disable doc tests when cross compiling
|
||||
# This has been fixed in the nightly channel but it would take a while to reach the other channels
|
||||
disable_cross_doctests() {
|
||||
if [ $(host) != "$TARGET" ] && [ "$TRAVIS_RUST_VERSION" = "stable" ]; then
|
||||
if [ "$TRAVIS_OS_NAME" = "osx" ]; then
|
||||
brew install gnu-sed --default-names
|
||||
fi
|
||||
find src -name '*.rs' -type f | xargs sed -i -e 's:\(//.\s*```\):\1 ignore,:g'
|
||||
fi
|
||||
}
|
||||
|
||||
# Show the output of the most recent build.rs stderr.
|
||||
set +x
|
||||
stderr="$(find "target/$TARGET/debug" -name stderr -print0 | xargs -0 ls -t | head -n1)"
|
||||
if [ -s "$stderr" ]; then
|
||||
echo "===== $stderr ====="
|
||||
cat "$stderr"
|
||||
echo "====="
|
||||
fi
|
||||
set -x
|
||||
run_test_suite() {
|
||||
cargo clean --target $TARGET --verbose
|
||||
cargo build --target $TARGET --verbose
|
||||
cargo test --target $TARGET --verbose
|
||||
cargo build --target $TARGET --verbose --manifest-path grep/Cargo.toml
|
||||
cargo test --target $TARGET --verbose --manifest-path grep/Cargo.toml
|
||||
cargo build --target $TARGET --verbose --manifest-path globset/Cargo.toml
|
||||
cargo test --target $TARGET --verbose --manifest-path globset/Cargo.toml
|
||||
cargo build --target $TARGET --verbose --manifest-path ignore/Cargo.toml
|
||||
cargo test --target $TARGET --verbose --manifest-path ignore/Cargo.toml
|
||||
cargo build --target $TARGET --verbose --manifest-path termcolor/Cargo.toml
|
||||
cargo test --target $TARGET --verbose --manifest-path termcolor/Cargo.toml
|
||||
|
||||
"$( dirname "${0}" )/test_complete.sh"
|
||||
|
||||
# sanity check the file type
|
||||
file target/"$TARGET"/debug/rg
|
||||
file target/$TARGET/debug/rg
|
||||
}
|
||||
|
||||
# Check that we've generated man page and other shell completions.
|
||||
outdir="$(cargo_out_dir "target/$TARGET/debug")"
|
||||
file "$outdir/rg.bash"
|
||||
file "$outdir/rg.fish"
|
||||
file "$outdir/_rg.ps1"
|
||||
file "$outdir/rg.1"
|
||||
|
||||
# Apparently tests don't work on arm, so just bail now. I guess we provide
|
||||
# ARM releases on a best effort basis?
|
||||
if is_arm; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Test that zsh completions are in sync with ripgrep's actual args.
|
||||
"$(dirname "${0}")/test_complete.sh"
|
||||
|
||||
# Run tests for ripgrep and all sub-crates.
|
||||
cargo test --target "$TARGET" --verbose --all --features 'pcre2'
|
||||
main() {
|
||||
# disable_cross_doctests
|
||||
run_test_suite
|
||||
}
|
||||
|
||||
main
|
||||
|
||||
0
ci/sha256.sh
Executable file → Normal file
0
ci/sha256.sh
Executable file → Normal file
@@ -1,73 +1,70 @@
|
||||
#!/usr/bin/env zsh
|
||||
|
||||
emulate zsh -o extended_glob -o no_function_argzero -o no_unset
|
||||
|
||||
##
|
||||
# Compares options in `rg --help` output to options in zsh completion function
|
||||
|
||||
emulate -R zsh
|
||||
setopt extended_glob
|
||||
setopt no_function_argzero
|
||||
setopt no_unset
|
||||
|
||||
get_comp_args() {
|
||||
# Technically there are many options that the completion system sets that
|
||||
# our function may rely on, but we'll trust that we've got it mostly right
|
||||
setopt local_options unset
|
||||
|
||||
# Our completion function recognises a special variable which tells it to
|
||||
# dump the _arguments specs and then just return. But do this in a sub-shell
|
||||
# anyway to avoid any weirdness
|
||||
( _RG_COMPLETE_LIST_ARGS=1 source $1 )
|
||||
return $?
|
||||
}
|
||||
|
||||
main() {
|
||||
local diff
|
||||
local rg="${0:a:h}/../target/${TARGET:-}/release/rg"
|
||||
local _rg="${0:a:h}/../complete/_rg"
|
||||
local rg="${${0:a}:h}/../target/${TARGET:-}/release/rg"
|
||||
local _rg="${${0:a}:h}/../complete/_rg"
|
||||
local -a help_args comp_args
|
||||
|
||||
[[ -e $rg ]] || rg=${rg/%\/release\/rg/\/debug\/rg}
|
||||
|
||||
rg=${rg:a}
|
||||
_rg=${_rg:a}
|
||||
|
||||
[[ -e $rg ]] || {
|
||||
print -r >&2 "File not found: $rg"
|
||||
printf >&2 'File not found: %s\n' $rg
|
||||
return 1
|
||||
}
|
||||
[[ -e $_rg ]] || {
|
||||
print -r >&2 "File not found: $_rg"
|
||||
printf >&2 'File not found: %s\n' $_rg
|
||||
return 1
|
||||
}
|
||||
|
||||
print -rl - 'Comparing options:' "-$rg" "+$_rg"
|
||||
printf 'Comparing options:\n-%s\n+%s\n' $rg $_rg
|
||||
|
||||
# 'Parse' options out of the `--help` output. To prevent false positives we
|
||||
# only look at lines where the first non-white-space character is `-`, or
|
||||
# where a long option starting with certain letters (see `_rg`) is found.
|
||||
# Occasionally we may have to handle some manually, however
|
||||
# only look at lines where the first non-white-space character is `-`
|
||||
help_args=( ${(f)"$(
|
||||
$rg --help |
|
||||
$rg -i -- '^\s+--?[a-z0-9]|--[imnp]' |
|
||||
$rg -ior '$1' -- $'[\t /\"\'`.,](-[a-z0-9]|--[a-z0-9-]+)\\b' |
|
||||
$rg -v -- --print0 | # False positives
|
||||
$rg -- '^\s*-' |
|
||||
$rg -io -- '[\t ,](-[a-z0-9]|--[a-z0-9-]+)\b' |
|
||||
tr -d '\t ,' |
|
||||
sort -u
|
||||
)"} )
|
||||
|
||||
# 'Parse' options out of the completion function
|
||||
comp_args=( ${(f)"$( get_comp_args $_rg )"} )
|
||||
|
||||
# Note that we currently exclude hidden (!...) options; matching these
|
||||
# properly against the `--help` output could be irritating
|
||||
comp_args=( ${comp_args#\(*\)} ) # Strip excluded options
|
||||
comp_args=( ${comp_args#\*} ) # Strip repetition indicator
|
||||
comp_args=( ${comp_args%%-[:[]*} ) # Strip everything after -optname-
|
||||
comp_args=( ${comp_args%%[:+=[]*} ) # Strip everything after other optspecs
|
||||
comp_args=( ${comp_args##[^-]*} ) # Remove non-options
|
||||
comp_args=( ${(f)"$( print -rl - $comp_args | sort -u )"} )
|
||||
|
||||
# This probably isn't necessary, but we should ensure the same order
|
||||
comp_args=( ${(f)"$( printf '%s\n' $comp_args | sort -u )"} )
|
||||
|
||||
(( $#help_args )) || {
|
||||
print -r >&2 'Failed to get help_args'
|
||||
printf >&2 'Failed to get help_args\n'
|
||||
return 1
|
||||
}
|
||||
(( $#comp_args )) || {
|
||||
print -r >&2 'Failed to get comp_args'
|
||||
printf >&2 'Failed to get comp_args\n'
|
||||
return 1
|
||||
}
|
||||
|
||||
@@ -76,12 +73,12 @@ main() {
|
||||
diff -U2 \
|
||||
--label '`rg --help`' \
|
||||
--label '`_rg`' \
|
||||
=( print -rl - $help_args ) =( print -rl - $comp_args )
|
||||
=( printf '%s\n' $help_args ) =( printf '%s\n' $comp_args )
|
||||
else
|
||||
diff -U2 \
|
||||
-L '`rg --help`' \
|
||||
-L '`_rg`' \
|
||||
=( print -rl - $help_args ) =( print -rl - $comp_args )
|
||||
=( printf '%s\n' $help_args ) =( printf '%s\n' $comp_args )
|
||||
fi
|
||||
)"
|
||||
|
||||
@@ -94,4 +91,4 @@ main() {
|
||||
return 0
|
||||
}
|
||||
|
||||
main "$@"
|
||||
main "${@}"
|
||||
|
||||
88
ci/utils.sh
88
ci/utils.sh
@@ -1,19 +1,5 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Various utility functions used through CI.
|
||||
|
||||
# Finds Cargo's `OUT_DIR` directory from the most recent build.
|
||||
#
|
||||
# This requires one parameter corresponding to the target directory
|
||||
# to search for the build output.
|
||||
cargo_out_dir() {
|
||||
# This works by finding the most recent stamp file, which is produced by
|
||||
# every ripgrep build.
|
||||
target_dir="$1"
|
||||
find "$target_dir" -name ripgrep-stamp -print0 \
|
||||
| xargs -0 ls -t \
|
||||
| head -n1 \
|
||||
| xargs dirname
|
||||
mktempd() {
|
||||
echo $(mktemp -d 2>/dev/null || mktemp -d -t tmp)
|
||||
}
|
||||
|
||||
host() {
|
||||
@@ -27,12 +13,37 @@ host() {
|
||||
esac
|
||||
}
|
||||
|
||||
architecture() {
|
||||
gcc_prefix() {
|
||||
case "$TARGET" in
|
||||
x86_64-*)
|
||||
aarch64-unknown-linux-gnu)
|
||||
echo aarch64-linux-gnu-
|
||||
;;
|
||||
arm*-gnueabihf)
|
||||
echo arm-linux-gnueabihf-
|
||||
;;
|
||||
*)
|
||||
return
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
dobin() {
|
||||
[ -z $MAKE_DEB ] && die 'dobin: $MAKE_DEB not set'
|
||||
[ $# -lt 1 ] && die "dobin: at least one argument needed"
|
||||
|
||||
local f prefix=$(gcc_prefix)
|
||||
for f in "$@"; do
|
||||
install -m0755 $f $dtd/debian/usr/bin/
|
||||
${prefix}strip -s $dtd/debian/usr/bin/$(basename $f)
|
||||
done
|
||||
}
|
||||
|
||||
architecture() {
|
||||
case $1 in
|
||||
x86_64-unknown-linux-gnu|x86_64-unknown-linux-musl)
|
||||
echo amd64
|
||||
;;
|
||||
i686-*|i586-*|i386-*)
|
||||
i686-unknown-linux-gnu|i686-unknown-linux-musl)
|
||||
echo i386
|
||||
;;
|
||||
arm*-unknown-linux-gnueabihf)
|
||||
@@ -43,42 +54,3 @@ architecture() {
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
gcc_prefix() {
|
||||
case "$(architecture)" in
|
||||
armhf)
|
||||
echo arm-linux-gnueabihf-
|
||||
;;
|
||||
*)
|
||||
return
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
is_x86() {
|
||||
case "$(architecture)" in
|
||||
amd64|i386) return 0 ;;
|
||||
*) return 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
is_arm() {
|
||||
case "$(architecture)" in
|
||||
armhf) return 0 ;;
|
||||
*) return 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
is_linux() {
|
||||
case "$TRAVIS_OS_NAME" in
|
||||
linux) return 0 ;;
|
||||
*) return 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
is_osx() {
|
||||
case "$TRAVIS_OS_NAME" in
|
||||
osx) return 0 ;;
|
||||
*) return 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
8
compile
Executable file
8
compile
Executable file
@@ -0,0 +1,8 @@
|
||||
#!/bin/sh
|
||||
|
||||
# export RUSTFLAGS="-C target-feature=+ssse3"
|
||||
# cargo build --release --features 'simd-accel'
|
||||
|
||||
export RUSTFLAGS="-C target-cpu=native"
|
||||
cargo build --release --features 'simd-accel avx-accel'
|
||||
# cargo build --release --features 'simd-accel avx-accel' --target x86_64-unknown-linux-musl
|
||||
615
complete/_rg
615
complete/_rg
@@ -6,357 +6,155 @@
|
||||
# Run ci/test_complete.sh after building to ensure that the options supported by
|
||||
# this function stay in synch with the `rg` binary.
|
||||
#
|
||||
# For convenience, a completion reference guide is included at the bottom of
|
||||
# this file.
|
||||
# @see https://github.com/zsh-users/zsh/blob/master/Etc/completion-style-guide
|
||||
#
|
||||
# Originally based on code from the zsh-users project — see copyright notice
|
||||
# below.
|
||||
# Based on code from the zsh-users project — see copyright notice below.
|
||||
|
||||
_rg() {
|
||||
local curcontext=$curcontext no='!' descr ret=1
|
||||
local -a context line state state_descr args tmp suf
|
||||
local -A opt_args
|
||||
local state_descr ret curcontext="${curcontext:-}"
|
||||
local -a context line state
|
||||
local -A opt_args val_args
|
||||
local -a rg_args
|
||||
|
||||
# ripgrep has many options which negate the effect of a more common one — for
|
||||
# example, `--no-column` to negate `--column`, and `--messages` to negate
|
||||
# `--no-messages`. There are so many of these, and they're so infrequently
|
||||
# used, that some users will probably find it irritating if they're completed
|
||||
# indiscriminately, so let's not do that unless either the current prefix
|
||||
# matches one of those negation options or the user has the `complete-all`
|
||||
# style set. Note that this prefix check has to be updated manually to account
|
||||
# for all of the potential negation options listed below!
|
||||
if
|
||||
# We also want to list all of these options during testing
|
||||
[[ $_RG_COMPLETE_LIST_ARGS == (1|t*|y*) ]] ||
|
||||
# (--[imnp]* => --ignore*, --messages, --no-*, --pcre2-unicode)
|
||||
[[ $PREFIX$SUFFIX == --[imnp]* ]] ||
|
||||
zstyle -t ":complete:$curcontext:*" complete-all
|
||||
then
|
||||
no=
|
||||
fi
|
||||
|
||||
# We make heavy use of argument groups here to prevent the option specs from
|
||||
# growing unwieldy. These aren't supported in zsh <5.4, though, so we'll strip
|
||||
# them out below if necessary. This makes the exclusions inaccurate on those
|
||||
# older versions, but oh well — it's not that big a deal
|
||||
args=(
|
||||
+ '(exclusive)' # Misc. fully exclusive options
|
||||
'(: * -)'{-h,--help}'[display help information]'
|
||||
'(: * -)'{-V,--version}'[display version information]'
|
||||
'(: * -)'--pcre2-version'[print the version of PCRE2 used by ripgrep, if available]'
|
||||
|
||||
+ '(buffered)' # buffering options
|
||||
'--line-buffered[force line buffering]'
|
||||
$no"--no-line-buffered[don't force line buffering]"
|
||||
'--block-buffered[force block buffering]'
|
||||
$no"--no-block-buffered[don't force block buffering]"
|
||||
|
||||
+ '(case)' # Case-sensitivity options
|
||||
{-i,--ignore-case}'[search case-insensitively]'
|
||||
{-s,--case-sensitive}'[search case-sensitively]'
|
||||
{-S,--smart-case}'[search case-insensitively if pattern is all lowercase]'
|
||||
|
||||
+ '(context-a)' # Context (after) options
|
||||
'(context-c)'{-A+,--after-context=}'[specify lines to show after each match]:number of lines'
|
||||
|
||||
+ '(context-b)' # Context (before) options
|
||||
'(context-c)'{-B+,--before-context=}'[specify lines to show before each match]:number of lines'
|
||||
|
||||
+ '(context-c)' # Context (combined) options
|
||||
'(context-a context-b)'{-C+,--context=}'[specify lines to show before and after each match]:number of lines'
|
||||
|
||||
+ '(column)' # Column options
|
||||
'--column[show column numbers for matches]'
|
||||
$no"--no-column[don't show column numbers for matches]"
|
||||
|
||||
+ '(count)' # Counting options
|
||||
{-c,--count}'[only show count of matching lines for each file]'
|
||||
'--count-matches[only show count of individual matches for each file]'
|
||||
|
||||
+ '(encoding)' # Encoding options
|
||||
{-E+,--encoding=}'[specify text encoding of files to search]: :_rg_encodings'
|
||||
$no'--no-encoding[use default text encoding]'
|
||||
|
||||
+ file # File-input options
|
||||
'(1)*'{-f+,--file=}'[specify file containing patterns to search for]: :_files'
|
||||
|
||||
+ '(file-match)' # Files with/without match options
|
||||
'(stats)'{-l,--files-with-matches}'[only show names of files with matches]'
|
||||
'(stats)--files-without-match[only show names of files without matches]'
|
||||
|
||||
+ '(file-name)' # File-name options
|
||||
{-H,--with-filename}'[show file name for matches]'
|
||||
{-I,--no-filename}"[don't show file name for matches]"
|
||||
|
||||
+ '(file-system)' # File system options
|
||||
"--one-file-system[don't descend into directories on other file systems]"
|
||||
$no'--no-one-file-system[descend into directories on other file systems]'
|
||||
|
||||
+ '(fixed)' # Fixed-string options
|
||||
{-F,--fixed-strings}'[treat pattern as literal string instead of regular expression]'
|
||||
$no"--no-fixed-strings[don't treat pattern as literal string]"
|
||||
|
||||
+ '(follow)' # Symlink-following options
|
||||
{-L,--follow}'[follow symlinks]'
|
||||
$no"--no-follow[don't follow symlinks]"
|
||||
|
||||
+ glob # File-glob options
|
||||
'*'{-g+,--glob=}'[include/exclude files matching specified glob]:glob'
|
||||
'*--iglob=[include/exclude files matching specified case-insensitive glob]:glob'
|
||||
|
||||
+ '(heading)' # Heading options
|
||||
'(pretty-vimgrep)--heading[show matches grouped by file name]'
|
||||
"(pretty-vimgrep)--no-heading[don't show matches grouped by file name]"
|
||||
|
||||
+ '(hidden)' # Hidden-file options
|
||||
'--hidden[search hidden files and directories]'
|
||||
$no"--no-hidden[don't search hidden files and directories]"
|
||||
|
||||
+ '(hybrid)' # hybrid regex options
|
||||
'--auto-hybrid-regex[dynamically use PCRE2 if necessary]'
|
||||
$no"--no-auto-hybrid-regex[don't dynamically use PCRE2 if necessary]"
|
||||
|
||||
+ '(ignore)' # Ignore-file options
|
||||
"(--no-ignore-global --no-ignore-parent --no-ignore-vcs --no-ignore-dot)--no-ignore[don't respect ignore files]"
|
||||
$no'(--ignore-global --ignore-parent --ignore-vcs --ignore-dot)--ignore[respect ignore files]'
|
||||
|
||||
+ '(ignore-file-case-insensitive)' # Ignore-file case sensitivity options
|
||||
'--ignore-file-case-insensitive[process ignore files case insensitively]'
|
||||
$no'--no-ignore-file-case-insensitive[process ignore files case sensitively]'
|
||||
|
||||
+ '(ignore-global)' # Global ignore-file options
|
||||
"--no-ignore-global[don't respect global ignore files]"
|
||||
$no'--ignore-global[respect global ignore files]'
|
||||
|
||||
+ '(ignore-parent)' # Parent ignore-file options
|
||||
"--no-ignore-parent[don't respect ignore files in parent directories]"
|
||||
$no'--ignore-parent[respect ignore files in parent directories]'
|
||||
|
||||
+ '(ignore-vcs)' # VCS ignore-file options
|
||||
"--no-ignore-vcs[don't respect version control ignore files]"
|
||||
$no'--ignore-vcs[respect version control ignore files]'
|
||||
|
||||
+ '(ignore-dot)' # .ignore-file options
|
||||
"--no-ignore-dot[don't respect .ignore files]"
|
||||
$no'--ignore-dot[respect .ignore files]'
|
||||
|
||||
+ '(json)' # JSON options
|
||||
'--json[output results in JSON Lines format]'
|
||||
$no"--no-json[don't output results in JSON Lines format]"
|
||||
|
||||
+ '(line-number)' # Line-number options
|
||||
{-n,--line-number}'[show line numbers for matches]'
|
||||
{-N,--no-line-number}"[don't show line numbers for matches]"
|
||||
|
||||
+ '(line-terminator)' # Line-terminator options
|
||||
'--crlf[use CRLF as line terminator]'
|
||||
$no"--no-crlf[don't use CRLF as line terminator]"
|
||||
'(text)--null-data[use NUL as line terminator]'
|
||||
|
||||
+ '(max-columns-preview)' # max column preview options
|
||||
'--max-columns-preview[show preview for long lines (with -M)]'
|
||||
$no"--no-max-columns-preview[don't show preview for long lines (with -M)]"
|
||||
|
||||
+ '(max-depth)' # Directory-depth options
|
||||
'--max-depth=[specify max number of directories to descend]:number of directories'
|
||||
'!--maxdepth=:number of directories'
|
||||
|
||||
+ '(messages)' # Error-message options
|
||||
'(--no-ignore-messages)--no-messages[suppress some error messages]'
|
||||
$no"--messages[don't suppress error messages affected by --no-messages]"
|
||||
|
||||
+ '(messages-ignore)' # Ignore-error message options
|
||||
"--no-ignore-messages[don't show ignore-file parse error messages]"
|
||||
$no'--ignore-messages[show ignore-file parse error messages]'
|
||||
|
||||
+ '(mmap)' # mmap options
|
||||
'--mmap[search using memory maps when possible]'
|
||||
"--no-mmap[don't search using memory maps]"
|
||||
|
||||
+ '(multiline)' # Multiline options
|
||||
{-U,--multiline}'[permit matching across multiple lines]'
|
||||
$no'(multiline-dotall)--no-multiline[restrict matches to at most one line each]'
|
||||
|
||||
+ '(multiline-dotall)' # Multiline DOTALL options
|
||||
'(--no-multiline)--multiline-dotall[allow "." to match newline (with -U)]'
|
||||
$no"(--no-multiline)--no-multiline-dotall[don't allow \".\" to match newline (with -U)]"
|
||||
|
||||
+ '(only)' # Only-match options
|
||||
{-o,--only-matching}'[show only matching part of each line]'
|
||||
|
||||
+ '(passthru)' # Pass-through options
|
||||
'(--vimgrep)--passthru[show both matching and non-matching lines]'
|
||||
'!(--vimgrep)--passthrough'
|
||||
|
||||
+ '(pcre2)' # PCRE2 options
|
||||
{-P,--pcre2}'[enable matching with PCRE2]'
|
||||
$no'(pcre2-unicode)--no-pcre2[disable matching with PCRE2]'
|
||||
|
||||
+ '(pcre2-unicode)' # PCRE2 Unicode options
|
||||
$no'(--no-pcre2 --no-pcre2-unicode)--pcre2-unicode[enable PCRE2 Unicode mode (with -P)]'
|
||||
'(--no-pcre2 --pcre2-unicode)--no-pcre2-unicode[disable PCRE2 Unicode mode (with -P)]'
|
||||
|
||||
+ '(pre)' # Preprocessing options
|
||||
'(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e'
|
||||
$no'--no-pre[disable preprocessor utility]'
|
||||
|
||||
+ pre-glob # Preprocessing glob options
|
||||
'*--pre-glob[include/exclude files for preprocessing with --pre]'
|
||||
|
||||
+ '(pretty-vimgrep)' # Pretty/vimgrep display options
|
||||
'(heading)'{-p,--pretty}'[alias for --color=always --heading -n]'
|
||||
'(heading passthru)--vimgrep[show results in vim-compatible format]'
|
||||
|
||||
+ regexp # Explicit pattern options
|
||||
'(1 file)*'{-e+,--regexp=}'[specify pattern]:pattern'
|
||||
|
||||
+ '(replace)' # Replacement options
|
||||
{-r+,--replace=}'[specify string used to replace matches]:replace string'
|
||||
|
||||
+ '(sort)' # File-sorting options
|
||||
'(threads)--sort=[sort results in ascending order (disables parallelism)]:sort method:((
|
||||
none\:"no sorting"
|
||||
path\:"sort by file path"
|
||||
modified\:"sort by last modified time"
|
||||
accessed\:"sort by last accessed time"
|
||||
created\:"sort by creation time"
|
||||
))'
|
||||
'(threads)--sortr=[sort results in descending order (disables parallelism)]:sort method:((
|
||||
none\:"no sorting"
|
||||
path\:"sort by file path"
|
||||
modified\:"sort by last modified time"
|
||||
accessed\:"sort by last accessed time"
|
||||
created\:"sort by creation time"
|
||||
))'
|
||||
'!(threads)--sort-files[sort results by file path (disables parallelism)]'
|
||||
|
||||
+ '(stats)' # Statistics options
|
||||
'(--files file-match)--stats[show search statistics]'
|
||||
$no"--no-stats[don't show search statistics]"
|
||||
|
||||
+ '(text)' # Binary-search options
|
||||
{-a,--text}'[search binary files as if they were text]'
|
||||
"--binary[search binary files, don't print binary data]"
|
||||
$no"--no-binary[don't search binary files]"
|
||||
$no"(--null-data)--no-text[don't search binary files as if they were text]"
|
||||
|
||||
+ '(threads)' # Thread-count options
|
||||
'(sort)'{-j+,--threads=}'[specify approximate number of threads to use]:number of threads'
|
||||
|
||||
+ '(trim)' # Trim options
|
||||
'--trim[trim any ASCII whitespace prefix from each line]'
|
||||
$no"--no-trim[don't trim ASCII whitespace prefix from each line]"
|
||||
|
||||
+ type # Type options
|
||||
'*'{-t+,--type=}'[only search files matching specified type]: :_rg_types'
|
||||
'*--type-add=[add new glob for specified file type]: :->typespec'
|
||||
'*--type-clear=[clear globs previously defined for specified file type]: :_rg_types'
|
||||
# This should actually be exclusive with everything but other type options
|
||||
'(: *)--type-list[show all supported file types and their associated globs]'
|
||||
'*'{-T+,--type-not=}"[don't search files matching specified file type]: :_rg_types"
|
||||
|
||||
+ '(word-line)' # Whole-word/line match options
|
||||
{-w,--word-regexp}'[only show matches surrounded by word boundaries]'
|
||||
{-x,--line-regexp}'[only show matches surrounded by line boundaries]'
|
||||
|
||||
+ '(zip)' # Compression options
|
||||
'(--pre)'{-z,--search-zip}'[search in compressed files]'
|
||||
$no"--no-search-zip[don't search in compressed files]"
|
||||
|
||||
+ misc # Other options — no need to separate these at the moment
|
||||
'(-b --byte-offset)'{-b,--byte-offset}'[show 0-based byte offset for each matching line]'
|
||||
'--color=[specify when to use colors in output]:when:((
|
||||
never\:"never use colors"
|
||||
auto\:"use colors or not based on stdout, TERM, etc."
|
||||
always\:"always use colors"
|
||||
ansi\:"always use ANSI colors (even on Windows)"
|
||||
))'
|
||||
'*--colors=[specify color and style settings]: :->colorspec'
|
||||
# Sort by long option name to match `rg --help`
|
||||
rg_args=(
|
||||
'(-A -C --after-context --context)'{-A+,--after-context=}'[specify lines to show after each match]:number of lines'
|
||||
'(-B -C --before-context --context)'{-B+,--before-context=}'[specify lines to show before each match]:number of lines'
|
||||
'(-i -s -S --ignore-case --case-sensitive --smart-case)'{-s,--case-sensitive}'[search case-sensitively]'
|
||||
'--color=[specify when to use colors in output]:when:( never auto always ansi )'
|
||||
'*--colors=[specify color settings and styles]: :->colorspec'
|
||||
'--column[show column numbers]'
|
||||
'(-A -B -C --after-context --before-context --context)'{-C+,--context=}'[specify lines to show before and after each match]:number of lines'
|
||||
'--context-separator=[specify string used to separate non-continuous context lines in output]:separator'
|
||||
'(-c --count)'{-c,--count}'[only show count of matches for each file]'
|
||||
'--debug[show debug messages]'
|
||||
'--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)'
|
||||
"(1 stats)--files[show each file that would be searched (but don't search)]"
|
||||
'*--ignore-file=[specify additional ignore file]:ignore file:_files'
|
||||
'--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size'
|
||||
'(-E --encoding)'{-E+,--encoding=}'[specify text encoding of files to search]: :_rg_encodings'
|
||||
'*'{-f+,--file=}'[specify file containing patterns to search for]:file:_files'
|
||||
"(1)--files[show each file that would be searched (but don't search)]"
|
||||
'(-l --files-with-matches --files-without-match)'{-l,--files-with-matches}'[only show names of files with matches]'
|
||||
'(-l --files-with-matches --files-without-match)--files-without-match[only show names of files without matches]'
|
||||
'(-F --fixed-strings)'{-F,--fixed-strings}'[treat pattern as literal string instead of regular expression]'
|
||||
'(-L --follow)'{-L,--follow}'[follow symlinks]'
|
||||
'*'{-g+,--glob=}'[include or exclude files for searching that match the specified glob]:glob'
|
||||
'(: -)'{-h,--help}'[display help information]'
|
||||
'(-p --no-heading --pretty --vimgrep)--heading[show matches grouped by file name]'
|
||||
'--hidden[search hidden files and directories]'
|
||||
'*--iglob=[include or exclude files for searching that match the specified case-insensitive glob]:glob'
|
||||
'(-i -s -S --case-sensitive --ignore-case --smart-case)'{-i,--ignore-case}'[search case-insensitively]'
|
||||
'--ignore-file=[specify additional ignore file]:file:_files'
|
||||
'(-v --invert-match)'{-v,--invert-match}'[invert matching]'
|
||||
'(-n -N --line-number --no-line-number)'{-n,--line-number}'[show line numbers]'
|
||||
'(-w -x --line-regexp --word-regexp)'{-x,--line-regexp}'[only show matches surrounded by line boundaries]'
|
||||
'(-M --max-columns)'{-M+,--max-columns=}'[specify max length of lines to print]:number of bytes'
|
||||
'(-m --max-count)'{-m+,--max-count=}'[specify max number of matches per file]:number of matches'
|
||||
'--max-filesize=[specify size above which files should be ignored]:file size (bytes)'
|
||||
"--no-config[don't load configuration files]"
|
||||
'--max-filesize=[specify size above which files should be ignored]:file size'
|
||||
'--maxdepth=[specify max number of directories to descend]:number of directories'
|
||||
'(--mmap --no-mmap)--mmap[search using memory maps when possible]'
|
||||
'(-H --with-filename --no-filename)--no-filename[suppress all file names]'
|
||||
"(-p --heading --pretty --vimgrep)--no-heading[don't group matches by file name]"
|
||||
"(--no-ignore-parent)--no-ignore[don't respect ignore files]"
|
||||
"--no-ignore-parent[don't respect ignore files in parent directories]"
|
||||
"--no-ignore-vcs[don't respect version control ignore files]"
|
||||
'(-n -N --line-number --no-line-number)'{-N,--no-line-number}'[suppress line numbers]'
|
||||
'--no-messages[suppress all error messages]'
|
||||
"(--mmap --no-mmap)--no-mmap[don't search using memory maps]"
|
||||
'(-0 --null)'{-0,--null}'[print NUL byte after file names]'
|
||||
'(-o --only-matching -r --replace)'{-o,--only-matching}'[show only matching part of each line]'
|
||||
'--path-separator=[specify path separator to use when printing file names]:separator'
|
||||
'(-p --heading --no-heading --pretty --vimgrep)'{-p,--pretty}'[alias for --color=always --heading -n]'
|
||||
'(-q --quiet)'{-q,--quiet}'[suppress normal output]'
|
||||
'--regex-size-limit=[specify upper size limit of compiled regex]:regex size (bytes)'
|
||||
'--regex-size-limit=[specify upper size limit of compiled regex]:regex size'
|
||||
'(1 -f --file)*'{-e+,--regexp=}'[specify pattern]:pattern'
|
||||
'(-o --only-matching -r --replace)'{-r+,--replace=}'[specify string used to replace matches]:replace string'
|
||||
'(-i -s -S --ignore-case --case-sensitive --smart-case)'{-S,--smart-case}'[search case-insensitively if the pattern is all lowercase]'
|
||||
'(-j --threads)--sort-files[sort results by file path (disables parallelism)]'
|
||||
'(-a --text)'{-a,--text}'[search binary files as if they were text]'
|
||||
'(-j --sort-files --threads)'{-j+,--threads=}'[specify approximate number of threads to use]:number of threads'
|
||||
'*'{-t+,--type=}'[only search files matching specified type]: :_rg_types'
|
||||
'*--type-add=[add new glob for file type]: :->typespec'
|
||||
'*--type-clear=[clear globs previously defined for specified file type]: :_rg_types'
|
||||
# This should actually be exclusive with everything but other type options
|
||||
'(:)--type-list[show all supported file types and their associated globs]'
|
||||
'*'{-T+,--type-not=}"[don't search files matching specified type]: :_rg_types"
|
||||
'*'{-u,--unrestricted}'[reduce level of "smart" searching]'
|
||||
|
||||
+ operand # Operands
|
||||
'(--files --type-list file regexp)1: :_guard "^-*" pattern'
|
||||
'(--type-list)*: :_files'
|
||||
'(: -)'{-V,--version}'[display version information]'
|
||||
'(-p --heading --no-heading --pretty)--vimgrep[show results in vim-compatible format]'
|
||||
'(-H --no-filename --with-filename)'{-H,--with-filename}'[display the file name for matches]'
|
||||
'(-w -x --line-regexp --word-regexp)'{-w,--word-regexp}'[only show matches surrounded by word boundaries]'
|
||||
'(-e -f --file --files --regexp --type-list)1: :_rg_pattern'
|
||||
'(--type-list)*:file:_files'
|
||||
)
|
||||
|
||||
# This is used with test_complete.sh to verify that there are no options
|
||||
# listed in the help output that aren't also defined here
|
||||
[[ $_RG_COMPLETE_LIST_ARGS == (1|t*|y*) ]] && {
|
||||
print -rl - $args
|
||||
[[ ${_RG_COMPLETE_LIST_ARGS:-} == (1|t*|y*) ]] && {
|
||||
printf '%s\n' "${rg_args[@]}"
|
||||
return 0
|
||||
}
|
||||
|
||||
# Strip out argument groups where unsupported (see above)
|
||||
[[ $ZSH_VERSION == (4|5.<0-3>)(.*)# ]] &&
|
||||
args=( ${(@)args:#(#i)(+|[a-z0-9][a-z0-9_-]#|\([a-z0-9][a-z0-9_-]#\))} )
|
||||
_arguments -s -S : "${rg_args[@]}" && return 0
|
||||
|
||||
_arguments -C -s -S : $args && ret=0
|
||||
while (( $#state )); do
|
||||
case "${state[1]}" in
|
||||
colorspec)
|
||||
# @todo I don't like this because it allows you to do weird things like
|
||||
# `line:line:bg:`. Also, I would like the `compadd -q` behaviour
|
||||
[[ -prefix *:none: ]] && return 1
|
||||
[[ -prefix *:*:*:* ]] && return 1
|
||||
|
||||
case $state in
|
||||
colorspec)
|
||||
if [[ ${IPREFIX#--*=}$PREFIX == [^:]# ]]; then
|
||||
suf=( -qS: )
|
||||
tmp=(
|
||||
'column:specify coloring for column numbers'
|
||||
'line:specify coloring for line numbers'
|
||||
'match:specify coloring for match text'
|
||||
'path:specify coloring for file names'
|
||||
)
|
||||
descr='color/style type'
|
||||
elif [[ ${IPREFIX#--*=}$PREFIX == (column|line|match|path):[^:]# ]]; then
|
||||
suf=( -qS: )
|
||||
tmp=(
|
||||
'none:clear color/style for type'
|
||||
'bg:specify background color'
|
||||
'fg:specify foreground color'
|
||||
'style:specify text style'
|
||||
)
|
||||
descr='color/style attribute'
|
||||
elif [[ ${IPREFIX#--*=}$PREFIX == [^:]##:(bg|fg):[^:]# ]]; then
|
||||
tmp=( black blue green red cyan magenta yellow white )
|
||||
descr='color name or r,g,b'
|
||||
elif [[ ${IPREFIX#--*=}$PREFIX == [^:]##:style:[^:]# ]]; then
|
||||
tmp=( {,no}bold {,no}intense {,no}underline )
|
||||
descr='style name'
|
||||
else
|
||||
_message -e colorspec 'no more arguments'
|
||||
fi
|
||||
_values -S ':' 'color/style type' \
|
||||
'column[specify coloring for column numbers]: :->attribute' \
|
||||
'line[specify coloring for line numbers]: :->attribute' \
|
||||
'match[specify coloring for match text]: :->attribute' \
|
||||
'path[specify color for file names]: :->attribute' && return 0
|
||||
|
||||
(( $#tmp )) && {
|
||||
compset -P '*:'
|
||||
_describe -t colorspec $descr tmp $suf && ret=0
|
||||
}
|
||||
;;
|
||||
[[ "${state}" == 'attribute' ]] &&
|
||||
_values -S ':' 'color/style attribute' \
|
||||
'none[clear color/style for type]' \
|
||||
'bg[specify background color]: :->color' \
|
||||
'fg[specify foreground color]: :->color' \
|
||||
'style[specify text style]: :->style' && return 0
|
||||
|
||||
typespec)
|
||||
if compset -P '[^:]##:include:'; then
|
||||
_sequence -s , _rg_types && ret=0
|
||||
# @todo This bit in particular could be better, but it's a little
|
||||
# complex, and attempting to solve it seems to run us up against a crash
|
||||
# bug — zsh # 40362
|
||||
elif compset -P '[^:]##:'; then
|
||||
_message 'glob or include directive' && ret=1
|
||||
elif [[ ! -prefix *:* ]]; then
|
||||
_rg_types -qS : && ret=0
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
[[ "${state}" == 'color' ]] &&
|
||||
_values -S ':' 'color value' \
|
||||
black blue green red cyan magenta yellow white && return 0
|
||||
|
||||
return ret
|
||||
[[ "${state}" == 'style' ]] &&
|
||||
_values -S ':' 'style value' \
|
||||
bold nobold intense nointense && return 0
|
||||
;;
|
||||
|
||||
typespec)
|
||||
if compset -P '[^:]##:include:'; then
|
||||
_sequence -s ',' _rg_types && return 0
|
||||
# @todo This bit in particular could be better, but it's a little
|
||||
# complex, and attempting to solve it seems to run us up against a crash
|
||||
# bug — zsh # 40362
|
||||
elif compset -P '[^:]##:'; then
|
||||
_message 'glob or include directive' && return 1
|
||||
elif [[ ! -prefix *:* ]]; then
|
||||
_rg_types -qS ':' && return 0
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
shift state
|
||||
done
|
||||
|
||||
return 1
|
||||
}
|
||||
|
||||
# zsh 5.1 refuses to complete options if a 'match-less' operand like our pattern
|
||||
# could be 'completed' instead. We can use _guard() to avoid this problem, but
|
||||
# it introduces another one: zsh won't print the message if we try to complete
|
||||
# the pattern after having passed `--`. To work around *that* problem, we can
|
||||
# use this function to bypass the _guard() when `--` is on the command line.
|
||||
# This is inaccurate (it'd get confused by e.g. `rg -e --`), but zsh's handling
|
||||
# of `--` isn't accurate anyway
|
||||
_rg_pattern() {
|
||||
if (( ${words[(I)--]} )); then
|
||||
_message 'pattern'
|
||||
else
|
||||
_guard '^-*' 'pattern'
|
||||
fi
|
||||
}
|
||||
|
||||
# Complete encodings
|
||||
@@ -389,10 +187,10 @@ _rg_encodings() {
|
||||
shift{-,_}jis csshiftjis {,x-}sjis ms_kanji ms932
|
||||
utf{,-}8 utf-16{,be,le} unicode-1-1-utf-8
|
||||
windows-{31j,874,949,125{0..8}} dos-874 tis-620 ansi_x3.4-1968
|
||||
x-user-defined auto none
|
||||
x-user-defined auto
|
||||
)
|
||||
|
||||
_wanted encodings expl encoding compadd -a "$@" - _encodings
|
||||
_wanted rg-encodings expl 'encoding' compadd -a "${@}" - _encodings
|
||||
}
|
||||
|
||||
# Complete file types
|
||||
@@ -400,163 +198,12 @@ _rg_types() {
|
||||
local -a expl
|
||||
local -aU _types
|
||||
|
||||
_types=( ${(@)${(f)"$( _call_program types rg --type-list )"}%%:*} )
|
||||
_types=( ${${(f)"$( _call_program rg-types rg --type-list )"}%%:*} )
|
||||
|
||||
_wanted types expl 'file type' compadd -a "$@" - _types
|
||||
_wanted rg-types expl 'file type' compadd -a "${@}" - _types
|
||||
}
|
||||
|
||||
_rg "$@"
|
||||
|
||||
################################################################################
|
||||
# ZSH COMPLETION REFERENCE
|
||||
#
|
||||
# For the convenience of developers who aren't especially familiar with zsh
|
||||
# completion functions, a brief reference guide follows. This is in no way
|
||||
# comprehensive; it covers just enough of the basic structure, syntax, and
|
||||
# conventions to help someone make simple changes like adding new options. For
|
||||
# more complete documentation regarding zsh completion functions, please see the
|
||||
# following:
|
||||
#
|
||||
# * http://zsh.sourceforge.net/Doc/Release/Completion-System.html
|
||||
# * https://github.com/zsh-users/zsh/blob/master/Etc/completion-style-guide
|
||||
#
|
||||
# OVERVIEW
|
||||
#
|
||||
# Most zsh completion functions are defined in terms of `_arguments`, which is a
|
||||
# shell function that takes a series of argument specifications. The specs for
|
||||
# `rg` are stored in an array, which is common for more complex functions; the
|
||||
# elements of the array are passed to `_arguments` on invocation.
|
||||
#
|
||||
# ARGUMENT-SPECIFICATION SYNTAX
|
||||
#
|
||||
# The following is a contrived example of the argument specs for a simple tool:
|
||||
#
|
||||
# '(: * -)'{-h,--help}'[display help information]'
|
||||
# '(-q -v --quiet --verbose)'{-q,--quiet}'[decrease output verbosity]'
|
||||
# '!(-q -v --quiet --verbose)--silent'
|
||||
# '(-q -v --quiet --verbose)'{-v,--verbose}'[increase output verbosity]'
|
||||
# '--color=[specify when to use colors]:when:(always never auto)'
|
||||
# '*:example file:_files'
|
||||
#
|
||||
# Although there may appear to be six specs here, there are actually nine; we
|
||||
# use brace expansion to combine specs for options that go by multiple names,
|
||||
# like `-q` and `--quiet`. This is customary, and ties in with the fact that zsh
|
||||
# merges completion possibilities together when they have the same description.
|
||||
#
|
||||
# The first line defines the option `-h`/`--help`. With most tools, it isn't
|
||||
# useful to complete anything after `--help` because it effectively overrides
|
||||
# all others; the `(: * -)` at the beginning of the spec tells zsh not to
|
||||
# complete any other operands (`:` and `*`) or options (`-`) after this one has
|
||||
# been used. The `[...]` at the end associates a description with `-h`/`--help`;
|
||||
# as mentioned, zsh will see the identical descriptions and merge these options
|
||||
# together when offering completion possibilities.
|
||||
#
|
||||
# The next line defines `-q`/`--quiet`. Here we don't want to suppress further
|
||||
# completions entirely, but we don't want to offer `-q` if `--quiet` has been
|
||||
# given (since they do the same thing), nor do we want to offer `-v` (since it
|
||||
# doesn't make sense to be quiet and verbose at the same time). We don't need to
|
||||
# tell zsh not to offer `--quiet` a second time, since that's the default
|
||||
# behaviour, but since this line expands to two specs describing `-q` *and*
|
||||
# `--quiet` we do need to explicitly list all of them here.
|
||||
#
|
||||
# The next line defines a hidden option `--silent` — maybe it's a deprecated
|
||||
# synonym for `--quiet`. The leading `!` indicates that zsh shouldn't offer this
|
||||
# option during completion. The benefit of providing a spec for an option that
|
||||
# shouldn't be completed is that, if someone *does* use it, we can correctly
|
||||
# suppress completion of other options afterwards.
|
||||
#
|
||||
# The next line defines `-v`/`--verbose`; this works just like `-q`/`--quiet`.
|
||||
#
|
||||
# The next line defines `--color`. In this example, `--color` doesn't have a
|
||||
# corresponding short option, so we don't need to use brace expansion. Further,
|
||||
# there are no other options it's exclusive with (just itself), so we don't need
|
||||
# to define those at the beginning. However, it does take a mandatory argument.
|
||||
# The `=` at the end of `--color=` indicates that the argument may appear either
|
||||
# like `--color always` or like `--color=always`; this is how most GNU-style
|
||||
# command-line tools work. The corresponding short option would normally use `+`
|
||||
# — for example, `-c+` would allow either `-c always` or `-calways`. For this
|
||||
# option, the arguments are known ahead of time, so we can simply list them in
|
||||
# parentheses at the end (`when` is used as the description for the argument).
|
||||
#
|
||||
# The last line defines an operand (a non-option argument). In this example, the
|
||||
# operand can be used any number of times (the leading `*`), and it should be a
|
||||
# file path, so we tell zsh to call the `_files` function to complete it. The
|
||||
# `example file` in the middle is the description to use for this operand; we
|
||||
# could use a space instead to accept the default provided by `_files`.
|
||||
#
|
||||
# GROUPING ARGUMENT SPECIFICATIONS
|
||||
#
|
||||
# Newer versions of zsh support grouping argument specs together. All specs
|
||||
# following a `+` and then a group name are considered to be members of the
|
||||
# named group. Grouping is useful mostly for organisational purposes; it makes
|
||||
# the relationship between different options more obvious, and makes it easier
|
||||
# to specify exclusions.
|
||||
#
|
||||
# We could rewrite our example above using grouping as follows:
|
||||
#
|
||||
# '(: * -)'{-h,--help}'[display help information]'
|
||||
# '--color=[specify when to use colors]:when:(always never auto)'
|
||||
# '*:example file:_files'
|
||||
# + '(verbosity)'
|
||||
# {-q,--quiet}'[decrease output verbosity]'
|
||||
# '!--silent'
|
||||
# {-v,--verbose}'[increase output verbosity]'
|
||||
#
|
||||
# Here we take advantage of a useful feature of spec grouping — when the group
|
||||
# name is surrounded by parentheses, as in `(verbosity)`, it tells zsh that all
|
||||
# of the options in that group are exclusive with each other. As a result, we
|
||||
# don't need to manually list out the exclusions at the beginning of each
|
||||
# option.
|
||||
#
|
||||
# Groups can also be referred to by name in other argument specs; for example:
|
||||
#
|
||||
# '(xyz)--aaa' '*: :_files'
|
||||
# + xyz --xxx --yyy --zzz
|
||||
#
|
||||
# Here we use the group name `xyz` to tell zsh that `--xxx`, `--yyy`, and
|
||||
# `--zzz` are not to be completed after `--aaa`. This makes the exclusion list
|
||||
# much more compact and reusable.
|
||||
#
|
||||
# CONVENTIONS
|
||||
#
|
||||
# zsh completion functions generally adhere to the following conventions:
|
||||
#
|
||||
# * Use two spaces for indentation
|
||||
# * Combine specs for options with different names using brace expansion
|
||||
# * In combined specs, list the short option first (as in `{-a,--text}`)
|
||||
# * Use `+` or `=` as described above for options that take arguments
|
||||
# * Provide a description for all options, option-arguments, and operands
|
||||
# * Capitalise/punctuate argument descriptions as phrases, not complete
|
||||
# sentences — 'display help information', never 'Display help information.'
|
||||
# (but still capitalise acronyms and proper names)
|
||||
# * Write argument descriptions as verb phrases — 'display x', 'enable y',
|
||||
# 'use z'
|
||||
# * Word descriptions to make it clear when an option expects an argument;
|
||||
# usually this is done with the word 'specify', as in 'specify x' or
|
||||
# 'use specified x'
|
||||
# * Write argument descriptions as tersely as possible — for example, articles
|
||||
# like 'a' and 'the' should be omitted unless it would be confusing
|
||||
#
|
||||
# Other conventions currently used by this function:
|
||||
#
|
||||
# * Order argument specs alphabetically by group name, then option name
|
||||
# * Group options that are directly related, mutually exclusive, or frequently
|
||||
# referenced by other argument specs
|
||||
# * Use only characters in the set [a-z0-9_-] in group names
|
||||
# * Order exclusion lists as follows: short options, long options, groups
|
||||
# * Use American English in descriptions
|
||||
# * Use 'don't' in descriptions instead of 'do not'
|
||||
# * Word descriptions for related options as similarly as possible. For example,
|
||||
# `--foo[enable foo]` and `--no-foo[disable foo]`, or `--foo[use foo]` and
|
||||
# `--no-foo[don't use foo]`
|
||||
# * Word descriptions to make it clear when an option only makes sense with
|
||||
# another option, usually by adding '(with -x)' to the end
|
||||
# * Don't quote strings or variables unnecessarily. When quotes are required,
|
||||
# prefer single-quotes to double-quotes
|
||||
# * Prefix option specs with `$no` when the option serves only to negate the
|
||||
# behaviour of another option that must be provided explicitly by the user.
|
||||
# This prevents rarely used options from cluttering up the completion menu
|
||||
################################################################################
|
||||
_rg "${@}"
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Copyright (c) 2011 Github zsh-users - http://github.com/zsh-users
|
||||
|
||||
5
doc/convert-to-man
Executable file
5
doc/convert-to-man
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/sh -e
|
||||
|
||||
pandoc -s -t man rg.1.md -o rg.1
|
||||
sed -i.bak 's/\.TH.*/.TH "rg" "1"/g' rg.1
|
||||
rm -f rg.1.bak # BSD `sed` requires the creation of a back-up file
|
||||
569
doc/rg.1
Normal file
569
doc/rg.1
Normal file
@@ -0,0 +1,569 @@
|
||||
.\" Automatically generated by Pandoc 1.19.2.1
|
||||
.\"
|
||||
.TH "rg" "1"
|
||||
.hy
|
||||
.SH NAME
|
||||
.PP
|
||||
rg \- recursively search current directory for lines matching a pattern
|
||||
.SH SYNOPSIS
|
||||
.PP
|
||||
rg [\f[I]options\f[]] \f[I]PATTERN\f[] [\f[I]path\f[] ...]
|
||||
.PP
|
||||
rg [\f[I]options\f[]] [\-e \f[I]PATTERN\f[] ...] [\-f \f[I]FILE\f[] ...]
|
||||
[\f[I]path\f[] ...]
|
||||
.PP
|
||||
rg [\f[I]options\f[]] \-\-files [\f[I]path\f[] ...]
|
||||
.PP
|
||||
rg [\f[I]options\f[]] \-\-type\-list
|
||||
.PP
|
||||
rg [\f[I]options\f[]] \-\-help
|
||||
.PP
|
||||
rg [\f[I]options\f[]] \-\-version
|
||||
.SH DESCRIPTION
|
||||
.PP
|
||||
ripgrep (rg) combines the usability of The Silver Searcher (an ack
|
||||
clone) with the raw speed of grep.
|
||||
.PP
|
||||
ripgrep\[aq]s regex engine uses finite automata and guarantees linear
|
||||
time searching.
|
||||
Because of this, features like backreferences and arbitrary lookaround
|
||||
are not supported.
|
||||
.PP
|
||||
Note that ripgrep may abort unexpectedly when using default settings if
|
||||
it searches a file that is simultaneously truncated.
|
||||
This behavior can be avoided by passing the \-\-no\-mmap flag.
|
||||
.PP
|
||||
Project home page: https://github.com/BurntSushi/ripgrep
|
||||
.SH COMMON OPTIONS
|
||||
.TP
|
||||
.B \-a, \-\-text
|
||||
Search binary files as if they were text.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-c, \-\-count
|
||||
Only show count of line matches for each file.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-color \f[I]WHEN\f[]
|
||||
Whether to use color in the output.
|
||||
Valid values are never, auto, always or ansi.
|
||||
The default is auto.
|
||||
When always is used, coloring is attempted based on your environment.
|
||||
When ansi is used, coloring is forcefully done using ANSI escape color
|
||||
codes.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-e, \-\-regexp \f[I]PATTERN\f[] ...
|
||||
Use PATTERN to search.
|
||||
This option can be provided multiple times, where all patterns given are
|
||||
searched.
|
||||
This is also useful when searching for patterns that start with a dash.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-F, \-\-fixed\-strings
|
||||
Treat the pattern as a literal string instead of a regular expression.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-g, \-\-glob \f[I]GLOB\f[] ...
|
||||
Include or exclude files for searching that match the given glob.
|
||||
This always overrides any other ignore logic if there is a conflict, but
|
||||
is otherwise applied in addition to ignore files (e.g., .gitignore or
|
||||
\&.ignore).
|
||||
Multiple glob flags may be used.
|
||||
Globbing rules match .gitignore globs.
|
||||
Precede a glob with a \[aq]!\[aq] to exclude it.
|
||||
.RS
|
||||
.PP
|
||||
The \-\-glob flag subsumes the functionality of both the \-\-include and
|
||||
\-\-exclude flags commonly found in other tools.
|
||||
.PP
|
||||
Values given to \-g must be quoted or your shell will expand them and
|
||||
result in unexpected behavior.
|
||||
.PP
|
||||
Combine with the \-\-files flag to return matched filenames (i.e., to
|
||||
replicate ack/ag\[aq]s \-g flag).
|
||||
For example:
|
||||
.IP
|
||||
.nf
|
||||
\f[C]
|
||||
rg\ \-g\ \[aq]*.foo\[aq]\ \-\-files
|
||||
\f[]
|
||||
.fi
|
||||
.RE
|
||||
.TP
|
||||
.B \-h, \-\-help
|
||||
Show this usage message.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-i, \-\-ignore\-case
|
||||
Case insensitive search.
|
||||
Overridden by \-\-case\-sensitive.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-n, \-\-line\-number
|
||||
Show line numbers (1\-based).
|
||||
This is enabled by default at a tty.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-N, \-\-no\-line\-number
|
||||
Suppress line numbers.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-q, \-\-quiet
|
||||
Do not print anything to stdout.
|
||||
If a match is found in a file, stop searching that file.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-t, \-\-type \f[I]TYPE\f[] ...
|
||||
Only search files matching TYPE.
|
||||
Multiple type flags may be provided.
|
||||
Use the \-\-type\-list flag to list all available types.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-T, \-\-type\-not \f[I]TYPE\f[] ...
|
||||
Do not search files matching TYPE.
|
||||
Multiple not\-type flags may be provided.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-u, \-\-unrestricted ...
|
||||
Reduce the level of \[aq]smart\[aq] searching.
|
||||
A single \-u doesn\[aq]t respect .gitignore (etc.) files.
|
||||
Two \-u flags will search hidden files and directories.
|
||||
Three \-u flags will search binary files.
|
||||
\-uu is equivalent to \f[C]grep\ \-r\f[], and \-uuu is equivalent to
|
||||
\f[C]grep\ \-a\ \-r\f[].
|
||||
.RS
|
||||
.PP
|
||||
Note that the \-u flags are convenient aliases for other combinations of
|
||||
flags.
|
||||
\-u aliases \-\-no\-ignore.
|
||||
\-uu aliases \-\-no\-ignore \-\-hidden.
|
||||
\-uuu aliases \-\-no\-ignore \-\-hidden \-\-text.
|
||||
.RE
|
||||
.TP
|
||||
.B \-v, \-\-invert\-match
|
||||
Invert matching.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-w, \-\-word\-regexp
|
||||
Only show matches surrounded by word boundaries.
|
||||
This is equivalent to putting \\b before and after the search pattern.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-x, \-\-line\-regexp
|
||||
Only show matches surrounded by line boundaries.
|
||||
This is equivalent to putting ^...$ around the search pattern.
|
||||
.RS
|
||||
.RE
|
||||
.SH LESS COMMON OPTIONS
|
||||
.TP
|
||||
.B \-A, \-\-after\-context \f[I]NUM\f[]
|
||||
Show NUM lines after each match.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-B, \-\-before\-context \f[I]NUM\f[]
|
||||
Show NUM lines before each match.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-C, \-\-context \f[I]NUM\f[]
|
||||
Show NUM lines before and after each match.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-colors \f[I]SPEC\f[] ...
|
||||
This flag specifies color settings for use in the output.
|
||||
This flag may be provided multiple times.
|
||||
Settings are applied iteratively.
|
||||
Colors are limited to one of eight choices: red, blue, green, cyan,
|
||||
magenta, yellow, white and black.
|
||||
Styles are limited to nobold, bold, nointense or intense.
|
||||
.RS
|
||||
.PP
|
||||
The format of the flag is {type}:{attribute}:{value}.
|
||||
{type} should be one of path, line, column or match.
|
||||
{attribute} can be fg, bg or style.
|
||||
Value is either a color (for fg and bg) or a text style.
|
||||
A special format, {type}:none, will clear all color settings for {type}.
|
||||
.PP
|
||||
For example, the following command will change the match color to
|
||||
magenta and the background color for line numbers to yellow:
|
||||
.IP
|
||||
.nf
|
||||
\f[C]
|
||||
rg\ \-\-colors\ \[aq]match:fg:magenta\[aq]\ \-\-colors\ \[aq]line:bg:yellow\[aq]\ foo.
|
||||
\f[]
|
||||
.fi
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-column
|
||||
Show column numbers (1\-based) in output.
|
||||
This only shows the column numbers for the first match on each line.
|
||||
Note that this doesn\[aq]t try to account for Unicode.
|
||||
One byte is equal to one column.
|
||||
This implies \-\-line\-number.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-context\-separator \f[I]SEPARATOR\f[]
|
||||
The string to use when separating non\-contiguous context lines.
|
||||
Escape sequences may be used.
|
||||
[default: \-\-]
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-debug
|
||||
Show debug messages.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-E, \-\-encoding \f[I]ENCODING\f[]
|
||||
Specify the text encoding that ripgrep will use on all files searched.
|
||||
The default value is \[aq]auto\[aq], which will cause ripgrep to do a
|
||||
best effort automatic detection of encoding on a per\-file basis.
|
||||
Other supported values can be found in the list of labels here:
|
||||
https://encoding.spec.whatwg.org/#concept\-encoding\-get
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-f, \-\-file \f[I]FILE\f[] ...
|
||||
Search for patterns from the given file, with one pattern per line.
|
||||
When this flag is used multiple times or in combination with the
|
||||
\-e/\-\-regexp flag, then all patterns provided are searched.
|
||||
Empty pattern lines will match all input lines, and the newline is not
|
||||
counted as part of the pattern.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-files
|
||||
Print each file that would be searched (but don\[aq]t search).
|
||||
.RS
|
||||
.PP
|
||||
Combine with the \-g flag to return matched paths, for example:
|
||||
.IP
|
||||
.nf
|
||||
\f[C]
|
||||
rg\ \-g\ \[aq]*.foo\[aq]\ \-\-files
|
||||
\f[]
|
||||
.fi
|
||||
.RE
|
||||
.TP
|
||||
.B \-l, \-\-files\-with\-matches
|
||||
Only show path of each file with matches.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-files\-without\-match
|
||||
Only show path of each file with no matches.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-H, \-\-with\-filename
|
||||
Display the file name for matches.
|
||||
This is the default when more than one file is searched.
|
||||
If \-\-heading is enabled, the file name will be shown above clusters of
|
||||
matches from each file; otherwise, the file name will be shown on each
|
||||
match.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-no\-filename
|
||||
Never show the filename for a match.
|
||||
This is the default when one file is searched.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-heading
|
||||
Show the file name above clusters of matches from each file instead of
|
||||
showing the file name for every match.
|
||||
This is the default mode at a tty.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-no\-heading
|
||||
Don\[aq]t group matches by each file.
|
||||
If \-H/\-\-with\-filename is enabled, then file names will be shown for
|
||||
every line matched.
|
||||
This is the default mode when not at a tty.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-hidden
|
||||
Search hidden directories and files.
|
||||
(Hidden directories and files are skipped by default.)
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-iglob \f[I]GLOB\f[] ...
|
||||
Include or exclude files/directories case insensitively.
|
||||
This always overrides any other ignore logic if there is a conflict, but
|
||||
is otherwise applied in addition to ignore files (e.g., .gitignore or
|
||||
\&.ignore).
|
||||
Multiple glob flags may be used.
|
||||
Globbing rules match .gitignore globs.
|
||||
Precede a glob with a \[aq]!\[aq] to exclude it.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-ignore\-file \f[I]FILE\f[] ...
|
||||
Specify additional ignore files for filtering file paths.
|
||||
Ignore files should be in the gitignore format and are matched relative
|
||||
to the current working directory.
|
||||
These ignore files have lower precedence than all other ignore files.
|
||||
When specifying multiple ignore files, earlier files have lower
|
||||
precedence than later files.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-L, \-\-follow
|
||||
Follow symlinks.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-M, \-\-max\-columns \f[I]NUM\f[]
|
||||
Don\[aq]t print lines longer than this limit in bytes.
|
||||
Longer lines are omitted, and only the number of matches in that line is
|
||||
printed.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-m, \-\-max\-count \f[I]NUM\f[]
|
||||
Limit the number of matching lines per file searched to NUM.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-max\-filesize \f[I]NUM\f[]+\f[I]SUFFIX\f[]?
|
||||
Ignore files larger than \f[I]NUM\f[] in size.
|
||||
Directories will never be ignored.
|
||||
.RS
|
||||
.PP
|
||||
\f[I]SUFFIX\f[] is optional and may be one of K, M or G.
|
||||
These correspond to kilobytes, megabytes and gigabytes respectively.
|
||||
If omitted the input is treated as bytes.
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-maxdepth \f[I]NUM\f[]
|
||||
Descend at most NUM directories below the command line arguments.
|
||||
A value of zero searches only the starting\-points themselves.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-mmap
|
||||
Search using memory maps when possible.
|
||||
This is enabled by default when ripgrep thinks it will be faster.
|
||||
(Note that mmap searching doesn\[aq]t currently support the various
|
||||
context related options.)
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-no\-messages
|
||||
Suppress all error messages.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-no\-mmap
|
||||
Never use memory maps, even when they might be faster.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-no\-ignore
|
||||
Don\[aq]t respect ignore files (.gitignore, .ignore, etc.). This implies
|
||||
\-\-no\-ignore\-parent.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-no\-ignore\-parent
|
||||
Don\[aq]t respect ignore files in parent directories.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-no\-ignore\-vcs
|
||||
Don\[aq]t respect version control ignore files (e.g., .gitignore).
|
||||
Note that .ignore files will continue to be respected.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-0, \-\-null
|
||||
Whenever a file name is printed, follow it with a NUL byte.
|
||||
This includes printing filenames before matches, and when printing a
|
||||
list of matching files such as with \-\-count, \-\-files\-with\-matches
|
||||
and \-\-files.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-o, \-\-only\-matching
|
||||
Print only the matched (non\-empty) parts of a matching line, with each
|
||||
such part on a separate output line.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-path\-separator \f[I]SEPARATOR\f[]
|
||||
The path separator to use when printing file paths.
|
||||
This defaults to your platform\[aq]s path separator, which is / on Unix
|
||||
and \\ on Windows.
|
||||
This flag is intended for overriding the default when the environment
|
||||
demands it (e.g., cygwin).
|
||||
A path separator is limited to a single byte.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-p, \-\-pretty
|
||||
Alias for \-\-color=always \-\-heading \-\-line\-number.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-r, \-\-replace \f[I]ARG\f[]
|
||||
Replace every match with the string given when printing search results.
|
||||
Neither this flag nor any other flag will modify your files.
|
||||
.RS
|
||||
.PP
|
||||
Capture group indices (e.g., $5) and names (e.g., $foo) are supported in
|
||||
the replacement string.
|
||||
.PP
|
||||
Note that the replacement by default replaces each match, and NOT the
|
||||
entire line.
|
||||
To replace the entire line, you should match the entire line.
|
||||
For example, to emit only the first phone number in each line:
|
||||
.IP
|
||||
.nf
|
||||
\f[C]
|
||||
rg\ \[aq]^.*([0\-9]{3}\-[0\-9]{3}\-[0\-9]{4}).*$\[aq]\ \-\-replace\ \[aq]$1\[aq]
|
||||
\f[]
|
||||
.fi
|
||||
.RE
|
||||
.TP
|
||||
.B \-s, \-\-case\-sensitive
|
||||
Search case sensitively (default).
|
||||
Overrides \-\-ignore\-case and \-\-smart\-case.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-S, \-\-smart\-case
|
||||
Search case insensitively if the pattern is all lowercase.
|
||||
Search case sensitively otherwise.
|
||||
This is overridden by either \-\-case\-sensitive or \-\-ignore\-case.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-sort\-files
|
||||
Sort results by file path.
|
||||
Note that this currently disables all parallelism and runs search in a
|
||||
single thread.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-j, \-\-threads \f[I]ARG\f[]
|
||||
The number of threads to use.
|
||||
0 means use the number of logical CPUs (capped at 12).
|
||||
[default: 0]
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-version
|
||||
Show the version number of ripgrep and exit.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-vimgrep
|
||||
Show results with every match on its own line, including line numbers
|
||||
and column numbers.
|
||||
With this option, a line with more than one match will be printed more
|
||||
than once.
|
||||
.RS
|
||||
.PP
|
||||
Recommended .vimrc configuration:
|
||||
.IP
|
||||
.nf
|
||||
\f[C]
|
||||
\ \ set\ grepprg=rg\\\ \-\-vimgrep
|
||||
\ \ set\ grepformat^=%f:%l:%c:%m
|
||||
\f[]
|
||||
.fi
|
||||
.PP
|
||||
Use :grep to grep for something, then :cn and :cp to navigate through
|
||||
the matches.
|
||||
.RE
|
||||
.SH FILE TYPE MANAGEMENT OPTIONS
|
||||
.TP
|
||||
.B \-\-type\-list
|
||||
Show all supported file types and their associated globs.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-type\-add \f[I]ARG\f[] ...
|
||||
Add a new glob for a particular file type.
|
||||
Only one glob can be added at a time.
|
||||
Multiple \-\-type\-add flags can be provided.
|
||||
Unless \-\-type\-clear is used, globs are added to any existing globs
|
||||
inside of ripgrep.
|
||||
Note that this must be passed to every invocation of rg.
|
||||
Type settings are NOT persisted.
|
||||
Example:
|
||||
.RS
|
||||
.IP
|
||||
.nf
|
||||
\f[C]
|
||||
\ \ rg\ \-\-type\-add\ \[aq]foo:*.foo\[aq]\ \-tfoo\ PATTERN
|
||||
\f[]
|
||||
.fi
|
||||
.PP
|
||||
\-\-type\-add can also be used to include rules from other types with
|
||||
the special include directive.
|
||||
The include directive permits specifying one or more other type names
|
||||
(separated by a comma) that have been defined, and their rules will
|
||||
automatically be imported into the type specified.
|
||||
For example, to create a type called src that matches C++, Python and
|
||||
Markdown files, one can use:
|
||||
.IP
|
||||
.nf
|
||||
\f[C]
|
||||
\ \ \-\-type\-add\ \[aq]src:include:cpp,py,md\[aq]
|
||||
\f[]
|
||||
.fi
|
||||
.PP
|
||||
Additional glob rules can still be added to the src type by using the
|
||||
\-\-type\-add flag again:
|
||||
.IP
|
||||
.nf
|
||||
\f[C]
|
||||
\ \ \-\-type\-add\ \[aq]src:include:cpp,py,md\[aq]\ \-\-type\-add\ \[aq]src:*.foo\[aq]
|
||||
\f[]
|
||||
.fi
|
||||
.PP
|
||||
Note that type names must consist only of Unicode letters or numbers.
|
||||
Punctuation characters are not allowed.
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-type\-clear \f[I]TYPE\f[] ...
|
||||
Clear the file type globs previously defined for TYPE.
|
||||
This only clears the default type definitions that are found inside of
|
||||
ripgrep.
|
||||
Note that this must be passed to every invocation of rg.
|
||||
.RS
|
||||
.RE
|
||||
.SH SHELL COMPLETION
|
||||
.PP
|
||||
Shell completion files are included in the release tarball for Bash,
|
||||
Fish, Zsh and PowerShell.
|
||||
.PP
|
||||
For \f[B]bash\f[], move \f[C]rg.bash\-completion\f[] to
|
||||
\f[C]$XDG_CONFIG_HOME/bash_completion\f[] or
|
||||
\f[C]/etc/bash_completion.d/\f[].
|
||||
.PP
|
||||
For \f[B]fish\f[], move \f[C]rg.fish\f[] to
|
||||
\f[C]$HOME/.config/fish/completions\f[].
|
||||
374
doc/rg.1.md
Normal file
374
doc/rg.1.md
Normal file
@@ -0,0 +1,374 @@
|
||||
# NAME
|
||||
|
||||
rg - recursively search current directory for lines matching a pattern
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
rg [*options*] *PATTERN* [*path* ...]
|
||||
|
||||
rg [*options*] [-e *PATTERN* ...] [-f *FILE* ...] [*path* ...]
|
||||
|
||||
rg [*options*] --files [*path* ...]
|
||||
|
||||
rg [*options*] --type-list
|
||||
|
||||
rg [*options*] --help
|
||||
|
||||
rg [*options*] --version
|
||||
|
||||
# DESCRIPTION
|
||||
|
||||
ripgrep (rg) combines the usability of The Silver Searcher (an ack clone) with
|
||||
the raw speed of grep.
|
||||
|
||||
ripgrep's regex engine uses finite automata and guarantees linear time
|
||||
searching. Because of this, features like backreferences and arbitrary
|
||||
lookaround are not supported.
|
||||
|
||||
Note that ripgrep may abort unexpectedly when using default settings if it
|
||||
searches a file that is simultaneously truncated. This behavior can be avoided
|
||||
by passing the --no-mmap flag.
|
||||
|
||||
Project home page: https://github.com/BurntSushi/ripgrep
|
||||
|
||||
# COMMON OPTIONS
|
||||
|
||||
-a, --text
|
||||
: Search binary files as if they were text.
|
||||
|
||||
-c, --count
|
||||
: Only show count of line matches for each file.
|
||||
|
||||
--color *WHEN*
|
||||
: Whether to use color in the output. Valid values are never, auto, always or
|
||||
ansi. The default is auto. When always is used, coloring is attempted based
|
||||
on your environment. When ansi is used, coloring is forcefully done using
|
||||
ANSI escape color codes.
|
||||
|
||||
-e, --regexp *PATTERN* ...
|
||||
: Use PATTERN to search. This option can be provided multiple times, where all
|
||||
patterns given are searched. This is also useful when searching for patterns
|
||||
that start with a dash.
|
||||
|
||||
-F, --fixed-strings
|
||||
: Treat the pattern as a literal string instead of a regular expression.
|
||||
|
||||
-g, --glob *GLOB* ...
|
||||
: Include or exclude files for searching that match the given glob. This always
|
||||
overrides any other ignore logic if there is a conflict, but is otherwise
|
||||
applied in addition to ignore files (e.g., .gitignore or .ignore). Multiple
|
||||
glob flags may be used. Globbing rules match .gitignore globs. Precede a
|
||||
glob with a '!' to exclude it.
|
||||
|
||||
The --glob flag subsumes the functionality of both the --include and
|
||||
--exclude flags commonly found in other tools.
|
||||
|
||||
Values given to -g must be quoted or your shell will expand them and result
|
||||
in unexpected behavior.
|
||||
|
||||
Combine with the --files flag to return matched filenames
|
||||
(i.e., to replicate ack/ag's -g flag). For example:
|
||||
|
||||
rg -g '*.foo' --files
|
||||
|
||||
-h, --help
|
||||
: Show this usage message.
|
||||
|
||||
-i, --ignore-case
|
||||
: Case insensitive search. Overridden by --case-sensitive.
|
||||
|
||||
-n, --line-number
|
||||
: Show line numbers (1-based). This is enabled by default at a tty.
|
||||
|
||||
-N, --no-line-number
|
||||
: Suppress line numbers.
|
||||
|
||||
-q, --quiet
|
||||
: Do not print anything to stdout. If a match is found in a file, stop
|
||||
searching that file.
|
||||
|
||||
-t, --type *TYPE* ...
|
||||
: Only search files matching TYPE. Multiple type flags may be provided. Use the
|
||||
--type-list flag to list all available types.
|
||||
|
||||
-T, --type-not *TYPE* ...
|
||||
: Do not search files matching TYPE. Multiple not-type flags may be provided.
|
||||
|
||||
-u, --unrestricted ...
|
||||
: Reduce the level of 'smart' searching. A single -u doesn't respect .gitignore
|
||||
(etc.) files. Two -u flags will search hidden files and directories. Three
|
||||
-u flags will search binary files. -uu is equivalent to `grep -r`, and -uuu
|
||||
is equivalent to `grep -a -r`.
|
||||
|
||||
Note that the -u flags are convenient aliases for other combinations of
|
||||
flags. -u aliases --no-ignore. -uu aliases --no-ignore --hidden.
|
||||
-uuu aliases --no-ignore --hidden --text.
|
||||
|
||||
-v, --invert-match
|
||||
: Invert matching.
|
||||
|
||||
-w, --word-regexp
|
||||
: Only show matches surrounded by word boundaries. This is equivalent to
|
||||
putting \\b before and after the search pattern.
|
||||
|
||||
-x, --line-regexp
|
||||
: Only show matches surrounded by line boundaries. This is equivalent to
|
||||
putting ^...$ around the search pattern.
|
||||
|
||||
# LESS COMMON OPTIONS
|
||||
|
||||
-A, --after-context *NUM*
|
||||
: Show NUM lines after each match.
|
||||
|
||||
-B, --before-context *NUM*
|
||||
: Show NUM lines before each match.
|
||||
|
||||
-C, --context *NUM*
|
||||
: Show NUM lines before and after each match.
|
||||
|
||||
--colors *SPEC* ...
|
||||
: This flag specifies color settings for use in the output. This flag may be
|
||||
provided multiple times. Settings are applied iteratively. Colors are limited
|
||||
to one of eight choices: red, blue, green, cyan, magenta, yellow, white and
|
||||
black. Styles are limited to nobold, bold, nointense or intense.
|
||||
|
||||
The format of the flag is {type}:{attribute}:{value}. {type} should be one
|
||||
of path, line, column or match. {attribute} can be fg, bg or style. Value
|
||||
is either a color (for fg and bg) or a text style. A special format,
|
||||
{type}:none, will clear all color settings for {type}.
|
||||
|
||||
For example, the following command will change the match color to magenta
|
||||
and the background color for line numbers to yellow:
|
||||
|
||||
rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo.
|
||||
|
||||
--column
|
||||
: Show column numbers (1-based) in output. This only shows the column
|
||||
numbers for the first match on each line. Note that this doesn't try
|
||||
to account for Unicode. One byte is equal to one column. This implies
|
||||
--line-number.
|
||||
|
||||
--context-separator *SEPARATOR*
|
||||
: The string to use when separating non-contiguous context lines. Escape
|
||||
sequences may be used. [default: --]
|
||||
|
||||
--debug
|
||||
: Show debug messages.
|
||||
|
||||
-E, --encoding *ENCODING*
|
||||
: Specify the text encoding that ripgrep will use on all files
|
||||
searched. The default value is 'auto', which will cause ripgrep to do
|
||||
a best effort automatic detection of encoding on a per-file basis.
|
||||
Other supported values can be found in the list of labels here:
|
||||
https://encoding.spec.whatwg.org/#concept-encoding-get
|
||||
|
||||
-f, --file *FILE* ...
|
||||
: Search for patterns from the given file, with one pattern per line. When this
|
||||
flag is used multiple times or in combination with the -e/--regexp flag,
|
||||
then all patterns provided are searched. Empty pattern lines will match all
|
||||
input lines, and the newline is not counted as part of the pattern.
|
||||
|
||||
--files
|
||||
: Print each file that would be searched (but don't search).
|
||||
|
||||
Combine with the -g flag to return matched paths, for example:
|
||||
|
||||
rg -g '*.foo' --files
|
||||
|
||||
-l, --files-with-matches
|
||||
: Only show path of each file with matches.
|
||||
|
||||
--files-without-match
|
||||
: Only show path of each file with no matches.
|
||||
|
||||
-H, --with-filename
|
||||
: Display the file name for matches. This is the default when
|
||||
more than one file is searched. If --heading is enabled, the
|
||||
file name will be shown above clusters of matches from each
|
||||
file; otherwise, the file name will be shown on each match.
|
||||
|
||||
--no-filename
|
||||
: Never show the filename for a match. This is the default when
|
||||
one file is searched.
|
||||
|
||||
--heading
|
||||
: Show the file name above clusters of matches from each file instead of
|
||||
showing the file name for every match. This is the default mode at a tty.
|
||||
|
||||
--no-heading
|
||||
: Don't group matches by each file. If -H/--with-filename is enabled, then
|
||||
file names will be shown for every line matched. This is the default mode
|
||||
when not at a tty.
|
||||
|
||||
--hidden
|
||||
: Search hidden directories and files. (Hidden directories and files are
|
||||
skipped by default.)
|
||||
|
||||
--iglob *GLOB* ...
|
||||
: Include or exclude files/directories case insensitively. This always
|
||||
overrides any other ignore logic if there is a conflict, but is otherwise
|
||||
applied in addition to ignore files (e.g., .gitignore or .ignore). Multiple
|
||||
glob flags may be used. Globbing rules match .gitignore globs. Precede a
|
||||
glob with a '!' to exclude it.
|
||||
|
||||
--ignore-file *FILE* ...
|
||||
: Specify additional ignore files for filtering file paths.
|
||||
Ignore files should be in the gitignore format and are matched
|
||||
relative to the current working directory. These ignore files
|
||||
have lower precedence than all other ignore files. When
|
||||
specifying multiple ignore files, earlier files have lower
|
||||
precedence than later files.
|
||||
|
||||
-L, --follow
|
||||
: Follow symlinks.
|
||||
|
||||
-M, --max-columns *NUM*
|
||||
: Don't print lines longer than this limit in bytes. Longer lines are omitted,
|
||||
and only the number of matches in that line is printed.
|
||||
|
||||
-m, --max-count *NUM*
|
||||
: Limit the number of matching lines per file searched to NUM.
|
||||
|
||||
--max-filesize *NUM*+*SUFFIX*?
|
||||
: Ignore files larger than *NUM* in size. Directories will never be ignored.
|
||||
|
||||
*SUFFIX* is optional and may be one of K, M or G. These correspond to
|
||||
kilobytes, megabytes and gigabytes respectively. If omitted the input is
|
||||
treated as bytes.
|
||||
|
||||
--maxdepth *NUM*
|
||||
: Descend at most NUM directories below the command line arguments.
|
||||
A value of zero searches only the starting-points themselves.
|
||||
|
||||
--mmap
|
||||
: Search using memory maps when possible. This is enabled by default
|
||||
when ripgrep thinks it will be faster. (Note that mmap searching
|
||||
doesn't currently support the various context related options.)
|
||||
|
||||
--no-messages
|
||||
: Suppress all error messages.
|
||||
|
||||
--no-mmap
|
||||
: Never use memory maps, even when they might be faster.
|
||||
|
||||
--no-ignore
|
||||
: Don't respect ignore files (.gitignore, .ignore, etc.).
|
||||
This implies --no-ignore-parent.
|
||||
|
||||
--no-ignore-parent
|
||||
: Don't respect ignore files in parent directories.
|
||||
|
||||
--no-ignore-vcs
|
||||
: Don't respect version control ignore files (e.g., .gitignore).
|
||||
Note that .ignore files will continue to be respected.
|
||||
|
||||
-0, --null
|
||||
: Whenever a file name is printed, follow it with a NUL byte.
|
||||
This includes printing filenames before matches, and when printing
|
||||
a list of matching files such as with --count, --files-with-matches
|
||||
and --files.
|
||||
|
||||
-o, --only-matching
|
||||
: Print only the matched (non-empty) parts of a matching line, with each such
|
||||
part on a separate output line.
|
||||
|
||||
--path-separator *SEPARATOR*
|
||||
: The path separator to use when printing file paths. This defaults to your
|
||||
platform's path separator, which is / on Unix and \\ on Windows. This flag is
|
||||
intended for overriding the default when the environment demands it (e.g.,
|
||||
cygwin). A path separator is limited to a single byte.
|
||||
|
||||
-p, --pretty
|
||||
: Alias for --color=always --heading --line-number.
|
||||
|
||||
-r, --replace *ARG*
|
||||
: Replace every match with the string given when printing search results.
|
||||
Neither this flag nor any other flag will modify your files.
|
||||
|
||||
Capture group indices (e.g., $5) and names (e.g., $foo) are supported
|
||||
in the replacement string.
|
||||
|
||||
Note that the replacement by default replaces each match, and NOT the
|
||||
entire line. To replace the entire line, you should match the entire line.
|
||||
For example, to emit only the first phone number in each line:
|
||||
|
||||
rg '^.*([0-9]{3}-[0-9]{3}-[0-9]{4}).*$' --replace '$1'
|
||||
|
||||
-s, --case-sensitive
|
||||
: Search case sensitively (default). Overrides --ignore-case and --smart-case.
|
||||
|
||||
-S, --smart-case
|
||||
: Search case insensitively if the pattern is all lowercase.
|
||||
Search case sensitively otherwise. This is overridden by either
|
||||
--case-sensitive or --ignore-case.
|
||||
|
||||
--sort-files
|
||||
: Sort results by file path. Note that this currently
|
||||
disables all parallelism and runs search in a single thread.
|
||||
|
||||
-j, --threads *ARG*
|
||||
: The number of threads to use. 0 means use the number of logical CPUs
|
||||
(capped at 12). [default: 0]
|
||||
|
||||
--version
|
||||
: Show the version number of ripgrep and exit.
|
||||
|
||||
--vimgrep
|
||||
: Show results with every match on its own line, including
|
||||
line numbers and column numbers. With this option, a line with
|
||||
more than one match will be printed more than once.
|
||||
|
||||
Recommended .vimrc configuration:
|
||||
|
||||
set grepprg=rg\ --vimgrep
|
||||
set grepformat^=%f:%l:%c:%m
|
||||
|
||||
Use :grep to grep for something, then :cn and :cp to navigate through the
|
||||
matches.
|
||||
|
||||
# FILE TYPE MANAGEMENT OPTIONS
|
||||
|
||||
--type-list
|
||||
: Show all supported file types and their associated globs.
|
||||
|
||||
--type-add *ARG* ...
|
||||
: Add a new glob for a particular file type. Only one glob can be added
|
||||
at a time. Multiple --type-add flags can be provided. Unless --type-clear
|
||||
is used, globs are added to any existing globs inside of ripgrep. Note that
|
||||
this must be passed to every invocation of rg. Type settings are NOT
|
||||
persisted. Example:
|
||||
|
||||
rg --type-add 'foo:*.foo' -tfoo PATTERN
|
||||
|
||||
--type-add can also be used to include rules from other types
|
||||
with the special include directive. The include directive
|
||||
permits specifying one or more other type names (separated by a
|
||||
comma) that have been defined, and their rules will automatically
|
||||
be imported into the type specified. For example, to create a
|
||||
type called src that matches C++, Python and Markdown files, one
|
||||
can use:
|
||||
|
||||
--type-add 'src:include:cpp,py,md'
|
||||
|
||||
Additional glob rules can still be added to the src type by
|
||||
using the --type-add flag again:
|
||||
|
||||
--type-add 'src:include:cpp,py,md' --type-add 'src:*.foo'
|
||||
|
||||
Note that type names must consist only of Unicode letters or
|
||||
numbers. Punctuation characters are not allowed.
|
||||
|
||||
--type-clear *TYPE* ...
|
||||
: Clear the file type globs previously defined for TYPE. This only clears
|
||||
the default type definitions that are found inside of ripgrep. Note
|
||||
that this must be passed to every invocation of rg.
|
||||
|
||||
# SHELL COMPLETION
|
||||
|
||||
Shell completion files are included in the release tarball for Bash, Fish, Zsh
|
||||
and PowerShell.
|
||||
|
||||
For **bash**, move `rg.bash-completion` to `$XDG_CONFIG_HOME/bash_completion`
|
||||
or `/etc/bash_completion.d/`.
|
||||
|
||||
For **fish**, move `rg.fish` to `$HOME/.config/fish/completions`.
|
||||
229
doc/rg.1.txt.tpl
229
doc/rg.1.txt.tpl
@@ -1,229 +0,0 @@
|
||||
rg(1)
|
||||
=====
|
||||
|
||||
Name
|
||||
----
|
||||
rg - recursively search current directory for lines matching a pattern
|
||||
|
||||
|
||||
Synopsis
|
||||
--------
|
||||
*rg* [_OPTIONS_] _PATTERN_ [_PATH_...]
|
||||
|
||||
*rg* [_OPTIONS_] *-e* _PATTERN_... [_PATH_...]
|
||||
|
||||
*rg* [_OPTIONS_] *-f* _PATTERNFILE_... [_PATH_...]
|
||||
|
||||
*rg* [_OPTIONS_] *--files* [_PATH_...]
|
||||
|
||||
*rg* [_OPTIONS_] *--type-list*
|
||||
|
||||
*command* | *rg* [_OPTIONS_] _PATTERN_
|
||||
|
||||
*rg* [_OPTIONS_] *--help*
|
||||
|
||||
*rg* [_OPTIONS_] *--version*
|
||||
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
ripgrep (rg) recursively searches your current directory for a regex pattern.
|
||||
By default, ripgrep will respect your .gitignore and automatically skip hidden
|
||||
files/directories and binary files.
|
||||
|
||||
ripgrep's default regex engine uses finite automata and guarantees linear
|
||||
time searching. Because of this, features like backreferences and arbitrary
|
||||
look-around are not supported. However, if ripgrep is built with PCRE2, then
|
||||
the *--pcre2* flag can be used to enable backreferences and look-around.
|
||||
|
||||
ripgrep supports configuration files. Set *RIPGREP_CONFIG_PATH* to a
|
||||
configuration file. The file can specify one shell argument per line. Lines
|
||||
starting with *#* are ignored. For more details, see the man page or the
|
||||
*README*.
|
||||
|
||||
Tip: to disable all smart filtering and make ripgrep behave a bit more like
|
||||
classical grep, use *rg -uuu*.
|
||||
|
||||
|
||||
REGEX SYNTAX
|
||||
------------
|
||||
ripgrep uses Rust's regex engine by default, which documents its syntax:
|
||||
https://docs.rs/regex/*/regex/#syntax
|
||||
|
||||
ripgrep uses byte-oriented regexes, which have some additional documentation:
|
||||
https://docs.rs/regex/*/regex/bytes/index.html#syntax
|
||||
|
||||
To a first approximation, ripgrep uses Perl-like regexes without look-around or
|
||||
backreferences. This makes them very similar to the "extended" (ERE) regular
|
||||
expressions supported by *egrep*, but with a few additional features like
|
||||
Unicode character classes.
|
||||
|
||||
If you're using ripgrep with the *--pcre2* flag, then please consult
|
||||
https://www.pcre.org or the PCRE2 man pages for documentation on the supported
|
||||
syntax.
|
||||
|
||||
|
||||
POSITIONAL ARGUMENTS
|
||||
--------------------
|
||||
_PATTERN_::
|
||||
A regular expression used for searching. To match a pattern beginning with a
|
||||
dash, use the -e/--regexp option.
|
||||
|
||||
_PATH_::
|
||||
A file or directory to search. Directories are searched recursively. Paths
|
||||
specified explicitly on the command line override glob and ignore rules.
|
||||
|
||||
|
||||
OPTIONS
|
||||
-------
|
||||
Note that for many options, there exist flags to disable them. In some cases,
|
||||
those flags are not listed in a first class way below. For example, the
|
||||
*--column* flag (listed below) enables column numbers in ripgrep's output, but
|
||||
the *--no-column* flag (not listed below) disables them. The reverse can also
|
||||
exist. For example, the *--no-ignore* flag (listed below) disables ripgrep's
|
||||
*gitignore* logic, but the *--ignore* flag (not listed below) enables it. These
|
||||
flags are useful for overriding a ripgrep configuration file on the command
|
||||
line. Each flag's documentation notes whether an inverted flag exists. In all
|
||||
cases, the flag specified last takes precedence.
|
||||
|
||||
{OPTIONS}
|
||||
|
||||
|
||||
EXIT STATUS
|
||||
-----------
|
||||
If ripgrep finds a match, then the exit status of the program is 0. If no match
|
||||
could be found, then the exit status is 1. If an error occurred, then the exit
|
||||
status is always 2 unless ripgrep was run with the *--quiet* flag and a match
|
||||
was found. In summary:
|
||||
|
||||
* `0` exit status occurs only when at least one match was found, and if
|
||||
no error occurred, unless *--quiet* was given.
|
||||
* `1` exit status occurs only when no match was found and no error occurred.
|
||||
* `2` exit status occurs when an error occurred. This is true for both
|
||||
catastrophic errors (e.g., a regex syntax error) and for soft errors (e.g.,
|
||||
unable to read a file).
|
||||
|
||||
|
||||
CONFIGURATION FILES
|
||||
-------------------
|
||||
ripgrep supports reading configuration files that change ripgrep's default
|
||||
behavior. The format of the configuration file is an "rc" style and is very
|
||||
simple. It is defined by two rules:
|
||||
|
||||
1. Every line is a shell argument, after trimming whitespace.
|
||||
2. Lines starting with *#* (optionally preceded by any amount of
|
||||
whitespace) are ignored.
|
||||
|
||||
ripgrep will look for a single configuration file if and only if the
|
||||
*RIPGREP_CONFIG_PATH* environment variable is set and is non-empty.
|
||||
ripgrep will parse shell arguments from this file on startup and will
|
||||
behave as if the arguments in this file were prepended to any explicit
|
||||
arguments given to ripgrep on the command line.
|
||||
|
||||
For example, if your ripgreprc file contained a single line:
|
||||
|
||||
--smart-case
|
||||
|
||||
then the following command
|
||||
|
||||
RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo
|
||||
|
||||
would behave identically to the following command
|
||||
|
||||
rg --smart-case foo
|
||||
|
||||
Another example is adding types. A ripgreprc file containing the two lines
|
||||
|
||||
--type-add
|
||||
web:*.{html,css,js}*
|
||||
|
||||
would behave identically to the following command
|
||||
|
||||
rg --type-add 'web:*.{html,css,js}*' foo
|
||||
|
||||
The same applies to globs. A ripgreprc file containing either
|
||||
|
||||
--glob=!git/*
|
||||
|
||||
or
|
||||
|
||||
--glob
|
||||
!git/*
|
||||
|
||||
would behave identically to the following command
|
||||
|
||||
rg --glob '!git/*' foo
|
||||
|
||||
ripgrep also provides a flag, *--no-config*, that when present will suppress
|
||||
any and all support for configuration. This includes any future support
|
||||
for auto-loading configuration files from pre-determined paths.
|
||||
|
||||
Conflicts between configuration files and explicit arguments are handled
|
||||
exactly like conflicts in the same command line invocation. That is,
|
||||
this command:
|
||||
|
||||
RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo --case-sensitive
|
||||
|
||||
is exactly equivalent to
|
||||
|
||||
rg --smart-case foo --case-sensitive
|
||||
|
||||
in which case, the *--case-sensitive* flag would override the *--smart-case*
|
||||
flag.
|
||||
|
||||
|
||||
SHELL COMPLETION
|
||||
----------------
|
||||
Shell completion files are included in the release tarball for Bash, Fish, Zsh
|
||||
and PowerShell.
|
||||
|
||||
For *bash*, move *rg.bash* to *$XDG_CONFIG_HOME/bash_completion*
|
||||
or */etc/bash_completion.d/*.
|
||||
|
||||
For *fish*, move *rg.fish* to *$HOME/.config/fish/completions*.
|
||||
|
||||
For *zsh*, move *_rg* to one of your *$fpath* directories.
|
||||
|
||||
|
||||
CAVEATS
|
||||
-------
|
||||
ripgrep may abort unexpectedly when using default settings if it searches a
|
||||
file that is simultaneously truncated. This behavior can be avoided by passing
|
||||
the *--no-mmap* flag which will forcefully disable the use of memory maps in
|
||||
all cases.
|
||||
|
||||
ripgrep may use a large amount of memory depending on a few factors. Firstly,
|
||||
if ripgrep uses parallelism for search (the default), then the entire output
|
||||
for each individual file is buffered into memory in order to prevent
|
||||
interleaving matches in the output. To avoid this, you can disable parallelism
|
||||
with the *-j1* flag. Secondly, ripgrep always needs to have at least a single
|
||||
line in memory in order to execute a search. A file with a very long line can
|
||||
thus cause ripgrep to use a lot of memory. Generally, this only occurs when
|
||||
searching binary data with the *-a* flag enabled. (When the *-a* flag isn't
|
||||
enabled, ripgrep will replace all NUL bytes with line terminators, which
|
||||
typically prevents exorbitant memory usage.) Thirdly, when ripgrep searches
|
||||
a large file using a memory map, the process will report its resident memory
|
||||
usage as the size of the file. However, this does not mean ripgrep actually
|
||||
needed to use that much memory; the operating system will generally handle this
|
||||
for you.
|
||||
|
||||
|
||||
VERSION
|
||||
-------
|
||||
{VERSION}
|
||||
|
||||
|
||||
HOMEPAGE
|
||||
--------
|
||||
https://github.com/BurntSushi/ripgrep
|
||||
|
||||
Please report bugs and feature requests in the issue tracker. Please do your
|
||||
best to provide a reproducible test case for bugs. This should include the
|
||||
corpus being searched, the *rg* command, the actual output and the expected
|
||||
output. Please also include the output of running the same *rg* command but
|
||||
with the *--debug* flag.
|
||||
|
||||
|
||||
AUTHORS
|
||||
-------
|
||||
Andrew Gallant <jamslam@gmail.com>
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "globset"
|
||||
version = "0.4.3" #:version
|
||||
version = "0.2.1" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Cross platform single glob and glob set matching. Glob set matching is the
|
||||
@@ -19,14 +19,14 @@ name = "globset"
|
||||
bench = false
|
||||
|
||||
[dependencies]
|
||||
aho-corasick = "0.7.3"
|
||||
bstr = { version = "0.1.2", default-features = false, features = ["std"] }
|
||||
fnv = "1.0.6"
|
||||
log = "0.4.5"
|
||||
regex = "1.1.5"
|
||||
aho-corasick = "0.6.0"
|
||||
fnv = "1.0"
|
||||
log = "0.3"
|
||||
memchr = "2"
|
||||
regex = "0.2.1"
|
||||
|
||||
[dev-dependencies]
|
||||
glob = "0.3.0"
|
||||
glob = "0.2"
|
||||
|
||||
[features]
|
||||
simd-accel = []
|
||||
simd-accel = ["regex/simd-accel"]
|
||||
|
||||
@@ -4,7 +4,7 @@ Cross platform single glob and glob set matching. Glob set matching is the
|
||||
process of matching one or more glob patterns against a single candidate path
|
||||
simultaneously, and returning all of the globs that matched.
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/globset)
|
||||
|
||||
@@ -20,7 +20,7 @@ Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
globset = "0.3"
|
||||
globset = "0.2"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
@@ -36,7 +36,7 @@ This example shows how to match a single glob against a single file path.
|
||||
```rust
|
||||
use globset::Glob;
|
||||
|
||||
let glob = Glob::new("*.rs")?.compile_matcher();
|
||||
let glob = try!(Glob::new("*.rs")).compile_matcher();
|
||||
|
||||
assert!(glob.is_match("foo.rs"));
|
||||
assert!(glob.is_match("foo/bar.rs"));
|
||||
@@ -51,8 +51,8 @@ semantics. In this example, we prevent wildcards from matching path separators.
|
||||
```rust
|
||||
use globset::GlobBuilder;
|
||||
|
||||
let glob = GlobBuilder::new("*.rs")
|
||||
.literal_separator(true).build()?.compile_matcher();
|
||||
let glob = try!(GlobBuilder::new("*.rs")
|
||||
.literal_separator(true).build()).compile_matcher();
|
||||
|
||||
assert!(glob.is_match("foo.rs"));
|
||||
assert!(!glob.is_match("foo/bar.rs")); // no longer matches
|
||||
@@ -69,10 +69,10 @@ use globset::{Glob, GlobSetBuilder};
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
// A GlobBuilder can be used to configure each glob's match semantics
|
||||
// independently.
|
||||
builder.add(Glob::new("*.rs")?);
|
||||
builder.add(Glob::new("src/lib.rs")?);
|
||||
builder.add(Glob::new("src/**/foo.rs")?);
|
||||
let set = builder.build()?;
|
||||
builder.add(try!(Glob::new("*.rs")));
|
||||
builder.add(try!(Glob::new("src/lib.rs")));
|
||||
builder.add(try!(Glob::new("src/**/foo.rs")));
|
||||
let set = try!(builder.build());
|
||||
|
||||
assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
|
||||
```
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::fmt;
|
||||
use std::hash;
|
||||
use std::iter;
|
||||
@@ -27,7 +28,7 @@ pub enum MatchStrategy {
|
||||
BasenameLiteral(String),
|
||||
/// A pattern matches if and only if the file path's extension matches this
|
||||
/// literal string.
|
||||
Extension(String),
|
||||
Extension(OsString),
|
||||
/// A pattern matches if and only if this prefix literal is a prefix of the
|
||||
/// candidate file path.
|
||||
Prefix(String),
|
||||
@@ -46,7 +47,7 @@ pub enum MatchStrategy {
|
||||
/// extension. Note that this is a necessary but NOT sufficient criterion.
|
||||
/// Namely, if the extension matches, then a full regex search is still
|
||||
/// required.
|
||||
RequiredExtension(String),
|
||||
RequiredExtension(OsString),
|
||||
/// A regex needs to be used for matching.
|
||||
Regex,
|
||||
}
|
||||
@@ -120,7 +121,7 @@ impl GlobMatcher {
|
||||
|
||||
/// Tests whether the given path matches this pattern or not.
|
||||
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
|
||||
self.re.is_match(path.path.as_bytes())
|
||||
self.re.is_match(&path.path)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -145,7 +146,7 @@ impl GlobStrategic {
|
||||
|
||||
/// Tests whether the given path matches this pattern or not.
|
||||
fn is_match_candidate(&self, candidate: &Candidate) -> bool {
|
||||
let byte_path = candidate.path.as_bytes();
|
||||
let byte_path = &*candidate.path;
|
||||
|
||||
match self.strategy {
|
||||
MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path,
|
||||
@@ -153,7 +154,7 @@ impl GlobStrategic {
|
||||
lit.as_bytes() == &*candidate.basename
|
||||
}
|
||||
MatchStrategy::Extension(ref ext) => {
|
||||
ext.as_bytes() == &*candidate.ext
|
||||
candidate.ext == ext
|
||||
}
|
||||
MatchStrategy::Prefix(ref pre) => {
|
||||
starts_with(pre.as_bytes(), byte_path)
|
||||
@@ -165,8 +166,7 @@ impl GlobStrategic {
|
||||
ends_with(suffix.as_bytes(), byte_path)
|
||||
}
|
||||
MatchStrategy::RequiredExtension(ref ext) => {
|
||||
let ext = ext.as_bytes();
|
||||
&*candidate.ext == ext && self.re.is_match(byte_path)
|
||||
candidate.ext == ext && self.re.is_match(byte_path)
|
||||
}
|
||||
MatchStrategy::Regex => self.re.is_match(byte_path),
|
||||
}
|
||||
@@ -187,26 +187,13 @@ pub struct GlobBuilder<'a> {
|
||||
opts: GlobOptions,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
|
||||
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
|
||||
struct GlobOptions {
|
||||
/// Whether to match case insensitively.
|
||||
case_insensitive: bool,
|
||||
/// Whether to require a literal separator to match a separator in a file
|
||||
/// path. e.g., when enabled, `*` won't match `/`.
|
||||
literal_separator: bool,
|
||||
/// Whether or not to use `\` to escape special characters.
|
||||
/// e.g., when enabled, `\*` will match a literal `*`.
|
||||
backslash_escape: bool,
|
||||
}
|
||||
|
||||
impl GlobOptions {
|
||||
fn default() -> GlobOptions {
|
||||
GlobOptions {
|
||||
case_insensitive: false,
|
||||
literal_separator: false,
|
||||
backslash_escape: !is_separator('\\'),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq)]
|
||||
@@ -275,19 +262,6 @@ impl Glob {
|
||||
}
|
||||
|
||||
/// Returns the regular expression string for this glob.
|
||||
///
|
||||
/// Note that regular expressions for globs are intended to be matched on
|
||||
/// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
|
||||
/// particular, globs are frequently used on file paths, where there is no
|
||||
/// general guarantee that file paths are themselves valid UTF-8. As a
|
||||
/// result, callers will need to ensure that they are using a regex API
|
||||
/// that can match on arbitrary bytes. For example, the
|
||||
/// [`regex`](https://crates.io/regex)
|
||||
/// crate's
|
||||
/// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
|
||||
/// API is not suitable for this since it matches on `&str`, but its
|
||||
/// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
|
||||
/// API is suitable for this.
|
||||
pub fn regex(&self) -> &str {
|
||||
&self.re
|
||||
}
|
||||
@@ -321,7 +295,7 @@ impl Glob {
|
||||
/// std::path::Path::extension returns. Namely, this extension includes
|
||||
/// the '.'. Also, paths like `.rs` are considered to have an extension
|
||||
/// of `.rs`.
|
||||
fn ext(&self) -> Option<String> {
|
||||
fn ext(&self) -> Option<OsString> {
|
||||
if self.opts.case_insensitive {
|
||||
return None;
|
||||
}
|
||||
@@ -345,11 +319,11 @@ impl Glob {
|
||||
Some(&Token::Literal('.')) => {}
|
||||
_ => return None,
|
||||
}
|
||||
let mut lit = ".".to_string();
|
||||
let mut lit = OsStr::new(".").to_os_string();
|
||||
for t in self.tokens[start + 2..].iter() {
|
||||
match *t {
|
||||
Token::Literal('.') | Token::Literal('/') => return None,
|
||||
Token::Literal(c) => lit.push(c),
|
||||
Token::Literal(c) => lit.push(c.to_string()),
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
@@ -363,7 +337,7 @@ impl Glob {
|
||||
/// This is like `ext`, but returns an extension even if it isn't sufficient
|
||||
/// to imply a match. Namely, if an extension is returned, then it is
|
||||
/// necessary but not sufficient for a match.
|
||||
fn required_ext(&self) -> Option<String> {
|
||||
fn required_ext(&self) -> Option<OsString> {
|
||||
if self.opts.case_insensitive {
|
||||
return None;
|
||||
}
|
||||
@@ -386,7 +360,7 @@ impl Glob {
|
||||
None
|
||||
} else {
|
||||
ext.reverse();
|
||||
Some(ext.into_iter().collect())
|
||||
Some(OsString::from(ext.into_iter().collect::<String>()))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -535,7 +509,7 @@ impl Glob {
|
||||
Some(&self.tokens[start..])
|
||||
}
|
||||
|
||||
/// Returns the pattern as a literal if and only if the pattern exclusively
|
||||
/// Returns the pattern as a literal if and only if the pattern exclusiely
|
||||
/// matches the basename of a file path *and* is a literal.
|
||||
///
|
||||
/// The basic format of these patterns is `**/{literal}`, where `{literal}`
|
||||
@@ -575,9 +549,8 @@ impl<'a> GlobBuilder<'a> {
|
||||
chars: self.glob.chars().peekable(),
|
||||
prev: None,
|
||||
cur: None,
|
||||
opts: &self.opts,
|
||||
};
|
||||
p.parse()?;
|
||||
try!(p.parse());
|
||||
if p.stack.is_empty() {
|
||||
Err(Error {
|
||||
glob: Some(self.glob.to_string()),
|
||||
@@ -612,19 +585,6 @@ impl<'a> GlobBuilder<'a> {
|
||||
self.opts.literal_separator = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// When enabled, a backslash (`\`) may be used to escape
|
||||
/// special characters in a glob pattern. Additionally, this will
|
||||
/// prevent `\` from being interpreted as a path separator on all
|
||||
/// platforms.
|
||||
///
|
||||
/// This is enabled by default on platforms where `\` is not a
|
||||
/// path separator and disabled by default on platforms where `\`
|
||||
/// is a path separator.
|
||||
pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
|
||||
self.opts.backslash_escape = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Tokens {
|
||||
@@ -750,7 +710,6 @@ struct Parser<'a> {
|
||||
chars: iter::Peekable<str::Chars<'a>>,
|
||||
prev: Option<char>,
|
||||
cur: Option<char>,
|
||||
opts: &'a GlobOptions,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
@@ -761,14 +720,20 @@ impl<'a> Parser<'a> {
|
||||
fn parse(&mut self) -> Result<(), Error> {
|
||||
while let Some(c) = self.bump() {
|
||||
match c {
|
||||
'?' => self.push_token(Token::Any)?,
|
||||
'*' => self.parse_star()?,
|
||||
'[' => self.parse_class()?,
|
||||
'{' => self.push_alternate()?,
|
||||
'}' => self.pop_alternate()?,
|
||||
',' => self.parse_comma()?,
|
||||
'\\' => self.parse_backslash()?,
|
||||
c => self.push_token(Token::Literal(c))?,
|
||||
'?' => try!(self.push_token(Token::Any)),
|
||||
'*' => try!(self.parse_star()),
|
||||
'[' => try!(self.parse_class()),
|
||||
'{' => try!(self.push_alternate()),
|
||||
'}' => try!(self.pop_alternate()),
|
||||
',' => try!(self.parse_comma()),
|
||||
c => {
|
||||
if is_separator(c) {
|
||||
// Normalize all patterns to use / as a separator.
|
||||
try!(self.push_token(Token::Literal('/')))
|
||||
} else {
|
||||
try!(self.push_token(Token::Literal(c)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@@ -821,82 +786,42 @@ impl<'a> Parser<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_backslash(&mut self) -> Result<(), Error> {
|
||||
if self.opts.backslash_escape {
|
||||
match self.bump() {
|
||||
None => Err(self.error(ErrorKind::DanglingEscape)),
|
||||
Some(c) => self.push_token(Token::Literal(c)),
|
||||
}
|
||||
} else if is_separator('\\') {
|
||||
// Normalize all patterns to use / as a separator.
|
||||
self.push_token(Token::Literal('/'))
|
||||
} else {
|
||||
self.push_token(Token::Literal('\\'))
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_star(&mut self) -> Result<(), Error> {
|
||||
let prev = self.prev;
|
||||
if self.peek() != Some('*') {
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
if self.chars.peek() != Some(&'*') {
|
||||
try!(self.push_token(Token::ZeroOrMore));
|
||||
return Ok(());
|
||||
}
|
||||
assert!(self.bump() == Some('*'));
|
||||
if !self.have_tokens()? {
|
||||
if !self.peek().map_or(true, is_separator) {
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
} else {
|
||||
self.push_token(Token::RecursivePrefix)?;
|
||||
assert!(self.bump().map_or(true, is_separator));
|
||||
if !try!(self.have_tokens()) {
|
||||
try!(self.push_token(Token::RecursivePrefix));
|
||||
let next = self.bump();
|
||||
if !next.map(is_separator).unwrap_or(true) {
|
||||
return Err(self.error(ErrorKind::InvalidRecursive));
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
try!(self.pop_token());
|
||||
if !prev.map(is_separator).unwrap_or(false) {
|
||||
if self.stack.len() <= 1
|
||||
|| (prev != Some(',') && prev != Some('{'))
|
||||
{
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
return Ok(());
|
||||
|| (prev != Some(',') && prev != Some('{')) {
|
||||
return Err(self.error(ErrorKind::InvalidRecursive));
|
||||
}
|
||||
}
|
||||
let is_suffix =
|
||||
match self.peek() {
|
||||
None => {
|
||||
assert!(self.bump().is_none());
|
||||
true
|
||||
}
|
||||
Some(',') | Some('}') if self.stack.len() >= 2 => {
|
||||
true
|
||||
}
|
||||
Some(c) if is_separator(c) => {
|
||||
assert!(self.bump().map(is_separator).unwrap_or(false));
|
||||
false
|
||||
}
|
||||
_ => {
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
match self.pop_token()? {
|
||||
Token::RecursivePrefix => {
|
||||
self.push_token(Token::RecursivePrefix)?;
|
||||
match self.chars.peek() {
|
||||
None => {
|
||||
assert!(self.bump().is_none());
|
||||
self.push_token(Token::RecursiveSuffix)
|
||||
}
|
||||
Token::RecursiveSuffix => {
|
||||
self.push_token(Token::RecursiveSuffix)?;
|
||||
Some(&',') | Some(&'}') if self.stack.len() >= 2 => {
|
||||
self.push_token(Token::RecursiveSuffix)
|
||||
}
|
||||
_ => {
|
||||
if is_suffix {
|
||||
self.push_token(Token::RecursiveSuffix)?;
|
||||
} else {
|
||||
self.push_token(Token::RecursiveZeroOrMore)?;
|
||||
}
|
||||
Some(&c) if is_separator(c) => {
|
||||
assert!(self.bump().map(is_separator).unwrap_or(false));
|
||||
self.push_token(Token::RecursiveZeroOrMore)
|
||||
}
|
||||
_ => Err(self.error(ErrorKind::InvalidRecursive)),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_class(&mut self) -> Result<(), Error> {
|
||||
@@ -915,15 +840,12 @@ impl<'a> Parser<'a> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
let mut negated = false;
|
||||
let mut ranges = vec![];
|
||||
let negated = match self.chars.peek() {
|
||||
Some(&'!') | Some(&'^') => {
|
||||
let bump = self.bump();
|
||||
assert!(bump == Some('!') || bump == Some('^'));
|
||||
true
|
||||
}
|
||||
_ => false,
|
||||
};
|
||||
if self.chars.peek() == Some(&'!') {
|
||||
assert!(self.bump() == Some('!'));
|
||||
negated = true;
|
||||
}
|
||||
let mut first = true;
|
||||
let mut in_range = false;
|
||||
loop {
|
||||
@@ -948,7 +870,7 @@ impl<'a> Parser<'a> {
|
||||
// invariant: in_range is only set when there is
|
||||
// already at least one character seen.
|
||||
let r = ranges.last_mut().unwrap();
|
||||
add_to_last_range(&self.glob, r, '-')?;
|
||||
try!(add_to_last_range(&self.glob, r, '-'));
|
||||
in_range = false;
|
||||
} else {
|
||||
assert!(!ranges.is_empty());
|
||||
@@ -959,8 +881,8 @@ impl<'a> Parser<'a> {
|
||||
if in_range {
|
||||
// invariant: in_range is only set when there is
|
||||
// already at least one character seen.
|
||||
add_to_last_range(
|
||||
&self.glob, ranges.last_mut().unwrap(), c)?;
|
||||
try!(add_to_last_range(
|
||||
&self.glob, ranges.last_mut().unwrap(), c));
|
||||
} else {
|
||||
ranges.push((c, c));
|
||||
}
|
||||
@@ -985,10 +907,6 @@ impl<'a> Parser<'a> {
|
||||
self.cur = self.chars.next();
|
||||
self.cur
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<char> {
|
||||
self.chars.peek().map(|&ch| ch)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -1006,15 +924,16 @@ fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::ffi::{OsStr, OsString};
|
||||
|
||||
use {GlobSetBuilder, ErrorKind};
|
||||
use super::{Glob, GlobBuilder, Token};
|
||||
use super::Token::*;
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
struct Options {
|
||||
casei: Option<bool>,
|
||||
litsep: Option<bool>,
|
||||
bsesc: Option<bool>,
|
||||
casei: bool,
|
||||
litsep: bool,
|
||||
}
|
||||
|
||||
macro_rules! syntax {
|
||||
@@ -1044,17 +963,11 @@ mod tests {
|
||||
($name:ident, $pat:expr, $re:expr, $options:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let mut builder = GlobBuilder::new($pat);
|
||||
if let Some(casei) = $options.casei {
|
||||
builder.case_insensitive(casei);
|
||||
}
|
||||
if let Some(litsep) = $options.litsep {
|
||||
builder.literal_separator(litsep);
|
||||
}
|
||||
if let Some(bsesc) = $options.bsesc {
|
||||
builder.backslash_escape(bsesc);
|
||||
}
|
||||
let pat = builder.build().unwrap();
|
||||
let pat = GlobBuilder::new($pat)
|
||||
.case_insensitive($options.casei)
|
||||
.literal_separator($options.litsep)
|
||||
.build()
|
||||
.unwrap();
|
||||
assert_eq!(format!("(?-u){}", $re), pat.regex());
|
||||
}
|
||||
};
|
||||
@@ -1067,17 +980,11 @@ mod tests {
|
||||
($name:ident, $pat:expr, $path:expr, $options:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let mut builder = GlobBuilder::new($pat);
|
||||
if let Some(casei) = $options.casei {
|
||||
builder.case_insensitive(casei);
|
||||
}
|
||||
if let Some(litsep) = $options.litsep {
|
||||
builder.literal_separator(litsep);
|
||||
}
|
||||
if let Some(bsesc) = $options.bsesc {
|
||||
builder.backslash_escape(bsesc);
|
||||
}
|
||||
let pat = builder.build().unwrap();
|
||||
let pat = GlobBuilder::new($pat)
|
||||
.case_insensitive($options.casei)
|
||||
.literal_separator($options.litsep)
|
||||
.build()
|
||||
.unwrap();
|
||||
let matcher = pat.compile_matcher();
|
||||
let strategic = pat.compile_strategic_matcher();
|
||||
let set = GlobSetBuilder::new().add(pat).build().unwrap();
|
||||
@@ -1095,17 +1002,11 @@ mod tests {
|
||||
($name:ident, $pat:expr, $path:expr, $options:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let mut builder = GlobBuilder::new($pat);
|
||||
if let Some(casei) = $options.casei {
|
||||
builder.case_insensitive(casei);
|
||||
}
|
||||
if let Some(litsep) = $options.litsep {
|
||||
builder.literal_separator(litsep);
|
||||
}
|
||||
if let Some(bsesc) = $options.bsesc {
|
||||
builder.backslash_escape(bsesc);
|
||||
}
|
||||
let pat = builder.build().unwrap();
|
||||
let pat = GlobBuilder::new($pat)
|
||||
.case_insensitive($options.casei)
|
||||
.literal_separator($options.litsep)
|
||||
.build()
|
||||
.unwrap();
|
||||
let matcher = pat.compile_matcher();
|
||||
let strategic = pat.compile_strategic_matcher();
|
||||
let set = GlobSetBuilder::new().add(pat).build().unwrap();
|
||||
@@ -1117,6 +1018,7 @@ mod tests {
|
||||
}
|
||||
|
||||
fn s(string: &str) -> String { string.to_string() }
|
||||
fn os(string: &str) -> OsString { OsStr::new(string).to_os_string() }
|
||||
|
||||
fn class(s: char, e: char) -> Token {
|
||||
Class { negated: false, ranges: vec![(s, e)] }
|
||||
@@ -1171,9 +1073,14 @@ mod tests {
|
||||
syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
|
||||
syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
|
||||
syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
|
||||
syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
|
||||
syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
|
||||
|
||||
syntaxerr!(err_rseq1, "a**", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq2, "**a", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq3, "a**b", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq4, "***", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq5, "/a**", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq6, "/**a", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq7, "/a**b", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
|
||||
syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
|
||||
syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
|
||||
@@ -1182,24 +1089,12 @@ mod tests {
|
||||
syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
|
||||
|
||||
const CASEI: Options = Options {
|
||||
casei: Some(true),
|
||||
litsep: None,
|
||||
bsesc: None,
|
||||
casei: true,
|
||||
litsep: false,
|
||||
};
|
||||
const SLASHLIT: Options = Options {
|
||||
casei: None,
|
||||
litsep: Some(true),
|
||||
bsesc: None,
|
||||
};
|
||||
const NOBSESC: Options = Options {
|
||||
casei: None,
|
||||
litsep: None,
|
||||
bsesc: Some(false),
|
||||
};
|
||||
const BSESC: Options = Options {
|
||||
casei: None,
|
||||
litsep: None,
|
||||
bsesc: Some(true),
|
||||
casei: false,
|
||||
litsep: true,
|
||||
};
|
||||
|
||||
toregex!(re_casei, "a", "(?i)^a$", &CASEI);
|
||||
@@ -1217,30 +1112,8 @@ mod tests {
|
||||
toregex!(re8, "[*]", r"^[\*]$");
|
||||
toregex!(re9, "[+]", r"^[\+]$");
|
||||
toregex!(re10, "+", r"^\+$");
|
||||
toregex!(re11, "☃", r"^\xe2\x98\x83$");
|
||||
toregex!(re12, "**", r"^.*$");
|
||||
toregex!(re13, "**/", r"^.*$");
|
||||
toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
|
||||
toregex!(re15, "**/**", r"^.*$");
|
||||
toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
|
||||
toregex!(re17, "**/**/**", r"^.*$");
|
||||
toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
|
||||
toregex!(re19, "a/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re20, "a/**/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re21, "a/**/**/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
|
||||
toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
|
||||
toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
|
||||
toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
|
||||
toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
|
||||
toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
|
||||
toregex!(re28, "a**", r"^a.*.*$");
|
||||
toregex!(re29, "**a", r"^.*.*a$");
|
||||
toregex!(re30, "a**b", r"^a.*.*b$");
|
||||
toregex!(re31, "***", r"^.*.*.*$");
|
||||
toregex!(re32, "/a**", r"^/a.*.*$");
|
||||
toregex!(re33, "/**a", r"^/.*.*a$");
|
||||
toregex!(re34, "/a**b", r"^/a.*.*b$");
|
||||
toregex!(re11, "**", r"^.*$");
|
||||
toregex!(re12, "☃", r"^\xe2\x98\x83$");
|
||||
|
||||
matches!(match1, "a", "a");
|
||||
matches!(match2, "a*b", "a_b");
|
||||
@@ -1277,7 +1150,6 @@ mod tests {
|
||||
matches!(matchrec22, ".*/**", ".abc/abc");
|
||||
matches!(matchrec23, "foo/**", "foo");
|
||||
matches!(matchrec24, "**/foo/bar", "foo/bar");
|
||||
matches!(matchrec25, "some/*/needle.txt", "some/one/needle.txt");
|
||||
|
||||
matches!(matchrange1, "a[0-9]b", "a0b");
|
||||
matches!(matchrange2, "a[0-9]b", "a9b");
|
||||
@@ -1290,7 +1162,6 @@ mod tests {
|
||||
matches!(matchrange9, "[-a-c]", "b");
|
||||
matches!(matchrange10, "[a-c-]", "b");
|
||||
matches!(matchrange11, "[-]", "-");
|
||||
matches!(matchrange12, "a[^0-9]b", "a_b");
|
||||
|
||||
matches!(matchpat1, "*hello.txt", "hello.txt");
|
||||
matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
|
||||
@@ -1334,17 +1205,6 @@ mod tests {
|
||||
#[cfg(not(unix))]
|
||||
matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
|
||||
|
||||
matches!(matchbackslash1, "\\[", "[", BSESC);
|
||||
matches!(matchbackslash2, "\\?", "?", BSESC);
|
||||
matches!(matchbackslash3, "\\*", "*", BSESC);
|
||||
matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
|
||||
matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
|
||||
matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
|
||||
#[cfg(unix)]
|
||||
matches!(matchbackslash7, "\\a", "a");
|
||||
#[cfg(not(unix))]
|
||||
matches!(matchbackslash8, "\\a", "/a");
|
||||
|
||||
nmatches!(matchnot1, "a*b*c", "abcd");
|
||||
nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
|
||||
nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
|
||||
@@ -1374,35 +1234,18 @@ mod tests {
|
||||
nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
|
||||
nmatches!(matchnot26, "**/m4/ltoptions.m4",
|
||||
"csharp/src/packages/repositories.config", SLASHLIT);
|
||||
nmatches!(matchnot27, "a[^0-9]b", "a0b");
|
||||
nmatches!(matchnot28, "a[^0-9]b", "a9b");
|
||||
nmatches!(matchnot29, "[^-]", "-");
|
||||
nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
|
||||
nmatches!(
|
||||
matchrec31,
|
||||
"some/*/needle.txt", "some/one/two/needle.txt", SLASHLIT);
|
||||
nmatches!(
|
||||
matchrec32,
|
||||
"some/*/needle.txt", "some/one/two/three/needle.txt", SLASHLIT);
|
||||
|
||||
macro_rules! extract {
|
||||
($which:ident, $name:ident, $pat:expr, $expect:expr) => {
|
||||
extract!($which, $name, $pat, $expect, Options::default());
|
||||
};
|
||||
($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
|
||||
($which:ident, $name:ident, $pat:expr, $expect:expr, $opts:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let mut builder = GlobBuilder::new($pat);
|
||||
if let Some(casei) = $options.casei {
|
||||
builder.case_insensitive(casei);
|
||||
}
|
||||
if let Some(litsep) = $options.litsep {
|
||||
builder.literal_separator(litsep);
|
||||
}
|
||||
if let Some(bsesc) = $options.bsesc {
|
||||
builder.backslash_escape(bsesc);
|
||||
}
|
||||
let pat = builder.build().unwrap();
|
||||
let pat = GlobBuilder::new($pat)
|
||||
.case_insensitive($opts.casei)
|
||||
.literal_separator($opts.litsep)
|
||||
.build().unwrap();
|
||||
assert_eq!($expect, pat.$which());
|
||||
}
|
||||
};
|
||||
@@ -1459,19 +1302,19 @@ mod tests {
|
||||
Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),
|
||||
]), SLASHLIT);
|
||||
|
||||
ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
|
||||
ext!(extract_ext1, "**/*.rs", Some(os(".rs")));
|
||||
ext!(extract_ext2, "**/*.rs.bak", None);
|
||||
ext!(extract_ext3, "*.rs", Some(s(".rs")));
|
||||
ext!(extract_ext3, "*.rs", Some(os(".rs")));
|
||||
ext!(extract_ext4, "a*.rs", None);
|
||||
ext!(extract_ext5, "/*.c", None);
|
||||
ext!(extract_ext6, "*.c", None, SLASHLIT);
|
||||
ext!(extract_ext7, "*.c", Some(s(".c")));
|
||||
ext!(extract_ext7, "*.c", Some(os(".c")));
|
||||
|
||||
required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
|
||||
required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
|
||||
required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
|
||||
required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
|
||||
required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
|
||||
required_ext!(extract_req_ext1, "*.rs", Some(os(".rs")));
|
||||
required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(os(".rs")));
|
||||
required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(os(".rs")));
|
||||
required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(os(".rs")));
|
||||
required_ext!(extract_req_ext5, ".rs", Some(os(".rs")));
|
||||
required_ext!(extract_req_ext6, "./rs", None);
|
||||
required_ext!(extract_req_ext7, "foo", None);
|
||||
required_ext!(extract_req_ext8, ".foo/", None);
|
||||
|
||||
@@ -22,7 +22,7 @@ This example shows how to match a single glob against a single file path.
|
||||
# fn example() -> Result<(), globset::Error> {
|
||||
use globset::Glob;
|
||||
|
||||
let glob = Glob::new("*.rs")?.compile_matcher();
|
||||
let glob = try!(Glob::new("*.rs")).compile_matcher();
|
||||
|
||||
assert!(glob.is_match("foo.rs"));
|
||||
assert!(glob.is_match("foo/bar.rs"));
|
||||
@@ -39,8 +39,8 @@ semantics. In this example, we prevent wildcards from matching path separators.
|
||||
# fn example() -> Result<(), globset::Error> {
|
||||
use globset::GlobBuilder;
|
||||
|
||||
let glob = GlobBuilder::new("*.rs")
|
||||
.literal_separator(true).build()?.compile_matcher();
|
||||
let glob = try!(GlobBuilder::new("*.rs")
|
||||
.literal_separator(true).build()).compile_matcher();
|
||||
|
||||
assert!(glob.is_match("foo.rs"));
|
||||
assert!(!glob.is_match("foo/bar.rs")); // no longer matches
|
||||
@@ -59,10 +59,10 @@ use globset::{Glob, GlobSetBuilder};
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
// A GlobBuilder can be used to configure each glob's match semantics
|
||||
// independently.
|
||||
builder.add(Glob::new("*.rs")?);
|
||||
builder.add(Glob::new("src/lib.rs")?);
|
||||
builder.add(Glob::new("src/**/foo.rs")?);
|
||||
let set = builder.build()?;
|
||||
builder.add(try!(Glob::new("*.rs")));
|
||||
builder.add(try!(Glob::new("src/lib.rs")));
|
||||
builder.add(try!(Glob::new("src/**/foo.rs")));
|
||||
let set = try!(builder.build());
|
||||
|
||||
assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
|
||||
# Ok(()) } example().unwrap();
|
||||
@@ -91,11 +91,6 @@ Standard Unix-style glob syntax is supported:
|
||||
`[!ab]` to match any character except for `a` and `b`.
|
||||
* Metacharacters such as `*` and `?` can be escaped with character class
|
||||
notation. e.g., `[*]` matches `*`.
|
||||
* When backslash escapes are enabled, a backslash (`\`) will escape all meta
|
||||
characters in a glob. If it precedes a non-meta character, then the slash is
|
||||
ignored. A `\\` will match a literal `\\`. Note that this mode is only
|
||||
enabled on Unix platforms by default, but can be enabled on any platform
|
||||
via the `backslash_escape` setting on `Glob`.
|
||||
|
||||
A `GlobBuilder` can be used to prevent wildcards from matching path separators,
|
||||
or to enable case insensitive matching.
|
||||
@@ -104,25 +99,27 @@ or to enable case insensitive matching.
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate aho_corasick;
|
||||
extern crate bstr;
|
||||
extern crate fnv;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memchr;
|
||||
extern crate regex;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::error::Error as StdError;
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::fmt;
|
||||
use std::hash;
|
||||
use std::path::Path;
|
||||
use std::str;
|
||||
|
||||
use aho_corasick::AhoCorasick;
|
||||
use bstr::{B, BStr, BString};
|
||||
use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
|
||||
use regex::bytes::{Regex, RegexBuilder, RegexSet};
|
||||
|
||||
use pathutil::{file_name, file_name_ext, normalize_path};
|
||||
use pathutil::{
|
||||
file_name, file_name_ext, normalize_path, os_str_bytes, path_bytes,
|
||||
};
|
||||
use glob::MatchStrategy;
|
||||
pub use glob::{Glob, GlobBuilder, GlobMatcher};
|
||||
|
||||
@@ -141,13 +138,8 @@ pub struct Error {
|
||||
/// The kind of error that can occur when parsing a glob pattern.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub enum ErrorKind {
|
||||
/// **DEPRECATED**.
|
||||
///
|
||||
/// This error used to occur for consistency with git's glob specification,
|
||||
/// but the specification now accepts all uses of `**`. When `**` does not
|
||||
/// appear adjacent to a path separator or at the beginning/end of a glob,
|
||||
/// it is now treated as two consecutive `*` patterns. As such, this error
|
||||
/// is no longer used.
|
||||
/// Occurs when a use of `**` is invalid. Namely, `**` can only appear
|
||||
/// adjacent to a path separator, or the beginning/end of a glob.
|
||||
InvalidRecursive,
|
||||
/// Occurs when a character class (e.g., `[abc]`) is not closed.
|
||||
UnclosedClass,
|
||||
@@ -162,17 +154,8 @@ pub enum ErrorKind {
|
||||
/// Occurs when an alternating group is nested inside another alternating
|
||||
/// group, e.g., `{{a,b},{c,d}}`.
|
||||
NestedAlternates,
|
||||
/// Occurs when an unescaped '\' is found at the end of a glob.
|
||||
DanglingEscape,
|
||||
/// An error associated with parsing or compiling a regex.
|
||||
Regex(String),
|
||||
/// Hints that destructuring should not be exhaustive.
|
||||
///
|
||||
/// This enum may grow additional variants, so this makes sure clients
|
||||
/// don't count on exhaustive matching. (Otherwise, adding a new variant
|
||||
/// could break existing code.)
|
||||
#[doc(hidden)]
|
||||
__Nonexhaustive,
|
||||
}
|
||||
|
||||
impl StdError for Error {
|
||||
@@ -216,11 +199,7 @@ impl ErrorKind {
|
||||
ErrorKind::NestedAlternates => {
|
||||
"nested alternate groups are not allowed"
|
||||
}
|
||||
ErrorKind::DanglingEscape => {
|
||||
"dangling '\\'"
|
||||
}
|
||||
ErrorKind::Regex(ref err) => err,
|
||||
ErrorKind::__Nonexhaustive => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -244,14 +223,12 @@ impl fmt::Display for ErrorKind {
|
||||
| ErrorKind::UnopenedAlternates
|
||||
| ErrorKind::UnclosedAlternates
|
||||
| ErrorKind::NestedAlternates
|
||||
| ErrorKind::DanglingEscape
|
||||
| ErrorKind::Regex(_) => {
|
||||
write!(f, "{}", self.description())
|
||||
}
|
||||
ErrorKind::InvalidRange(s, e) => {
|
||||
write!(f, "invalid range; '{}' > '{}'", s, e)
|
||||
}
|
||||
ErrorKind::__Nonexhaustive => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -291,23 +268,12 @@ pub struct GlobSet {
|
||||
}
|
||||
|
||||
impl GlobSet {
|
||||
/// Create an empty `GlobSet`. An empty set matches nothing.
|
||||
#[inline]
|
||||
pub fn empty() -> GlobSet {
|
||||
GlobSet {
|
||||
len: 0,
|
||||
strats: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if this set is empty, and therefore matches nothing.
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len == 0
|
||||
}
|
||||
|
||||
/// Returns the number of globs in this set.
|
||||
#[inline]
|
||||
pub fn len(&self) -> usize {
|
||||
self.len
|
||||
}
|
||||
@@ -446,8 +412,8 @@ impl GlobSet {
|
||||
GlobSetMatchStrategy::Suffix(suffixes.suffix()),
|
||||
GlobSetMatchStrategy::Prefix(prefixes.prefix()),
|
||||
GlobSetMatchStrategy::RequiredExtension(
|
||||
required_exts.build()?),
|
||||
GlobSetMatchStrategy::Regex(regexes.regex_set()?),
|
||||
try!(required_exts.build())),
|
||||
GlobSetMatchStrategy::Regex(try!(regexes.regex_set())),
|
||||
],
|
||||
})
|
||||
}
|
||||
@@ -455,7 +421,6 @@ impl GlobSet {
|
||||
|
||||
/// GlobSetBuilder builds a group of patterns that can be used to
|
||||
/// simultaneously match a file path.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct GlobSetBuilder {
|
||||
pats: Vec<Glob>,
|
||||
}
|
||||
@@ -476,6 +441,7 @@ impl GlobSetBuilder {
|
||||
}
|
||||
|
||||
/// Add a new pattern to this set.
|
||||
#[allow(dead_code)]
|
||||
pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
|
||||
self.pats.push(pat);
|
||||
self
|
||||
@@ -490,25 +456,24 @@ impl GlobSetBuilder {
|
||||
/// path against multiple globs or sets of globs.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Candidate<'a> {
|
||||
path: Cow<'a, BStr>,
|
||||
basename: Cow<'a, BStr>,
|
||||
ext: Cow<'a, BStr>,
|
||||
path: Cow<'a, [u8]>,
|
||||
basename: Cow<'a, [u8]>,
|
||||
ext: &'a OsStr,
|
||||
}
|
||||
|
||||
impl<'a> Candidate<'a> {
|
||||
/// Create a new candidate for matching from the given path.
|
||||
pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
|
||||
let path = normalize_path(BString::from_path_lossy(path.as_ref()));
|
||||
let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
|
||||
let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
|
||||
let path = path.as_ref();
|
||||
let basename = file_name(path).unwrap_or(OsStr::new(""));
|
||||
Candidate {
|
||||
path: path,
|
||||
basename: basename,
|
||||
ext: ext,
|
||||
path: normalize_path(path_bytes(path)),
|
||||
basename: os_str_bytes(basename),
|
||||
ext: file_name_ext(basename).unwrap_or(OsStr::new("")),
|
||||
}
|
||||
}
|
||||
|
||||
fn path_prefix(&self, max: usize) -> &BStr {
|
||||
fn path_prefix(&self, max: usize) -> &[u8] {
|
||||
if self.path.len() <= max {
|
||||
&*self.path
|
||||
} else {
|
||||
@@ -516,7 +481,7 @@ impl<'a> Candidate<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn path_suffix(&self, max: usize) -> &BStr {
|
||||
fn path_suffix(&self, max: usize) -> &[u8] {
|
||||
if self.path.len() <= max {
|
||||
&*self.path
|
||||
} else {
|
||||
@@ -577,12 +542,12 @@ impl LiteralStrategy {
|
||||
}
|
||||
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
self.0.contains_key(candidate.path.as_bytes())
|
||||
self.0.contains_key(&*candidate.path)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
if let Some(hits) = self.0.get(candidate.path.as_bytes()) {
|
||||
if let Some(hits) = self.0.get(&*candidate.path) {
|
||||
matches.extend(hits);
|
||||
}
|
||||
}
|
||||
@@ -604,7 +569,7 @@ impl BasenameLiteralStrategy {
|
||||
if candidate.basename.is_empty() {
|
||||
return false;
|
||||
}
|
||||
self.0.contains_key(candidate.basename.as_bytes())
|
||||
self.0.contains_key(&*candidate.basename)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
@@ -612,29 +577,29 @@ impl BasenameLiteralStrategy {
|
||||
if candidate.basename.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Some(hits) = self.0.get(candidate.basename.as_bytes()) {
|
||||
if let Some(hits) = self.0.get(&*candidate.basename) {
|
||||
matches.extend(hits);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>);
|
||||
struct ExtensionStrategy(HashMap<OsString, Vec<usize>, Fnv>);
|
||||
|
||||
impl ExtensionStrategy {
|
||||
fn new() -> ExtensionStrategy {
|
||||
ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
|
||||
}
|
||||
|
||||
fn add(&mut self, global_index: usize, ext: String) {
|
||||
self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
|
||||
fn add(&mut self, global_index: usize, ext: OsString) {
|
||||
self.0.entry(ext).or_insert(vec![]).push(global_index);
|
||||
}
|
||||
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
if candidate.ext.is_empty() {
|
||||
return false;
|
||||
}
|
||||
self.0.contains_key(candidate.ext.as_bytes())
|
||||
self.0.contains_key(candidate.ext)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
@@ -642,7 +607,7 @@ impl ExtensionStrategy {
|
||||
if candidate.ext.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Some(hits) = self.0.get(candidate.ext.as_bytes()) {
|
||||
if let Some(hits) = self.0.get(candidate.ext) {
|
||||
matches.extend(hits);
|
||||
}
|
||||
}
|
||||
@@ -650,7 +615,7 @@ impl ExtensionStrategy {
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct PrefixStrategy {
|
||||
matcher: AhoCorasick,
|
||||
matcher: FullAcAutomaton<Vec<u8>>,
|
||||
map: Vec<usize>,
|
||||
longest: usize,
|
||||
}
|
||||
@@ -658,8 +623,8 @@ struct PrefixStrategy {
|
||||
impl PrefixStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
let path = candidate.path_prefix(self.longest);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.start() == 0 {
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.start == 0 {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -668,9 +633,9 @@ impl PrefixStrategy {
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
let path = candidate.path_prefix(self.longest);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.start() == 0 {
|
||||
matches.push(self.map[m.pattern()]);
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.start == 0 {
|
||||
matches.push(self.map[m.pati]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -678,7 +643,7 @@ impl PrefixStrategy {
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct SuffixStrategy {
|
||||
matcher: AhoCorasick,
|
||||
matcher: FullAcAutomaton<Vec<u8>>,
|
||||
map: Vec<usize>,
|
||||
longest: usize,
|
||||
}
|
||||
@@ -686,8 +651,8 @@ struct SuffixStrategy {
|
||||
impl SuffixStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
let path = candidate.path_suffix(self.longest);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.end() == path.len() {
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.end == path.len() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -696,27 +661,27 @@ impl SuffixStrategy {
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
let path = candidate.path_suffix(self.longest);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.end() == path.len() {
|
||||
matches.push(self.map[m.pattern()]);
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.end == path.len() {
|
||||
matches.push(self.map[m.pati]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>);
|
||||
struct RequiredExtensionStrategy(HashMap<OsString, Vec<(usize, Regex)>, Fnv>);
|
||||
|
||||
impl RequiredExtensionStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
if candidate.ext.is_empty() {
|
||||
return false;
|
||||
}
|
||||
match self.0.get(candidate.ext.as_bytes()) {
|
||||
match self.0.get(candidate.ext) {
|
||||
None => false,
|
||||
Some(regexes) => {
|
||||
for &(_, ref re) in regexes {
|
||||
if re.is_match(candidate.path.as_bytes()) {
|
||||
if re.is_match(&*candidate.path) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -730,9 +695,9 @@ impl RequiredExtensionStrategy {
|
||||
if candidate.ext.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) {
|
||||
if let Some(regexes) = self.0.get(candidate.ext) {
|
||||
for &(global_index, ref re) in regexes {
|
||||
if re.is_match(candidate.path.as_bytes()) {
|
||||
if re.is_match(&*candidate.path) {
|
||||
matches.push(global_index);
|
||||
}
|
||||
}
|
||||
@@ -748,11 +713,11 @@ struct RegexSetStrategy {
|
||||
|
||||
impl RegexSetStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
self.matcher.is_match(candidate.path.as_bytes())
|
||||
self.matcher.is_match(&*candidate.path)
|
||||
}
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
for i in self.matcher.matches(candidate.path.as_bytes()) {
|
||||
for i in self.matcher.matches(&*candidate.path) {
|
||||
matches.push(self.map[i]);
|
||||
}
|
||||
}
|
||||
@@ -783,16 +748,18 @@ impl MultiStrategyBuilder {
|
||||
}
|
||||
|
||||
fn prefix(self) -> PrefixStrategy {
|
||||
let it = self.literals.into_iter().map(|s| s.into_bytes());
|
||||
PrefixStrategy {
|
||||
matcher: AhoCorasick::new_auto_configured(&self.literals),
|
||||
matcher: AcAutomaton::new(it).into_full(),
|
||||
map: self.map,
|
||||
longest: self.longest,
|
||||
}
|
||||
}
|
||||
|
||||
fn suffix(self) -> SuffixStrategy {
|
||||
let it = self.literals.into_iter().map(|s| s.into_bytes());
|
||||
SuffixStrategy {
|
||||
matcher: AhoCorasick::new_auto_configured(&self.literals),
|
||||
matcher: AcAutomaton::new(it).into_full(),
|
||||
map: self.map,
|
||||
longest: self.longest,
|
||||
}
|
||||
@@ -800,7 +767,7 @@ impl MultiStrategyBuilder {
|
||||
|
||||
fn regex_set(self) -> Result<RegexSetStrategy, Error> {
|
||||
Ok(RegexSetStrategy {
|
||||
matcher: new_regex_set(self.literals)?,
|
||||
matcher: try!(new_regex_set(self.literals)),
|
||||
map: self.map,
|
||||
})
|
||||
}
|
||||
@@ -808,7 +775,7 @@ impl MultiStrategyBuilder {
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct RequiredExtensionStrategyBuilder(
|
||||
HashMap<Vec<u8>, Vec<(usize, String)>>,
|
||||
HashMap<OsString, Vec<(usize, String)>>,
|
||||
);
|
||||
|
||||
impl RequiredExtensionStrategyBuilder {
|
||||
@@ -816,11 +783,8 @@ impl RequiredExtensionStrategyBuilder {
|
||||
RequiredExtensionStrategyBuilder(HashMap::new())
|
||||
}
|
||||
|
||||
fn add(&mut self, global_index: usize, ext: String, regex: String) {
|
||||
self.0
|
||||
.entry(ext.into_bytes())
|
||||
.or_insert(vec![])
|
||||
.push((global_index, regex));
|
||||
fn add(&mut self, global_index: usize, ext: OsString, regex: String) {
|
||||
self.0.entry(ext).or_insert(vec![]).push((global_index, regex));
|
||||
}
|
||||
|
||||
fn build(self) -> Result<RequiredExtensionStrategy, Error> {
|
||||
@@ -828,7 +792,7 @@ impl RequiredExtensionStrategyBuilder {
|
||||
for (ext, regexes) in self.0.into_iter() {
|
||||
exts.insert(ext.clone(), vec![]);
|
||||
for (global_index, regex) in regexes {
|
||||
let compiled = new_regex(&regex)?;
|
||||
let compiled = try!(new_regex(&regex));
|
||||
exts.get_mut(&ext).unwrap().push((global_index, compiled));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,26 +1,41 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use bstr::BStr;
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root or prefix,
|
||||
/// file_name will return None.
|
||||
pub fn file_name<'a>(path: &Cow<'a, BStr>) -> Option<Cow<'a, BStr>> {
|
||||
#[cfg(unix)]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use memchr::memrchr;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
if path.is_empty() {
|
||||
return None;
|
||||
} else if path.last() == Some(b'.') {
|
||||
} else if path.len() == 1 && path[0] == b'.' {
|
||||
return None;
|
||||
} else if path.last() == Some(&b'.') {
|
||||
return None;
|
||||
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
|
||||
return None;
|
||||
}
|
||||
let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0);
|
||||
Some(match *path {
|
||||
Cow::Borrowed(path) => Cow::Borrowed(&path[last_slash..]),
|
||||
Cow::Owned(ref path) => {
|
||||
let mut path = path.clone();
|
||||
path.drain_bytes(..last_slash);
|
||||
Cow::Owned(path)
|
||||
}
|
||||
})
|
||||
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
|
||||
Some(OsStr::from_bytes(&path[last_slash..]))
|
||||
}
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root or prefix,
|
||||
/// file_name will return None.
|
||||
#[cfg(not(unix))]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
path.as_ref().file_name()
|
||||
}
|
||||
|
||||
/// Return a file extension given a path's file name.
|
||||
@@ -39,28 +54,65 @@ pub fn file_name<'a>(path: &Cow<'a, BStr>) -> Option<Cow<'a, BStr>> {
|
||||
/// a pattern like `*.rs` is obviously trying to match files with a `rs`
|
||||
/// extension, but it also matches files like `.rs`, which doesn't have an
|
||||
/// extension according to std::path::Path::extension.
|
||||
pub fn file_name_ext<'a>(name: &Cow<'a, BStr>) -> Option<Cow<'a, BStr>> {
|
||||
pub fn file_name_ext(name: &OsStr) -> Option<&OsStr> {
|
||||
// Yes, these functions are awful, and yes, we are completely violating
|
||||
// the abstraction barrier of std::ffi. The barrier we're violating is
|
||||
// that an OsStr's encoding is *ASCII compatible*. While this is obviously
|
||||
// true on Unix systems, it's also true on Windows because an OsStr uses
|
||||
// WTF-8 internally: https://simonsapin.github.io/wtf-8/
|
||||
//
|
||||
// We should consider doing the same for the other path utility functions.
|
||||
// Right now, we don't break any barriers, but Windows users are paying
|
||||
// for it.
|
||||
//
|
||||
// Got any better ideas that don't cost anything? Hit me up. ---AG
|
||||
unsafe fn os_str_as_u8_slice(s: &OsStr) -> &[u8] {
|
||||
::std::mem::transmute(s)
|
||||
}
|
||||
unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr {
|
||||
::std::mem::transmute(s)
|
||||
}
|
||||
if name.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let last_dot_at = match name.rfind_byte(b'.') {
|
||||
None => return None,
|
||||
Some(i) => i,
|
||||
};
|
||||
Some(match *name {
|
||||
Cow::Borrowed(name) => Cow::Borrowed(&name[last_dot_at..]),
|
||||
Cow::Owned(ref name) => {
|
||||
let mut name = name.clone();
|
||||
name.drain_bytes(..last_dot_at);
|
||||
Cow::Owned(name)
|
||||
let name = unsafe { os_str_as_u8_slice(name) };
|
||||
for (i, &b) in name.iter().enumerate().rev() {
|
||||
if b == b'.' {
|
||||
return Some(unsafe { u8_slice_as_os_str(&name[i..]) });
|
||||
}
|
||||
})
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Return raw bytes of a path, transcoded to UTF-8 if necessary.
|
||||
pub fn path_bytes(path: &Path) -> Cow<[u8]> {
|
||||
os_str_bytes(path.as_os_str())
|
||||
}
|
||||
|
||||
/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8.
|
||||
#[cfg(unix)]
|
||||
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
Cow::Borrowed(s.as_bytes())
|
||||
}
|
||||
|
||||
/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8.
|
||||
#[cfg(not(unix))]
|
||||
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
|
||||
// TODO(burntsushi): On Windows, OS strings are WTF-8, which is a superset
|
||||
// of UTF-8, so even if we could get at the raw bytes, they wouldn't
|
||||
// be useful. We *must* convert to UTF-8 before doing path matching.
|
||||
// Unfortunate, but necessary.
|
||||
match s.to_string_lossy() {
|
||||
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
|
||||
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
||||
/// that recognize other characters as separators.
|
||||
#[cfg(unix)]
|
||||
pub fn normalize_path(path: Cow<BStr>) -> Cow<BStr> {
|
||||
pub fn normalize_path(path: Cow<[u8]>) -> Cow<[u8]> {
|
||||
// UNIX only uses /, so we're good.
|
||||
path
|
||||
}
|
||||
@@ -68,7 +120,7 @@ pub fn normalize_path(path: Cow<BStr>) -> Cow<BStr> {
|
||||
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
||||
/// that recognize other characters as separators.
|
||||
#[cfg(not(unix))]
|
||||
pub fn normalize_path(mut path: Cow<BStr>) -> Cow<BStr> {
|
||||
pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
|
||||
use std::path::is_separator;
|
||||
|
||||
for i in 0..path.len() {
|
||||
@@ -83,8 +135,7 @@ pub fn normalize_path(mut path: Cow<BStr>) -> Cow<BStr> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::borrow::Cow;
|
||||
|
||||
use bstr::{B, BString};
|
||||
use std::ffi::OsStr;
|
||||
|
||||
use super::{file_name_ext, normalize_path};
|
||||
|
||||
@@ -92,9 +143,8 @@ mod tests {
|
||||
($name:ident, $file_name:expr, $ext:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let bs = BString::from($file_name);
|
||||
let got = file_name_ext(&Cow::Owned(bs));
|
||||
assert_eq!($ext.map(|s| Cow::Borrowed(B(s))), got);
|
||||
let got = file_name_ext(OsStr::new($file_name));
|
||||
assert_eq!($ext.map(OsStr::new), got);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -109,8 +159,7 @@ mod tests {
|
||||
($name:ident, $path:expr, $expected:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let bs = BString::from_slice($path);
|
||||
let got = normalize_path(Cow::Owned(bs));
|
||||
let got = normalize_path(Cow::Owned($path.to_vec()));
|
||||
assert_eq!($expected.to_vec(), got.into_owned());
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
[package]
|
||||
name = "grep-cli"
|
||||
version = "0.1.1" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Utilities for search oriented command line applications.
|
||||
"""
|
||||
documentation = "https://docs.rs/grep-cli"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "cli", "utility", "util"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[dependencies]
|
||||
atty = "0.2.11"
|
||||
bstr = "0.1.2"
|
||||
globset = { version = "0.4.3", path = "../globset" }
|
||||
lazy_static = "1.1.0"
|
||||
log = "0.4.5"
|
||||
regex = "1.1"
|
||||
same-file = "1.0.4"
|
||||
termcolor = "1.0.4"
|
||||
|
||||
[target.'cfg(windows)'.dependencies.winapi-util]
|
||||
version = "0.1.1"
|
||||
@@ -1,38 +0,0 @@
|
||||
grep-cli
|
||||
--------
|
||||
A utility library that provides common routines desired in search oriented
|
||||
command line applications. This includes, but is not limited to, parsing hex
|
||||
escapes, detecting whether stdin is readable and more. To the extent possible,
|
||||
this crate strives for compatibility across Windows, macOS and Linux.
|
||||
|
||||
[Linux build status](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[Windows build status](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[grep-cli on crates.io](https://crates.io/crates/grep-cli)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/grep-cli](https://docs.rs/grep-cli)
|
||||
|
||||
**NOTE:** You probably don't want to use this crate directly. Instead, you
|
||||
should prefer the facade defined in the
|
||||
[`grep`](https://docs.rs/grep)
|
||||
crate.
|
||||
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
grep-cli = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_cli;
|
||||
```
|
||||
@@ -1,382 +0,0 @@
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
|
||||
use globset::{Glob, GlobSet, GlobSetBuilder};
|
||||
|
||||
use process::{CommandError, CommandReader, CommandReaderBuilder};
|
||||
|
||||
/// A builder for a matcher that determines which files get decompressed.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DecompressionMatcherBuilder {
|
||||
/// The commands for each matching glob.
|
||||
commands: Vec<DecompressionCommand>,
|
||||
/// Whether to include the default matching rules.
|
||||
defaults: bool,
|
||||
}
|
||||
|
||||
/// A representation of a single command for decompressing data
|
||||
/// out-of-process.
|
||||
#[derive(Clone, Debug)]
|
||||
struct DecompressionCommand {
|
||||
/// The glob that matches this command.
|
||||
glob: String,
|
||||
/// The command or binary name.
|
||||
bin: OsString,
|
||||
/// The arguments to invoke with the command.
|
||||
args: Vec<OsString>,
|
||||
}
|
||||
|
||||
impl Default for DecompressionMatcherBuilder {
|
||||
fn default() -> DecompressionMatcherBuilder {
|
||||
DecompressionMatcherBuilder::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl DecompressionMatcherBuilder {
|
||||
/// Create a new builder for configuring a decompression matcher.
|
||||
pub fn new() -> DecompressionMatcherBuilder {
|
||||
DecompressionMatcherBuilder {
|
||||
commands: vec![],
|
||||
defaults: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a matcher for determining how to decompress files.
|
||||
///
|
||||
/// If there was a problem compiling the matcher, then an error is
|
||||
/// returned.
|
||||
pub fn build(&self) -> Result<DecompressionMatcher, CommandError> {
|
||||
let defaults =
|
||||
if !self.defaults {
|
||||
vec![]
|
||||
} else {
|
||||
default_decompression_commands()
|
||||
};
|
||||
let mut glob_builder = GlobSetBuilder::new();
|
||||
let mut commands = vec![];
|
||||
for decomp_cmd in defaults.iter().chain(&self.commands) {
|
||||
let glob = Glob::new(&decomp_cmd.glob).map_err(|err| {
|
||||
CommandError::io(io::Error::new(io::ErrorKind::Other, err))
|
||||
})?;
|
||||
glob_builder.add(glob);
|
||||
commands.push(decomp_cmd.clone());
|
||||
}
|
||||
let globs = glob_builder.build().map_err(|err| {
|
||||
CommandError::io(io::Error::new(io::ErrorKind::Other, err))
|
||||
})?;
|
||||
Ok(DecompressionMatcher { globs, commands })
|
||||
}
|
||||
|
||||
/// When enabled, the default matching rules will be compiled into this
|
||||
/// matcher before any other associations. When disabled, only the
|
||||
/// rules explicitly given to this builder will be used.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn defaults(&mut self, yes: bool) -> &mut DecompressionMatcherBuilder {
|
||||
self.defaults = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Associates a glob with a command to decompress files matching the glob.
|
||||
///
|
||||
/// If multiple globs match the same file, then the most recently added
|
||||
/// glob takes precedence.
|
||||
///
|
||||
/// The syntax for the glob is documented in the
|
||||
/// [`globset` crate](https://docs.rs/globset/#syntax).
|
||||
pub fn associate<P, I, A>(
|
||||
&mut self,
|
||||
glob: &str,
|
||||
program: P,
|
||||
args: I,
|
||||
) -> &mut DecompressionMatcherBuilder
|
||||
where P: AsRef<OsStr>,
|
||||
I: IntoIterator<Item=A>,
|
||||
A: AsRef<OsStr>,
|
||||
{
|
||||
|
||||
let glob = glob.to_string();
|
||||
let bin = program.as_ref().to_os_string();
|
||||
let args = args
|
||||
.into_iter()
|
||||
.map(|a| a.as_ref().to_os_string())
|
||||
.collect();
|
||||
self.commands.push(DecompressionCommand { glob, bin, args });
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A matcher for determining how to decompress files.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DecompressionMatcher {
|
||||
/// The set of globs to match. Each glob has a corresponding entry in
|
||||
/// `commands`. When a glob matches, the corresponding command should be
|
||||
/// used to perform out-of-process decompression.
|
||||
globs: GlobSet,
|
||||
/// The commands for each matching glob.
|
||||
commands: Vec<DecompressionCommand>,
|
||||
}
|
||||
|
||||
impl Default for DecompressionMatcher {
|
||||
fn default() -> DecompressionMatcher {
|
||||
DecompressionMatcher::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl DecompressionMatcher {
|
||||
/// Create a new matcher with default rules.
|
||||
///
|
||||
/// To add more matching rules, build a matcher with
|
||||
/// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html).
|
||||
pub fn new() -> DecompressionMatcher {
|
||||
DecompressionMatcherBuilder::new()
|
||||
.build()
|
||||
.expect("built-in matching rules should always compile")
|
||||
}
|
||||
|
||||
/// Return a pre-built command based on the given file path that can
|
||||
/// decompress its contents. If no such decompressor is known, then this
|
||||
/// returns `None`.
|
||||
///
|
||||
/// If there are multiple possible commands matching the given path, then
|
||||
/// the command added last takes precedence.
|
||||
pub fn command<P: AsRef<Path>>(&self, path: P) -> Option<Command> {
|
||||
for i in self.globs.matches(path).into_iter().rev() {
|
||||
let decomp_cmd = &self.commands[i];
|
||||
let mut cmd = Command::new(&decomp_cmd.bin);
|
||||
cmd.args(&decomp_cmd.args);
|
||||
return Some(cmd);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given file path has at least one
|
||||
/// matching command to perform decompression on.
|
||||
pub fn has_command<P: AsRef<Path>>(&self, path: P) -> bool {
|
||||
self.globs.is_match(path)
|
||||
}
|
||||
}
|
||||
|
||||
/// Configures and builds a streaming reader for decompressing data.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct DecompressionReaderBuilder {
|
||||
matcher: DecompressionMatcher,
|
||||
command_builder: CommandReaderBuilder,
|
||||
}
|
||||
|
||||
impl DecompressionReaderBuilder {
|
||||
/// Create a new builder with the default configuration.
|
||||
pub fn new() -> DecompressionReaderBuilder {
|
||||
DecompressionReaderBuilder::default()
|
||||
}
|
||||
|
||||
/// Build a new streaming reader for decompressing data.
|
||||
///
|
||||
/// If decompression is done out-of-process and if there was a problem
|
||||
/// spawning the process, then its error is logged at the debug level and a
|
||||
/// passthru reader is returned that does no decompression. This behavior
|
||||
/// typically occurs when the given file path matches a decompression
|
||||
/// command, but is executing in an environment where the decompression
|
||||
/// command is not available.
|
||||
///
|
||||
/// If the given file path could not be matched with a decompression
|
||||
/// strategy, then a passthru reader is returned that does no
|
||||
/// decompression.
|
||||
pub fn build<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
) -> Result<DecompressionReader, CommandError> {
|
||||
let path = path.as_ref();
|
||||
let mut cmd = match self.matcher.command(path) {
|
||||
None => return DecompressionReader::new_passthru(path),
|
||||
Some(cmd) => cmd,
|
||||
};
|
||||
cmd.arg(path);
|
||||
|
||||
match self.command_builder.build(&mut cmd) {
|
||||
Ok(cmd_reader) => Ok(DecompressionReader { rdr: Ok(cmd_reader) }),
|
||||
Err(err) => {
|
||||
debug!(
|
||||
"{}: error spawning command '{:?}': {} \
|
||||
(falling back to uncompressed reader)",
|
||||
path.display(),
|
||||
cmd,
|
||||
err,
|
||||
);
|
||||
DecompressionReader::new_passthru(path)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the matcher to use to look up the decompression command for each
|
||||
/// file path.
|
||||
///
|
||||
/// A set of sensible rules is enabled by default. Setting this will
|
||||
/// completely replace the current rules.
|
||||
pub fn matcher(
|
||||
&mut self,
|
||||
matcher: DecompressionMatcher,
|
||||
) -> &mut DecompressionReaderBuilder {
|
||||
self.matcher = matcher;
|
||||
self
|
||||
}
|
||||
|
||||
/// Get the underlying matcher currently used by this builder.
|
||||
pub fn get_matcher(&self) -> &DecompressionMatcher {
|
||||
&self.matcher
|
||||
}
|
||||
|
||||
/// When enabled, the reader will asynchronously read the contents of the
|
||||
/// command's stderr output. When disabled, stderr is only read after the
|
||||
/// stdout stream has been exhausted (or if the process quits with an error
|
||||
/// code).
|
||||
///
|
||||
/// Note that when enabled, this may require launching an additional
|
||||
/// thread in order to read stderr. This is done so that the process being
|
||||
/// executed is never blocked from writing to stdout or stderr. If this is
|
||||
/// disabled, then it is possible for the process to fill up the stderr
|
||||
/// buffer and deadlock.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn async_stderr(
|
||||
&mut self,
|
||||
yes: bool,
|
||||
) -> &mut DecompressionReaderBuilder {
|
||||
self.command_builder.async_stderr(yes);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A streaming reader for decompressing the contents of a file.
|
||||
///
|
||||
/// The purpose of this reader is to provide a seamless way to decompress the
|
||||
/// contents of file using existing tools in the current environment. This is
|
||||
/// meant to be an alternative to using decompression libraries in favor of the
|
||||
/// simplicity and portability of using external commands such as `gzip` and
|
||||
/// `xz`. This does impose the overhead of spawning a process, so other means
|
||||
/// for performing decompression should be sought if this overhead isn't
|
||||
/// acceptable.
|
||||
///
|
||||
/// A decompression reader comes with a default set of matching rules that are
|
||||
/// meant to associate file paths with the corresponding command to use to
|
||||
/// decompress them. For example, a glob like `*.gz` matches gzip compressed
|
||||
/// files with the command `gzip -d -c`. If a file path does not match any
|
||||
/// existing rules, or if it matches a rule whose command does not exist in the
|
||||
/// current environment, then the decompression reader passes through the
|
||||
/// contents of the underlying file without doing any decompression.
|
||||
///
|
||||
/// The default matching rules are probably good enough for most cases, and if
|
||||
/// they require revision, pull requests are welcome. In cases where they must
|
||||
/// be changed or extended, they can be customized through the use of
|
||||
/// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html)
|
||||
/// and
|
||||
/// [`DecompressionReaderBuilder`](struct.DecompressionReaderBuilder.html).
|
||||
///
|
||||
/// By default, this reader will asynchronously read the processes' stderr.
|
||||
/// This prevents subtle deadlocking bugs for noisy processes that write a lot
|
||||
/// to stderr. Currently, the entire contents of stderr is read on to the heap.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// This example shows how to read the decompressed contents of a file without
|
||||
/// needing to explicitly choose the decompression command to run.
|
||||
///
|
||||
/// Note that if you need to decompress multiple files, it is better to use
|
||||
/// `DecompressionReaderBuilder`, which will amortize the cost of compiling the
|
||||
/// matcher.
|
||||
///
|
||||
/// ```no_run
|
||||
/// use std::io::Read;
|
||||
/// use std::process::Command;
|
||||
/// use grep_cli::DecompressionReader;
|
||||
///
|
||||
/// # fn example() -> Result<(), Box<::std::error::Error>> {
|
||||
/// let mut rdr = DecompressionReader::new("/usr/share/man/man1/ls.1.gz")?;
|
||||
/// let mut contents = vec![];
|
||||
/// rdr.read_to_end(&mut contents)?;
|
||||
/// # Ok(()) }
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
pub struct DecompressionReader {
|
||||
rdr: Result<CommandReader, File>,
|
||||
}
|
||||
|
||||
impl DecompressionReader {
|
||||
/// Build a new streaming reader for decompressing data.
|
||||
///
|
||||
/// If decompression is done out-of-process and if there was a problem
|
||||
/// spawning the process, then its error is returned.
|
||||
///
|
||||
/// If the given file path could not be matched with a decompression
|
||||
/// strategy, then a passthru reader is returned that does no
|
||||
/// decompression.
|
||||
///
|
||||
/// This uses the default matching rules for determining how to decompress
|
||||
/// the given file. To change those matching rules, use
|
||||
/// [`DecompressionReaderBuilder`](struct.DecompressionReaderBuilder.html)
|
||||
/// and
|
||||
/// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html).
|
||||
///
|
||||
/// When creating readers for many paths. it is better to use the builder
|
||||
/// since it will amortize the cost of constructing the matcher.
|
||||
pub fn new<P: AsRef<Path>>(
|
||||
path: P,
|
||||
) -> Result<DecompressionReader, CommandError> {
|
||||
DecompressionReaderBuilder::new().build(path)
|
||||
}
|
||||
|
||||
/// Creates a new "passthru" decompression reader that reads from the file
|
||||
/// corresponding to the given path without doing decompression and without
|
||||
/// executing another process.
|
||||
fn new_passthru(path: &Path) -> Result<DecompressionReader, CommandError> {
|
||||
let file = File::open(path)?;
|
||||
Ok(DecompressionReader { rdr: Err(file) })
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Read for DecompressionReader {
|
||||
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
match self.rdr {
|
||||
Ok(ref mut rdr) => rdr.read(buf),
|
||||
Err(ref mut rdr) => rdr.read(buf),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn default_decompression_commands() -> Vec<DecompressionCommand> {
|
||||
const ARGS_GZIP: &[&str] = &["gzip", "-d", "-c"];
|
||||
const ARGS_BZIP: &[&str] = &["bzip2", "-d", "-c"];
|
||||
const ARGS_XZ: &[&str] = &["xz", "-d", "-c"];
|
||||
const ARGS_LZ4: &[&str] = &["lz4", "-d", "-c"];
|
||||
const ARGS_LZMA: &[&str] = &["xz", "--format=lzma", "-d", "-c"];
|
||||
const ARGS_BROTLI: &[&str] = &["brotli", "-d", "-c"];
|
||||
const ARGS_ZSTD: &[&str] = &["zstd", "-q", "-d", "-c"];
|
||||
|
||||
fn cmd(glob: &str, args: &[&str]) -> DecompressionCommand {
|
||||
DecompressionCommand {
|
||||
glob: glob.to_string(),
|
||||
bin: OsStr::new(&args[0]).to_os_string(),
|
||||
args: args
|
||||
.iter()
|
||||
.skip(1)
|
||||
.map(|s| OsStr::new(s).to_os_string())
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
vec![
|
||||
cmd("*.gz", ARGS_GZIP),
|
||||
cmd("*.tgz", ARGS_GZIP),
|
||||
cmd("*.bz2", ARGS_BZIP),
|
||||
cmd("*.tbz2", ARGS_BZIP),
|
||||
cmd("*.xz", ARGS_XZ),
|
||||
cmd("*.txz", ARGS_XZ),
|
||||
cmd("*.lz4", ARGS_LZ4),
|
||||
cmd("*.lzma", ARGS_LZMA),
|
||||
cmd("*.br", ARGS_BROTLI),
|
||||
cmd("*.zst", ARGS_ZSTD),
|
||||
cmd("*.zstd", ARGS_ZSTD),
|
||||
]
|
||||
}
|
||||
@@ -1,263 +0,0 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::str;
|
||||
|
||||
use bstr::{BStr, BString};
|
||||
|
||||
/// The states of the small state machine that drives `unescape`.
#[derive(Clone, Copy, Eq, PartialEq)]
enum State {
    /// A `\` has just been seen.
    Escape,
    /// A `\x` has just been seen.
    HexFirst,
    /// A `\x[0-9A-Fa-f]` has just been seen; the payload is that first
    /// hexadecimal digit.
    HexSecond(char),
    /// Not inside any escape sequence. This is the starting state.
    Literal,
}
|
||||
|
||||
/// Escapes arbitrary bytes into a human readable string.
|
||||
///
|
||||
/// This converts `\t`, `\r` and `\n` into their escaped forms. It also
|
||||
/// converts the non-printable subset of ASCII in addition to invalid UTF-8
|
||||
/// bytes to hexadecimal escape sequences. Everything else is left as is.
|
||||
///
|
||||
/// The dual of this routine is [`unescape`](fn.unescape.html).
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// This example shows how to convert a byte string that contains a `\n` and
|
||||
/// invalid UTF-8 bytes into a `String`.
|
||||
///
|
||||
/// Pay special attention to the use of raw strings. That is, `r"\n"` is
|
||||
/// equivalent to `"\\n"`.
|
||||
///
|
||||
/// ```
|
||||
/// use grep_cli::escape;
|
||||
///
|
||||
/// assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz"));
|
||||
/// ```
|
||||
pub fn escape(bytes: &[u8]) -> String {
|
||||
let bytes = BStr::new(bytes);
|
||||
let mut escaped = String::new();
|
||||
for (s, e, ch) in bytes.char_indices() {
|
||||
if ch == '\u{FFFD}' {
|
||||
for b in bytes[s..e].bytes() {
|
||||
escape_byte(b, &mut escaped);
|
||||
}
|
||||
} else {
|
||||
escape_char(ch, &mut escaped);
|
||||
}
|
||||
}
|
||||
escaped
|
||||
}
|
||||
|
||||
/// Escapes an OS string into a human readable string.
|
||||
///
|
||||
/// This is like [`escape`](fn.escape.html), but accepts an OS string.
|
||||
pub fn escape_os(string: &OsStr) -> String {
|
||||
escape(BString::from_os_str_lossy(string).as_bytes())
|
||||
}
|
||||
|
||||
/// Unescapes a string.
|
||||
///
|
||||
/// It supports a limited set of escape sequences:
|
||||
///
|
||||
/// * `\t`, `\r` and `\n` are mapped to their corresponding ASCII bytes.
|
||||
/// * `\xZZ` hexadecimal escapes are mapped to their byte.
|
||||
///
|
||||
/// Everything else is left as is, including non-hexadecimal escapes like
|
||||
/// `\xGG`.
|
||||
///
|
||||
/// This is useful when it is desirable for a command line argument to be
|
||||
/// capable of specifying arbitrary bytes or otherwise make it easier to
|
||||
/// specify non-printable characters.
|
||||
///
|
||||
/// The dual of this routine is [`escape`](fn.escape.html).
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// This example shows how to convert an escaped string (which is valid UTF-8)
|
||||
/// into a corresponding sequence of bytes. Each escape sequence is mapped to
|
||||
/// its bytes, which may include invalid UTF-8.
|
||||
///
|
||||
/// Pay special attention to the use of raw strings. That is, `r"\n"` is
|
||||
/// equivalent to `"\\n"`.
|
||||
///
|
||||
/// ```
|
||||
/// use grep_cli::unescape;
|
||||
///
|
||||
/// assert_eq!(&b"foo\nbar\xFFbaz"[..], &*unescape(r"foo\nbar\xFFbaz"));
|
||||
/// ```
|
||||
pub fn unescape(s: &str) -> Vec<u8> {
|
||||
use self::State::*;
|
||||
|
||||
let mut bytes = vec![];
|
||||
let mut state = Literal;
|
||||
for c in s.chars() {
|
||||
match state {
|
||||
Escape => {
|
||||
match c {
|
||||
'\\' => { bytes.push(b'\\'); state = Literal; }
|
||||
'n' => { bytes.push(b'\n'); state = Literal; }
|
||||
'r' => { bytes.push(b'\r'); state = Literal; }
|
||||
't' => { bytes.push(b'\t'); state = Literal; }
|
||||
'x' => { state = HexFirst; }
|
||||
c => {
|
||||
bytes.extend(format!(r"\{}", c).into_bytes());
|
||||
state = Literal;
|
||||
}
|
||||
}
|
||||
}
|
||||
HexFirst => {
|
||||
match c {
|
||||
'0'...'9' | 'A'...'F' | 'a'...'f' => {
|
||||
state = HexSecond(c);
|
||||
}
|
||||
c => {
|
||||
bytes.extend(format!(r"\x{}", c).into_bytes());
|
||||
state = Literal;
|
||||
}
|
||||
}
|
||||
}
|
||||
HexSecond(first) => {
|
||||
match c {
|
||||
'0'...'9' | 'A'...'F' | 'a'...'f' => {
|
||||
let ordinal = format!("{}{}", first, c);
|
||||
let byte = u8::from_str_radix(&ordinal, 16).unwrap();
|
||||
bytes.push(byte);
|
||||
state = Literal;
|
||||
}
|
||||
c => {
|
||||
let original = format!(r"\x{}{}", first, c);
|
||||
bytes.extend(original.into_bytes());
|
||||
state = Literal;
|
||||
}
|
||||
}
|
||||
}
|
||||
Literal => {
|
||||
match c {
|
||||
'\\' => { state = Escape; }
|
||||
c => { bytes.extend(c.to_string().as_bytes()); }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
match state {
|
||||
Escape => bytes.push(b'\\'),
|
||||
HexFirst => bytes.extend(b"\\x"),
|
||||
HexSecond(c) => bytes.extend(format!("\\x{}", c).into_bytes()),
|
||||
Literal => {}
|
||||
}
|
||||
bytes
|
||||
}
|
||||
|
||||
/// Unescapes an OS string.
|
||||
///
|
||||
/// This is like [`unescape`](fn.unescape.html), but accepts an OS string.
|
||||
///
|
||||
/// Note that this first lossily decodes the given OS string as UTF-8. That
|
||||
/// is, an escaped string (the thing given) should be valid UTF-8.
|
||||
pub fn unescape_os(string: &OsStr) -> Vec<u8> {
|
||||
unescape(&string.to_string_lossy())
|
||||
}
|
||||
|
||||
/// Adds the given codepoint to the given string, escaping it if necessary.
|
||||
fn escape_char(cp: char, into: &mut String) {
|
||||
if cp.is_ascii() {
|
||||
escape_byte(cp as u8, into);
|
||||
} else {
|
||||
into.push(cp);
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends the given byte to `into`, escaping it when necessary.
///
/// * Printable ASCII (`0x20` through `0x7E`), other than the backslash, is
///   appended unchanged.
/// * `\n`, `\r`, `\t` and `\` are appended as `\n`, `\r`, `\t` and `\\`.
/// * Every other byte is appended as an upper case `\xZZ` hexadecimal
///   escape.
fn escape_byte(byte: u8, into: &mut String) {
    match byte {
        // The full printable ASCII range, including the space (0x20) and
        // the tilde (0x7E), with a hole at 0x5C for the backslash.
        0x20..=0x5B | 0x5D..=0x7E => into.push(byte as char),
        b'\n' => into.push_str(r"\n"),
        b'\r' => into.push_str(r"\r"),
        b'\t' => into.push_str(r"\t"),
        b'\\' => into.push_str(r"\\"),
        _ => into.push_str(&format!(r"\x{:02X}", byte)),
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{escape, unescape};

    /// Asserts that unescaping `escaped` yields exactly `raw`.
    fn decodes(escaped: &str, raw: &[u8]) {
        assert_eq!(raw.to_vec(), unescape(escaped));
    }

    /// Asserts that escaping `raw` yields exactly `escaped`.
    fn encodes(raw: &[u8], escaped: &str) {
        assert_eq!(escaped, escape(raw));
    }

    #[test]
    fn empty() {
        decodes(r"", b"");
        encodes(b"", r"");
    }

    #[test]
    fn backslash() {
        decodes(r"\\", b"\\");
        encodes(b"\\", r"\\");
    }

    #[test]
    fn nul() {
        decodes(r"\x00", b"\x00");
        encodes(b"\x00", r"\x00");
    }

    #[test]
    fn nl() {
        decodes(r"\n", b"\n");
        encodes(b"\n", r"\n");
    }

    #[test]
    fn tab() {
        decodes(r"\t", b"\t");
        encodes(b"\t", r"\t");
    }

    #[test]
    fn carriage() {
        decodes(r"\r", b"\r");
        encodes(b"\r", r"\r");
    }

    #[test]
    fn nothing_simple() {
        decodes(r"\a", b"\\a");
        decodes(r"\\a", b"\\a");
        encodes(b"\\a", r"\\a");
    }

    #[test]
    fn nothing_hex0() {
        decodes(r"\x", b"\\x");
        decodes(r"\\x", b"\\x");
        encodes(b"\\x", r"\\x");
    }

    #[test]
    fn nothing_hex1() {
        decodes(r"\xz", b"\\xz");
        decodes(r"\\xz", b"\\xz");
        encodes(b"\\xz", r"\\xz");
    }

    #[test]
    fn nothing_hex2() {
        decodes(r"\xzz", b"\\xzz");
        decodes(r"\\xzz", b"\\xzz");
        encodes(b"\\xzz", r"\\xzz");
    }

    #[test]
    fn invalid_utf8() {
        encodes(b"\xFF", r"\xFF");
        encodes(b"a\xFFb", r"a\xFFb");
    }
}
|
||||
@@ -1,171 +0,0 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::num::ParseIntError;
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
/// An error that occurs when parsing a human readable size description.
|
||||
///
|
||||
/// This error provides a end user friendly message describing why the
|
||||
/// description coudln't be parsed and what the expected format is.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct ParseSizeError {
|
||||
original: String,
|
||||
kind: ParseSizeErrorKind,
|
||||
}
|
||||
|
||||
/// The specific reason a size description failed to parse.
#[derive(Clone, Debug, Eq, PartialEq)]
enum ParseSizeErrorKind {
    /// The description does not have the expected overall shape.
    InvalidFormat,
    /// The numeric part could not be parsed as an integer.
    InvalidInt(ParseIntError),
    /// The resulting number of bytes does not fit in the result type.
    Overflow,
}
|
||||
|
||||
impl ParseSizeError {
|
||||
fn format(original: &str) -> ParseSizeError {
|
||||
ParseSizeError {
|
||||
original: original.to_string(),
|
||||
kind: ParseSizeErrorKind::InvalidFormat,
|
||||
}
|
||||
}
|
||||
|
||||
fn int(original: &str, err: ParseIntError) -> ParseSizeError {
|
||||
ParseSizeError {
|
||||
original: original.to_string(),
|
||||
kind: ParseSizeErrorKind::InvalidInt(err),
|
||||
}
|
||||
}
|
||||
|
||||
fn overflow(original: &str) -> ParseSizeError {
|
||||
ParseSizeError {
|
||||
original: original.to_string(),
|
||||
kind: ParseSizeErrorKind::Overflow,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for ParseSizeError {
|
||||
fn description(&self) -> &str { "invalid size" }
|
||||
}
|
||||
|
||||
impl fmt::Display for ParseSizeError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
use self::ParseSizeErrorKind::*;
|
||||
|
||||
match self.kind {
|
||||
InvalidFormat => {
|
||||
write!(
|
||||
f,
|
||||
"invalid format for size '{}', which should be a sequence \
|
||||
of digits followed by an optional 'K', 'M' or 'G' \
|
||||
suffix",
|
||||
self.original
|
||||
)
|
||||
}
|
||||
InvalidInt(ref err) => {
|
||||
write!(
|
||||
f,
|
||||
"invalid integer found in size '{}': {}",
|
||||
self.original,
|
||||
err
|
||||
)
|
||||
}
|
||||
Overflow => {
|
||||
write!(f, "size too big in '{}'", self.original)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ParseSizeError> for io::Error {
|
||||
fn from(size_err: ParseSizeError) -> io::Error {
|
||||
io::Error::new(io::ErrorKind::Other, size_err)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a human readable size like `2M` into a corresponding number of bytes.
|
||||
///
|
||||
/// Supported size suffixes are `K` (for kilobyte), `M` (for megabyte) and `G`
|
||||
/// (for gigabyte). If a size suffix is missing, then the size is interpreted
|
||||
/// as bytes. If the size is too big to fit into a `u64`, then this returns an
|
||||
/// error.
|
||||
///
|
||||
/// Additional suffixes may be added over time.
|
||||
pub fn parse_human_readable_size(size: &str) -> Result<u64, ParseSizeError> {
|
||||
lazy_static! {
|
||||
// Normally I'd just parse something this simple by hand to avoid the
|
||||
// regex dep, but we bring regex in any way for glob matching, so might
|
||||
// as well use it.
|
||||
static ref RE: Regex = Regex::new(r"^([0-9]+)([KMG])?$").unwrap();
|
||||
}
|
||||
|
||||
let caps = match RE.captures(size) {
|
||||
Some(caps) => caps,
|
||||
None => return Err(ParseSizeError::format(size)),
|
||||
};
|
||||
let value: u64 = caps[1].parse().map_err(|err| {
|
||||
ParseSizeError::int(size, err)
|
||||
})?;
|
||||
let suffix = match caps.get(2) {
|
||||
None => return Ok(value),
|
||||
Some(cap) => cap.as_str(),
|
||||
};
|
||||
let bytes = match suffix {
|
||||
"K" => value.checked_mul(1<<10),
|
||||
"M" => value.checked_mul(1<<20),
|
||||
"G" => value.checked_mul(1<<30),
|
||||
// Because if the regex matches this group, it must be [KMG].
|
||||
_ => unreachable!(),
|
||||
};
|
||||
bytes.ok_or_else(|| ParseSizeError::overflow(size))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `size`, panicking if it is rejected.
    fn parsed(size: &str) -> u64 {
        parse_human_readable_size(size).unwrap()
    }

    /// Reports whether `size` is rejected by the parser.
    fn rejected(size: &str) -> bool {
        parse_human_readable_size(size).is_err()
    }

    #[test]
    fn suffix_none() {
        assert_eq!(123, parsed("123"));
    }

    #[test]
    fn suffix_k() {
        assert_eq!(123 * (1 << 10), parsed("123K"));
    }

    #[test]
    fn suffix_m() {
        assert_eq!(123 * (1 << 20), parsed("123M"));
    }

    #[test]
    fn suffix_g() {
        assert_eq!(123 * (1 << 30), parsed("123G"));
    }

    #[test]
    fn invalid_empty() {
        assert!(rejected(""));
    }

    #[test]
    fn invalid_non_digit() {
        assert!(rejected("a"));
    }

    #[test]
    fn invalid_overflow() {
        assert!(rejected("9999999999999999G"));
    }

    #[test]
    fn invalid_suffix() {
        assert!(rejected("123T"));
    }
}
|
||||
@@ -1,252 +0,0 @@
|
||||
/*!
|
||||
This crate provides common routines used in command line applications, with a
|
||||
focus on routines useful for search oriented applications. As a utility
|
||||
library, there is no central type or function. However, a key focus of this
|
||||
crate is to improve failure modes and provide user friendly error messages
|
||||
when things go wrong.
|
||||
|
||||
To the best extent possible, everything in this crate works on Windows, macOS
|
||||
and Linux.
|
||||
|
||||
|
||||
# Standard I/O
|
||||
|
||||
The
|
||||
[`is_readable_stdin`](fn.is_readable_stdin.html),
|
||||
[`is_tty_stderr`](fn.is_tty_stderr.html),
|
||||
[`is_tty_stdin`](fn.is_tty_stdin.html)
|
||||
and
|
||||
[`is_tty_stdout`](fn.is_tty_stdout.html)
|
||||
routines query aspects of standard I/O. `is_readable_stdin` determines whether
|
||||
stdin can be usefully read from, while the `tty` methods determine whether a
|
||||
tty is attached to stdin/stdout/stderr.
|
||||
|
||||
`is_readable_stdin` is useful when writing an application that changes behavior
|
||||
based on whether the application was invoked with data on stdin. For example,
|
||||
`rg foo` might recursively search the current working directory for
|
||||
occurrences of `foo`, but `rg foo < file` might only search the contents of
|
||||
`file`.
|
||||
|
||||
The `tty` methods are useful for similar reasons. Namely, commands like `ls`
|
||||
will change their output depending on whether they are printing to a terminal
|
||||
or not. For example, `ls` shows a file on each line when stdout is redirected
|
||||
to a file or a pipe, but condenses the output to show possibly many files on
|
||||
each line when stdout is connected to a tty.
|
||||
|
||||
|
||||
# Coloring and buffering
|
||||
|
||||
The
|
||||
[`stdout`](fn.stdout.html),
|
||||
[`stdout_buffered_block`](fn.stdout_buffered_block.html)
|
||||
and
|
||||
[`stdout_buffered_line`](fn.stdout_buffered_line.html)
|
||||
routines are alternative constructors for
|
||||
[`StandardStream`](struct.StandardStream.html).
|
||||
A `StandardStream` implements `termcolor::WriteColor`, which provides a way
|
||||
to emit colors to terminals. Its key use is the encapsulation of buffering
|
||||
style. Namely, `stdout` will return a line buffered `StandardStream` if and
|
||||
only if stdout is connected to a tty, and will otherwise return a block
|
||||
buffered `StandardStream`. Line buffering is important for use with a tty
|
||||
because it typically decreases the latency at which the end user sees output.
|
||||
Block buffering is used otherwise because it is faster, and redirecting stdout
|
||||
to a file typically doesn't benefit from the decreased latency that line
|
||||
buffering provides.
|
||||
|
||||
The `stdout_buffered_block` and `stdout_buffered_line` routines can be used to
|
||||
explicitly set the buffering strategy regardless of whether stdout is connected
|
||||
to a tty or not.
|
||||
|
||||
|
||||
# Escaping
|
||||
|
||||
The
|
||||
[`escape`](fn.escape.html),
|
||||
[`escape_os`](fn.escape_os.html),
|
||||
[`unescape`](fn.unescape.html)
|
||||
and
|
||||
[`unescape_os`](fn.unescape_os.html)
|
||||
routines provide a user friendly way of dealing with UTF-8 encoded strings that
|
||||
can express arbitrary bytes. For example, you might want to accept a string
|
||||
containing arbitrary bytes as a command line argument, but most interactive
|
||||
shells make such strings difficult to type. Instead, we can ask users to use
|
||||
escape sequences.
|
||||
|
||||
For example, `a\xFFz` is itself a valid UTF-8 string corresponding to the
|
||||
following bytes:
|
||||
|
||||
```ignore
|
||||
[b'a', b'\\', b'x', b'F', b'F', b'z']
|
||||
```
|
||||
|
||||
However, we can
|
||||
interpret `\xFF` as an escape sequence with the `unescape`/`unescape_os`
|
||||
routines, which will yield
|
||||
|
||||
```ignore
|
||||
[b'a', b'\xFF', b'z']
|
||||
```
|
||||
|
||||
instead. For example:
|
||||
|
||||
```
|
||||
use grep_cli::unescape;
|
||||
|
||||
// Note the use of a raw string!
|
||||
assert_eq!(vec![b'a', b'\xFF', b'z'], unescape(r"a\xFFz"));
|
||||
```
|
||||
|
||||
The `escape`/`escape_os` routines provide the reverse transformation, which
|
||||
makes it easy to show user friendly error messages involving arbitrary bytes.
|
||||
|
||||
|
||||
# Building patterns
|
||||
|
||||
Typically, regular expression patterns must be valid UTF-8. However, command
|
||||
line arguments aren't guaranteed to be valid UTF-8. Unfortunately, the
|
||||
standard library's UTF-8 conversion functions from `OsStr`s do not provide
|
||||
good error messages. However, the
|
||||
[`pattern_from_bytes`](fn.pattern_from_bytes.html)
|
||||
and
|
||||
[`pattern_from_os`](fn.pattern_from_os.html)
|
||||
do, including reporting exactly where the first invalid UTF-8 byte is seen.
|
||||
|
||||
Additionally, it can be useful to read patterns from a file while reporting
|
||||
good error messages that include line numbers. The
|
||||
[`patterns_from_path`](fn.patterns_from_path.html),
|
||||
[`patterns_from_reader`](fn.patterns_from_reader.html)
|
||||
and
|
||||
[`patterns_from_stdin`](fn.patterns_from_stdin.html)
|
||||
routines do just that. If any pattern is found that is invalid UTF-8, then the
|
||||
error includes the file path (if available) along with the line number and the
|
||||
byte offset at which the first invalid UTF-8 byte was observed.
|
||||
|
||||
|
||||
# Read process output
|
||||
|
||||
Sometimes a command line application needs to execute other processes and read
|
||||
their stdout in a streaming fashion. The
|
||||
[`CommandReader`](struct.CommandReader.html)
|
||||
provides this functionality with an explicit goal of improving failure modes.
|
||||
In particular, if the process exits with an error code, then stderr is read
|
||||
and converted into a normal Rust error to show to end users. This makes the
|
||||
underlying failure modes explicit and gives more information to end users for
|
||||
debugging the problem.
|
||||
|
||||
As a special case,
|
||||
[`DecompressionReader`](struct.DecompressionReader.html)
|
||||
provides a way to decompress arbitrary files by matching their file extensions
|
||||
up with corresponding decompression programs (such as `gzip` and `xz`). This
|
||||
is useful as a means of performing simplistic decompression in a portable
|
||||
manner without binding to specific compression libraries. This does come with
|
||||
some overhead though, so if you need to decompress lots of small files, this
|
||||
may not be an appropriate convenience to use.
|
||||
|
||||
Each reader has a corresponding builder for additional configuration, such as
|
||||
whether to read stderr asynchronously in order to avoid deadlock (which is
|
||||
enabled by default).
|
||||
|
||||
|
||||
# Miscellaneous parsing
|
||||
|
||||
The
|
||||
[`parse_human_readable_size`](fn.parse_human_readable_size.html)
|
||||
routine parses strings like `2M` and converts them to the corresponding number
|
||||
of bytes (`2 * 1<<20` in this case). If an invalid size is found, then a good
|
||||
error message is crafted that typically tells the user how to fix the problem.
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate atty;
|
||||
extern crate bstr;
|
||||
extern crate globset;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate regex;
|
||||
extern crate same_file;
|
||||
extern crate termcolor;
|
||||
#[cfg(windows)]
|
||||
extern crate winapi_util;
|
||||
|
||||
mod decompress;
|
||||
mod escape;
|
||||
mod human;
|
||||
mod pattern;
|
||||
mod process;
|
||||
mod wtr;
|
||||
|
||||
pub use decompress::{
|
||||
DecompressionMatcher, DecompressionMatcherBuilder,
|
||||
DecompressionReader, DecompressionReaderBuilder,
|
||||
};
|
||||
pub use escape::{escape, escape_os, unescape, unescape_os};
|
||||
pub use human::{ParseSizeError, parse_human_readable_size};
|
||||
pub use pattern::{
|
||||
InvalidPatternError,
|
||||
pattern_from_os, pattern_from_bytes,
|
||||
patterns_from_path, patterns_from_reader, patterns_from_stdin,
|
||||
};
|
||||
pub use process::{CommandError, CommandReader, CommandReaderBuilder};
|
||||
pub use wtr::{
|
||||
StandardStream,
|
||||
stdout, stdout_buffered_line, stdout_buffered_block,
|
||||
};
|
||||
|
||||
/// Returns true if and only if stdin is believed to be readable.
|
||||
///
|
||||
/// When stdin is readable, command line programs may choose to behave
|
||||
/// differently than when stdin is not readable. For example, `command foo`
|
||||
/// might search the current directory for occurrences of `foo` where as
|
||||
/// `command foo < some-file` or `cat some-file | command foo` might instead
|
||||
/// only search stdin for occurrences of `foo`.
|
||||
pub fn is_readable_stdin() -> bool {
|
||||
#[cfg(unix)]
|
||||
fn imp() -> bool {
|
||||
use std::os::unix::fs::FileTypeExt;
|
||||
use same_file::Handle;
|
||||
|
||||
let ft = match Handle::stdin().and_then(|h| h.as_file().metadata()) {
|
||||
Err(_) => return false,
|
||||
Ok(md) => md.file_type(),
|
||||
};
|
||||
ft.is_file() || ft.is_fifo()
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
fn imp() -> bool {
|
||||
use winapi_util as winutil;
|
||||
|
||||
winutil::file::typ(winutil::HandleRef::stdin())
|
||||
.map(|t| t.is_disk() || t.is_pipe())
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
!is_tty_stdin() && imp()
|
||||
}
|
||||
|
||||
/// Returns true if and only if stdin is believed to be connected to a tty
|
||||
/// or a console.
|
||||
pub fn is_tty_stdin() -> bool {
|
||||
atty::is(atty::Stream::Stdin)
|
||||
}
|
||||
|
||||
/// Returns true if and only if stdout is believed to be connected to a tty
|
||||
/// or a console.
|
||||
///
|
||||
/// This is useful for when you want your command line program to produce
|
||||
/// different output depending on whether it's printing directly to a user's
|
||||
/// terminal or whether it's being redirected somewhere else. For example,
|
||||
/// implementations of `ls` will often show one item per line when stdout is
|
||||
/// redirected, but will condense output when printing to a tty.
|
||||
pub fn is_tty_stdout() -> bool {
|
||||
atty::is(atty::Stream::Stdout)
|
||||
}
|
||||
|
||||
/// Returns true if and only if stderr is believed to be connected to a tty
|
||||
/// or a console.
|
||||
pub fn is_tty_stderr() -> bool {
|
||||
atty::is(atty::Stream::Stderr)
|
||||
}
|
||||
@@ -1,205 +0,0 @@
|
||||
use std::error;
|
||||
use std::ffi::OsStr;
|
||||
use std::fmt;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufRead};
|
||||
use std::path::Path;
|
||||
use std::str;
|
||||
|
||||
use escape::{escape, escape_os};
|
||||
|
||||
/// The error produced when a pattern cannot be interpreted as valid UTF-8.
///
/// It exists so that end users who supply a pattern containing invalid UTF-8
/// get a targeted failure message instead of a generic conversion error.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InvalidPatternError {
    /// The offending pattern, in the form that is shown to the user.
    original: String,
    /// The number of leading bytes of the pattern verified as valid UTF-8.
    valid_up_to: usize,
}

impl InvalidPatternError {
    /// Returns the index in the given string up to which valid UTF-8 was
    /// verified.
    pub fn valid_up_to(&self) -> usize {
        self.valid_up_to
    }
}

impl error::Error for InvalidPatternError {
    fn description(&self) -> &str {
        "invalid pattern"
    }
}

impl fmt::Display for InvalidPatternError {
    /// Writes the byte offset of the first invalid byte, a hint about hex
    /// escape sequences, and the pattern itself.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let (offset, pattern) = (self.valid_up_to, &self.original);
        write!(
            f,
            "found invalid UTF-8 in pattern at byte offset {} \
             (use hex escape sequences to match arbitrary bytes \
             in a pattern, e.g., \\xFF): '{}'",
            offset,
            pattern,
        )
    }
}

impl From<InvalidPatternError> for io::Error {
    /// Wraps the pattern error in an `io::Error` of kind `Other`.
    fn from(paterr: InvalidPatternError) -> io::Error {
        let kind = io::ErrorKind::Other;
        io::Error::new(kind, paterr)
    }
}
|
||||
|
||||
/// Convert an OS string into a regular expression pattern.
|
||||
///
|
||||
/// This conversion fails if the given pattern is not valid UTF-8, in which
|
||||
/// case, a targeted error with more information about where the invalid UTF-8
|
||||
/// occurs is given. The error also suggests the use of hex escape sequences,
|
||||
/// which are supported by many regex engines.
|
||||
pub fn pattern_from_os(pattern: &OsStr) -> Result<&str, InvalidPatternError> {
|
||||
pattern.to_str().ok_or_else(|| {
|
||||
let valid_up_to = pattern
|
||||
.to_string_lossy()
|
||||
.find('\u{FFFD}')
|
||||
.expect("a Unicode replacement codepoint for invalid UTF-8");
|
||||
InvalidPatternError {
|
||||
original: escape_os(pattern),
|
||||
valid_up_to: valid_up_to,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Convert arbitrary bytes into a regular expression pattern.
|
||||
///
|
||||
/// This conversion fails if the given pattern is not valid UTF-8, in which
|
||||
/// case, a targeted error with more information about where the invalid UTF-8
|
||||
/// occurs is given. The error also suggests the use of hex escape sequences,
|
||||
/// which are supported by many regex engines.
|
||||
pub fn pattern_from_bytes(
|
||||
pattern: &[u8],
|
||||
) -> Result<&str, InvalidPatternError> {
|
||||
str::from_utf8(pattern).map_err(|err| {
|
||||
InvalidPatternError {
|
||||
original: escape(pattern),
|
||||
valid_up_to: err.valid_up_to(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Read patterns from a file path, one per line.
|
||||
///
|
||||
/// If there was a problem reading or if any of the patterns contain invalid
|
||||
/// UTF-8, then an error is returned. If there was a problem with a specific
|
||||
/// pattern, then the error message will include the line number and the file
|
||||
/// path.
|
||||
pub fn patterns_from_path<P: AsRef<Path>>(path: P) -> io::Result<Vec<String>> {
|
||||
let path = path.as_ref();
|
||||
let file = File::open(path).map_err(|err| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("{}: {}", path.display(), err),
|
||||
)
|
||||
})?;
|
||||
patterns_from_reader(file).map_err(|err| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("{}:{}", path.display(), err),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Read patterns from stdin, one per line.
|
||||
///
|
||||
/// If there was a problem reading or if any of the patterns contain invalid
|
||||
/// UTF-8, then an error is returned. If there was a problem with a specific
|
||||
/// pattern, then the error message will include the line number and the fact
|
||||
/// that it came from stdin.
|
||||
pub fn patterns_from_stdin() -> io::Result<Vec<String>> {
|
||||
let stdin = io::stdin();
|
||||
let locked = stdin.lock();
|
||||
patterns_from_reader(locked).map_err(|err| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("<stdin>:{}", err),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Read patterns from any reader, one per line.
|
||||
///
|
||||
/// If there was a problem reading or if any of the patterns contain invalid
|
||||
/// UTF-8, then an error is returned. If there was a problem with a specific
|
||||
/// pattern, then the error message will include the line number.
|
||||
///
|
||||
/// Note that this routine uses its own internal buffer, so the caller should
|
||||
/// not provide their own buffered reader if possible.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// This shows how to parse patterns, one per line.
|
||||
///
|
||||
/// ```
|
||||
/// use grep_cli::patterns_from_reader;
|
||||
///
|
||||
/// # fn example() -> Result<(), Box<::std::error::Error>> {
|
||||
/// let patterns = "\
|
||||
/// foo
|
||||
/// bar\\s+foo
|
||||
/// [a-z]{3}
|
||||
/// ";
|
||||
///
|
||||
/// assert_eq!(patterns_from_reader(patterns.as_bytes())?, vec![
|
||||
/// r"foo",
|
||||
/// r"bar\s+foo",
|
||||
/// r"[a-z]{3}",
|
||||
/// ]);
|
||||
/// # Ok(()) }
|
||||
/// ```
|
||||
pub fn patterns_from_reader<R: io::Read>(rdr: R) -> io::Result<Vec<String>> {
|
||||
let mut patterns = vec![];
|
||||
let mut bufrdr = io::BufReader::new(rdr);
|
||||
let mut line = vec![];
|
||||
let mut line_number = 0;
|
||||
while {
|
||||
line.clear();
|
||||
line_number += 1;
|
||||
bufrdr.read_until(b'\n', &mut line)? > 0
|
||||
} {
|
||||
line.pop().unwrap(); // remove trailing '\n'
|
||||
if line.last() == Some(&b'\r') {
|
||||
line.pop().unwrap();
|
||||
}
|
||||
match pattern_from_bytes(&line) {
|
||||
Ok(pattern) => patterns.push(pattern.to_string()),
|
||||
Err(err) => {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("{}: {}", line_number, err),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(patterns)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn bytes() {
|
||||
let pat = b"abc\xFFxyz";
|
||||
let err = pattern_from_bytes(pat).unwrap_err();
|
||||
assert_eq!(3, err.valid_up_to());
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(unix)]
|
||||
fn os() {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use std::ffi::OsStr;
|
||||
|
||||
let pat = OsStr::from_bytes(b"abc\xFFxyz");
|
||||
let err = pattern_from_os(pat).unwrap_err();
|
||||
assert_eq!(3, err.valid_up_to());
|
||||
}
|
||||
}
|
||||
@@ -1,267 +0,0 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::io::{self, Read};
|
||||
use std::iter;
|
||||
use std::process;
|
||||
use std::thread::{self, JoinHandle};
|
||||
|
||||
/// An error that can occur while running a command and reading its output.
///
/// A `From` implementation is provided so that this error converts directly
/// into an `io::Error`.
#[derive(Debug)]
pub struct CommandError {
    kind: CommandErrorKind,
}

/// The underlying cause of a `CommandError`.
#[derive(Debug)]
enum CommandErrorKind {
    /// An I/O error.
    Io(io::Error),
    /// The bytes captured from the command's stderr (possibly empty).
    Stderr(Vec<u8>),
}

impl CommandError {
    /// Create an error from an I/O error.
    pub(crate) fn io(ioerr: io::Error) -> CommandError {
        let kind = CommandErrorKind::Io(ioerr);
        CommandError { kind: kind }
    }

    /// Create an error from the contents of stderr (which may be empty).
    pub(crate) fn stderr(bytes: Vec<u8>) -> CommandError {
        let kind = CommandErrorKind::Stderr(bytes);
        CommandError { kind: kind }
    }
}

impl error::Error for CommandError {
    fn description(&self) -> &str {
        "command error"
    }
}

impl fmt::Display for CommandError {
    /// I/O errors are shown using their own `Display` output. Captured
    /// stderr is lossily decoded, trimmed and framed between two rules of
    /// 79 dashes; stderr that is empty after trimming is shown as
    /// `<stderr is empty>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let bytes = match self.kind {
            CommandErrorKind::Io(ref ioerr) => {
                return fmt::Display::fmt(ioerr, f);
            }
            CommandErrorKind::Stderr(ref bytes) => bytes,
        };
        let decoded = String::from_utf8_lossy(bytes);
        let text = decoded.trim();
        if text.is_empty() {
            return write!(f, "<stderr is empty>");
        }
        let rule: String = iter::repeat('-').take(79).collect();
        write!(f, "\n{div}\n{msg}\n{div}", div=rule, msg=text)
    }
}

impl From<io::Error> for CommandError {
    fn from(ioerr: io::Error) -> CommandError {
        CommandError { kind: CommandErrorKind::Io(ioerr) }
    }
}

impl From<CommandError> for io::Error {
    /// An I/O command error is unwrapped back into the `io::Error` it holds;
    /// a stderr error is boxed inside a new `io::Error` of kind `Other`.
    fn from(cmderr: CommandError) -> io::Error {
        match cmderr {
            CommandError { kind: CommandErrorKind::Io(ioerr) } => ioerr,
            wrapped => io::Error::new(io::ErrorKind::Other, wrapped),
        }
    }
}
|
||||
|
||||
/// Configures and builds a streaming reader for process output.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct CommandReaderBuilder {
|
||||
async_stderr: bool,
|
||||
}
|
||||
|
||||
impl CommandReaderBuilder {
|
||||
/// Create a new builder with the default configuration.
|
||||
pub fn new() -> CommandReaderBuilder {
|
||||
CommandReaderBuilder::default()
|
||||
}
|
||||
|
||||
/// Build a new streaming reader for the given command's output.
|
||||
///
|
||||
/// The caller should set everything that's required on the given command
|
||||
/// before building a reader, such as its arguments, environment and
|
||||
/// current working directory. Settings such as the stdout and stderr (but
|
||||
/// not stdin) pipes will be overridden so that they can be controlled by
|
||||
/// the reader.
|
||||
///
|
||||
/// If there was a problem spawning the given command, then its error is
|
||||
/// returned.
|
||||
pub fn build(
|
||||
&self,
|
||||
command: &mut process::Command,
|
||||
) -> Result<CommandReader, CommandError> {
|
||||
let mut child = command
|
||||
.stdout(process::Stdio::piped())
|
||||
.stderr(process::Stdio::piped())
|
||||
.spawn()?;
|
||||
let stdout = child.stdout.take().unwrap();
|
||||
let stderr =
|
||||
if self.async_stderr {
|
||||
StderrReader::async(child.stderr.take().unwrap())
|
||||
} else {
|
||||
StderrReader::sync(child.stderr.take().unwrap())
|
||||
};
|
||||
Ok(CommandReader {
|
||||
child: child,
|
||||
stdout: stdout,
|
||||
stderr: stderr,
|
||||
done: false,
|
||||
})
|
||||
}
|
||||
|
||||
/// When enabled, the reader will asynchronously read the contents of the
|
||||
/// command's stderr output. When disabled, stderr is only read after the
|
||||
/// stdout stream has been exhausted (or if the process quits with an error
|
||||
/// code).
|
||||
///
|
||||
/// Note that when enabled, this may require launching an additional
|
||||
/// thread in order to read stderr. This is done so that the process being
|
||||
/// executed is never blocked from writing to stdout or stderr. If this is
|
||||
/// disabled, then it is possible for the process to fill up the stderr
|
||||
/// buffer and deadlock.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn async_stderr(&mut self, yes: bool) -> &mut CommandReaderBuilder {
|
||||
self.async_stderr = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A streaming reader for a command's output.
|
||||
///
|
||||
/// The purpose of this reader is to provide an easy way to execute processes
|
||||
/// whose stdout is read in a streaming way while also making the processes'
|
||||
/// stderr available when the process fails with an exit code. This makes it
|
||||
/// possible to execute processes while surfacing the underlying failure mode
|
||||
/// in the case of an error.
|
||||
///
|
||||
/// Moreover, by default, this reader will asynchronously read the processes'
|
||||
/// stderr. This prevents subtle deadlocking bugs for noisy processes that
|
||||
/// write a lot to stderr. Currently, the entire contents of stderr is read
|
||||
/// on to the heap.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// This example shows how to invoke `gzip` to decompress the contents of a
|
||||
/// file. If the `gzip` command reports a failing exit status, then its stderr
|
||||
/// is returned as an error.
|
||||
///
|
||||
/// ```no_run
|
||||
/// use std::io::Read;
|
||||
/// use std::process::Command;
|
||||
/// use grep_cli::CommandReader;
|
||||
///
|
||||
/// # fn example() -> Result<(), Box<::std::error::Error>> {
|
||||
/// let mut cmd = Command::new("gzip");
|
||||
/// cmd.arg("-d").arg("-c").arg("/usr/share/man/man1/ls.1.gz");
|
||||
///
|
||||
/// let mut rdr = CommandReader::new(&mut cmd)?;
|
||||
/// let mut contents = vec![];
|
||||
/// rdr.read_to_end(&mut contents)?;
|
||||
/// # Ok(()) }
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
pub struct CommandReader {
|
||||
child: process::Child,
|
||||
stdout: process::ChildStdout,
|
||||
stderr: StderrReader,
|
||||
done: bool,
|
||||
}
|
||||
|
||||
impl CommandReader {
|
||||
/// Create a new streaming reader for the given command using the default
|
||||
/// configuration.
|
||||
///
|
||||
/// The caller should set everything that's required on the given command
|
||||
/// before building a reader, such as its arguments, environment and
|
||||
/// current working directory. Settings such as the stdout and stderr (but
|
||||
/// not stdin) pipes will be overridden so that they can be controlled by
|
||||
/// the reader.
|
||||
///
|
||||
/// If there was a problem spawning the given command, then its error is
|
||||
/// returned.
|
||||
///
|
||||
/// If the caller requires additional configuration for the reader
|
||||
/// returned, then use
|
||||
/// [`CommandReaderBuilder`](struct.CommandReaderBuilder.html).
|
||||
pub fn new(
|
||||
cmd: &mut process::Command,
|
||||
) -> Result<CommandReader, CommandError> {
|
||||
CommandReaderBuilder::new().build(cmd)
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Read for CommandReader {
|
||||
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
if self.done {
|
||||
return Ok(0);
|
||||
}
|
||||
let nread = self.stdout.read(buf)?;
|
||||
if nread == 0 {
|
||||
self.done = true;
|
||||
// Reap the child now that we're done reading. If the command
|
||||
// failed, report stderr as an error.
|
||||
if !self.child.wait()?.success() {
|
||||
return Err(io::Error::from(self.stderr.read_to_end()));
|
||||
}
|
||||
}
|
||||
Ok(nread)
|
||||
}
|
||||
}
|
||||
|
||||
/// A reader that encapsulates the asynchronous or synchronous reading of
|
||||
/// stderr.
|
||||
#[derive(Debug)]
|
||||
enum StderrReader {
|
||||
Async(Option<JoinHandle<CommandError>>),
|
||||
Sync(process::ChildStderr),
|
||||
}
|
||||
|
||||
impl StderrReader {
|
||||
/// Create a reader for stderr that reads contents asynchronously.
|
||||
fn async(mut stderr: process::ChildStderr) -> StderrReader {
|
||||
let handle = thread::spawn(move || {
|
||||
stderr_to_command_error(&mut stderr)
|
||||
});
|
||||
StderrReader::Async(Some(handle))
|
||||
}
|
||||
|
||||
/// Create a reader for stderr that reads contents synchronously.
|
||||
fn sync(stderr: process::ChildStderr) -> StderrReader {
|
||||
StderrReader::Sync(stderr)
|
||||
}
|
||||
|
||||
/// Consumes all of stderr on to the heap and returns it as an error.
|
||||
///
|
||||
/// If there was a problem reading stderr itself, then this returns an I/O
|
||||
/// command error.
|
||||
fn read_to_end(&mut self) -> CommandError {
|
||||
match *self {
|
||||
StderrReader::Async(ref mut handle) => {
|
||||
let handle = handle
|
||||
.take()
|
||||
.expect("read_to_end cannot be called more than once");
|
||||
handle
|
||||
.join()
|
||||
.expect("stderr reading thread does not panic")
|
||||
}
|
||||
StderrReader::Sync(ref mut stderr) => {
|
||||
stderr_to_command_error(stderr)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn stderr_to_command_error(stderr: &mut process::ChildStderr) -> CommandError {
|
||||
let mut bytes = vec![];
|
||||
match stderr.read_to_end(&mut bytes) {
|
||||
Ok(_) => CommandError::stderr(bytes),
|
||||
Err(err) => CommandError::io(err),
|
||||
}
|
||||
}
|
||||
@@ -1,133 +0,0 @@
|
||||
use std::io;
|
||||
|
||||
use termcolor;
|
||||
|
||||
use is_tty_stdout;
|
||||
|
||||
/// A writer that supports coloring with either line or block buffering.
|
||||
pub struct StandardStream(StandardStreamKind);
|
||||
|
||||
/// Returns a possibly buffered writer to stdout for the given color choice.
|
||||
///
|
||||
/// The writer returned is either line buffered or block buffered. The decision
|
||||
/// between these two is made automatically based on whether a tty is attached
|
||||
/// to stdout or not. If a tty is attached, then line buffering is used.
|
||||
/// Otherwise, block buffering is used. In general, block buffering is more
|
||||
/// efficient, but may increase the time it takes for the end user to see the
|
||||
/// first bits of output.
|
||||
///
|
||||
/// If you need more fine grained control over the buffering mode, then use one
|
||||
/// of `stdout_buffered_line` or `stdout_buffered_block`.
|
||||
///
|
||||
/// The color choice given is passed along to the underlying writer. To
|
||||
/// completely disable colors in all cases, use `ColorChoice::Never`.
|
||||
pub fn stdout(color_choice: termcolor::ColorChoice) -> StandardStream {
|
||||
if is_tty_stdout() {
|
||||
stdout_buffered_line(color_choice)
|
||||
} else {
|
||||
stdout_buffered_block(color_choice)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a line buffered writer to stdout for the given color choice.
|
||||
///
|
||||
/// This writer is useful when printing results directly to a tty such that
|
||||
/// users see output as soon as it's written. The downside of this approach
|
||||
/// is that it can be slower, especially when there is a lot of output.
|
||||
///
|
||||
/// You might consider using
|
||||
/// [`stdout`](fn.stdout.html)
|
||||
/// instead, which chooses the buffering strategy automatically based on
|
||||
/// whether stdout is connected to a tty.
|
||||
pub fn stdout_buffered_line(
|
||||
color_choice: termcolor::ColorChoice,
|
||||
) -> StandardStream {
|
||||
let out = termcolor::StandardStream::stdout(color_choice);
|
||||
StandardStream(StandardStreamKind::LineBuffered(out))
|
||||
}
|
||||
|
||||
/// Returns a block buffered writer to stdout for the given color choice.
|
||||
///
|
||||
/// This writer is useful when printing results to a file since it amortizes
|
||||
/// the cost of writing data. The downside of this approach is that it can
|
||||
/// increase the latency of display output when writing to a tty.
|
||||
///
|
||||
/// You might consider using
|
||||
/// [`stdout`](fn.stdout.html)
|
||||
/// instead, which chooses the buffering strategy automatically based on
|
||||
/// whether stdout is connected to a tty.
|
||||
pub fn stdout_buffered_block(
|
||||
color_choice: termcolor::ColorChoice,
|
||||
) -> StandardStream {
|
||||
let out = termcolor::BufferedStandardStream::stdout(color_choice);
|
||||
StandardStream(StandardStreamKind::BlockBuffered(out))
|
||||
}
|
||||
|
||||
enum StandardStreamKind {
|
||||
LineBuffered(termcolor::StandardStream),
|
||||
BlockBuffered(termcolor::BufferedStandardStream),
|
||||
}
|
||||
|
||||
impl io::Write for StandardStream {
|
||||
#[inline]
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
use self::StandardStreamKind::*;
|
||||
|
||||
match self.0 {
|
||||
LineBuffered(ref mut w) => w.write(buf),
|
||||
BlockBuffered(ref mut w) => w.write(buf),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
use self::StandardStreamKind::*;
|
||||
|
||||
match self.0 {
|
||||
LineBuffered(ref mut w) => w.flush(),
|
||||
BlockBuffered(ref mut w) => w.flush(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl termcolor::WriteColor for StandardStream {
|
||||
#[inline]
|
||||
fn supports_color(&self) -> bool {
|
||||
use self::StandardStreamKind::*;
|
||||
|
||||
match self.0 {
|
||||
LineBuffered(ref w) => w.supports_color(),
|
||||
BlockBuffered(ref w) => w.supports_color(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn set_color(&mut self, spec: &termcolor::ColorSpec) -> io::Result<()> {
|
||||
use self::StandardStreamKind::*;
|
||||
|
||||
match self.0 {
|
||||
LineBuffered(ref mut w) => w.set_color(spec),
|
||||
BlockBuffered(ref mut w) => w.set_color(spec),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reset(&mut self) -> io::Result<()> {
|
||||
use self::StandardStreamKind::*;
|
||||
|
||||
match self.0 {
|
||||
LineBuffered(ref mut w) => w.reset(),
|
||||
BlockBuffered(ref mut w) => w.reset(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_synchronous(&self) -> bool {
|
||||
use self::StandardStreamKind::*;
|
||||
|
||||
match self.0 {
|
||||
LineBuffered(ref w) => w.is_synchronous(),
|
||||
BlockBuffered(ref w) => w.is_synchronous(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,24 +0,0 @@
|
||||
[package]
|
||||
name = "grep-matcher"
|
||||
version = "0.1.2" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
A trait for regular expressions, with a focus on line oriented search.
|
||||
"""
|
||||
documentation = "https://docs.rs/grep-matcher"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "pattern", "trait"]
|
||||
license = "Unlicense/MIT"
|
||||
autotests = false
|
||||
|
||||
[dependencies]
|
||||
memchr = "2.1"
|
||||
|
||||
[dev-dependencies]
|
||||
regex = "1.1"
|
||||
|
||||
[[test]]
|
||||
name = "integration"
|
||||
path = "tests/tests.rs"
|
||||
@@ -1,36 +0,0 @@
|
||||
grep-matcher
|
||||
------------
|
||||
This crate provides a low level interface for describing regular expression
|
||||
matchers. The `grep` crate uses this interface in order to make the regex
|
||||
engine it uses pluggable.
|
||||
|
||||
[Travis CI build status](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[AppVeyor build status](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[grep-matcher on crates.io](https://crates.io/crates/grep-matcher)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/grep-matcher](https://docs.rs/grep-matcher)
|
||||
|
||||
**NOTE:** You probably don't want to use this crate directly. Instead, you
|
||||
should prefer the facade defined in the
|
||||
[`grep`](https://docs.rs/grep)
|
||||
crate.
|
||||
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
grep-matcher = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_matcher;
|
||||
```
|
||||
@@ -1,328 +0,0 @@
|
||||
use std::str;
|
||||
|
||||
use memchr::memchr;
|
||||
|
||||
/// Interpolate capture references in `replacement` and write the interpolation
|
||||
/// result to `dst`. References in `replacement` take the form of $N or $name,
|
||||
/// where `N` is a capture group index and `name` is a capture group name. The
|
||||
/// function provided, `name_to_index`, maps capture group names to indices.
|
||||
///
|
||||
/// The `append` function given is responsible for writing the replacement
|
||||
/// to the `dst` buffer. That is, it is called with the capture group index
|
||||
/// of a capture group reference and is expected to resolve the index to its
|
||||
/// corresponding matched text. If no such match exists, then `append` should
|
||||
/// not write anything to its given buffer.
|
||||
pub fn interpolate<A, N>(
|
||||
mut replacement: &[u8],
|
||||
mut append: A,
|
||||
mut name_to_index: N,
|
||||
dst: &mut Vec<u8>,
|
||||
) where
|
||||
A: FnMut(usize, &mut Vec<u8>),
|
||||
N: FnMut(&str) -> Option<usize>
|
||||
{
|
||||
while !replacement.is_empty() {
|
||||
match memchr(b'$', replacement) {
|
||||
None => break,
|
||||
Some(i) => {
|
||||
dst.extend(&replacement[..i]);
|
||||
replacement = &replacement[i..];
|
||||
}
|
||||
}
|
||||
if replacement.get(1).map_or(false, |&b| b == b'$') {
|
||||
dst.push(b'$');
|
||||
replacement = &replacement[2..];
|
||||
continue;
|
||||
}
|
||||
debug_assert!(!replacement.is_empty());
|
||||
let cap_ref = match find_cap_ref(replacement) {
|
||||
Some(cap_ref) => cap_ref,
|
||||
None => {
|
||||
dst.push(b'$');
|
||||
replacement = &replacement[1..];
|
||||
continue;
|
||||
}
|
||||
};
|
||||
replacement = &replacement[cap_ref.end..];
|
||||
match cap_ref.cap {
|
||||
Ref::Number(i) => append(i, dst),
|
||||
Ref::Named(name) => {
|
||||
if let Some(i) = name_to_index(name) {
|
||||
append(i, dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
dst.extend(replacement);
|
||||
}
|
||||
|
||||
/// `CaptureRef` represents a reference to a capture group inside some text.
|
||||
/// The reference is either a capture group name or a number.
|
||||
///
|
||||
/// It is also tagged with the position in the text immediately following the
|
||||
/// capture reference.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
struct CaptureRef<'a> {
|
||||
cap: Ref<'a>,
|
||||
end: usize,
|
||||
}
|
||||
|
||||
/// Identifies a capture group mentioned in replacement text, either by name
/// or by number.
///
/// e.g., `$2`, `$foo`, `${foo}`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Ref<'a> {
    /// A reference by group name, borrowed from the replacement text.
    Named(&'a str),
    /// A reference by group index.
    Number(usize),
}

/// A string slice converts to a reference by name.
impl<'a> From<&'a str> for Ref<'a> {
    fn from(name: &'a str) -> Ref<'a> {
        Ref::Named(name)
    }
}

/// An index converts to a reference by number.
impl From<usize> for Ref<'static> {
    fn from(index: usize) -> Ref<'static> {
        Ref::Number(index)
    }
}
|
||||
|
||||
/// Parses a possible reference to a capture group name in the given text,
|
||||
/// starting at the beginning of `replacement`.
|
||||
///
|
||||
/// If no such valid reference could be found, None is returned.
|
||||
fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
|
||||
let mut i = 0;
|
||||
if replacement.len() <= 1 || replacement[0] != b'$' {
|
||||
return None;
|
||||
}
|
||||
let mut brace = false;
|
||||
i += 1;
|
||||
if replacement[i] == b'{' {
|
||||
brace = true;
|
||||
i += 1;
|
||||
}
|
||||
let mut cap_end = i;
|
||||
while replacement.get(cap_end).map_or(false, is_valid_cap_letter) {
|
||||
cap_end += 1;
|
||||
}
|
||||
if cap_end == i {
|
||||
return None;
|
||||
}
|
||||
// We just verified that the range 0..cap_end is valid ASCII, so it must
|
||||
// therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
|
||||
// check with an unchecked conversion or by parsing the number straight
|
||||
// from &[u8].
|
||||
let cap = str::from_utf8(&replacement[i..cap_end])
|
||||
.expect("valid UTF-8 capture name");
|
||||
if brace {
|
||||
if !replacement.get(cap_end).map_or(false, |&b| b == b'}') {
|
||||
return None;
|
||||
}
|
||||
cap_end += 1;
|
||||
}
|
||||
Some(CaptureRef {
|
||||
cap: match cap.parse::<u32>() {
|
||||
Ok(i) => Ref::Number(i as usize),
|
||||
Err(_) => Ref::Named(cap),
|
||||
},
|
||||
end: cap_end,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given byte is allowed in a capture name.
///
/// Permitted bytes are the ASCII letters, the ASCII digits and `_`.
///
/// The argument is taken by reference so the function can be passed directly
/// to combinators operating on `Option<&u8>`.
fn is_valid_cap_letter(b: &u8) -> bool {
    b.is_ascii_alphanumeric() || *b == b'_'
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{CaptureRef, find_cap_ref, interpolate};

    // Generates a test for `find_cap_ref`. With two arguments the text must
    // not parse as a capture reference; with three it must parse to `$capref`.
    macro_rules! find {
        ($name:ident, $text:expr) => {
            #[test]
            fn $name() {
                assert_eq!(None, find_cap_ref($text.as_bytes()));
            }
        };
        ($name:ident, $text:expr, $capref:expr) => {
            #[test]
            fn $name() {
                assert_eq!(Some($capref), find_cap_ref($text.as_bytes()));
            }
        };
    }

    // Shorthand for building the expected `CaptureRef` from a name or a
    // number plus the end offset.
    macro_rules! c {
        ($name_or_number:expr, $pos:expr) => {
            CaptureRef { cap: $name_or_number.into(), end: $pos }
        };
    }

    find!(find_cap_ref1, "$foo", c!("foo", 4));
    find!(find_cap_ref2, "${foo}", c!("foo", 6));
    find!(find_cap_ref3, "$0", c!(0, 2));
    find!(find_cap_ref4, "$5", c!(5, 2));
    find!(find_cap_ref5, "$10", c!(10, 3));
    find!(find_cap_ref6, "$42a", c!("42a", 4));
    find!(find_cap_ref7, "${42}a", c!(42, 5));
    find!(find_cap_ref8, "${42");
    find!(find_cap_ref9, "${42 ");
    find!(find_cap_ref10, " $0 ");
    find!(find_cap_ref11, "$");
    find!(find_cap_ref12, " ");
    find!(find_cap_ref13, "");

    // A convenience routine for using interpolate's unwieldy but flexible API.
    //
    // `name_to_index` maps group names to indices, and `caps[i]` is the text
    // matched by group `i`.
    fn interpolate_string(
        mut name_to_index: Vec<(&'static str, usize)>,
        caps: Vec<&'static str>,
        replacement: &str,
    ) -> String {
        // Sorted by name so that lookups can use binary search.
        name_to_index.sort_by_key(|x| x.0);

        let mut dst = vec![];
        interpolate(
            replacement.as_bytes(),
            |i, dst| {
                if let Some(&s) = caps.get(i) {
                    dst.extend(s.as_bytes());
                }
            },
            |name| -> Option<usize> {
                name_to_index
                    .binary_search_by_key(&name, |x| x.0)
                    .ok()
                    .map(|i| name_to_index[i].1)
            },
            &mut dst,
        );
        String::from_utf8(dst).unwrap()
    }

    // Generates a test asserting that interpolating `$hay` with the given
    // name map and capture texts yields `$expected`.
    macro_rules! interp {
        ($name:ident, $map:expr, $caps:expr, $hay:expr, $expected:expr $(,)*) => {
            #[test]
            fn $name() {
                assert_eq!($expected, interpolate_string($map, $caps, $hay));
            }
        }
    }

    interp!(
        interp1,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test $foo test",
        "test xxx test",
    );

    interp!(
        interp2,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test$footest",
        "test",
    );

    interp!(
        interp3,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test${foo}test",
        "testxxxtest",
    );

    interp!(
        interp4,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test$2test",
        "test",
    );

    interp!(
        interp5,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test${2}test",
        "testxxxtest",
    );

    interp!(
        interp6,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test $$foo test",
        "test $foo test",
    );

    interp!(
        interp7,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test $foo",
        "test xxx",
    );

    interp!(
        interp8,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "$foo test",
        "xxx test",
    );

    interp!(
        interp9,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test $bar$foo",
        "test yyyxxx",
    );

    interp!(
        interp10,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test $ test",
        "test $ test",
    );

    interp!(
        interp11,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test ${} test",
        "test ${} test",
    );

    interp!(
        interp12,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test ${ } test",
        "test ${ } test",
    );

    interp!(
        interp13,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test ${a b} test",
        "test ${a b} test",
    );

    // `a` is not a known group, so `${a}` expands to nothing and the space on
    // either side of it is preserved: the result has two consecutive spaces.
    interp!(
        interp14,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test ${a} test",
        "test  test",
    );
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,208 +0,0 @@
|
||||
use grep_matcher::{Captures, Match, Matcher};
|
||||
use regex::bytes::Regex;
|
||||
|
||||
use util::{RegexMatcher, RegexMatcherNoCaps};
|
||||
|
||||
fn matcher(pattern: &str) -> RegexMatcher {
|
||||
RegexMatcher::new(Regex::new(pattern).unwrap())
|
||||
}
|
||||
|
||||
fn matcher_no_caps(pattern: &str) -> RegexMatcherNoCaps {
|
||||
RegexMatcherNoCaps(Regex::new(pattern).unwrap())
|
||||
}
|
||||
|
||||
fn m(start: usize, end: usize) -> Match {
|
||||
Match::new(start, end)
|
||||
}
|
||||
|
||||
#[test]
fn find() {
    // The surrounding spaces are not part of the reported match.
    let re = matcher(r"(\w+)\s+(\w+)");
    let found = re.find(b" homer simpson ").unwrap();
    assert_eq!(found, Some(m(1, 14)));
}
|
||||
|
||||
#[test]
fn find_iter() {
    let re = matcher(r"(\w+)\s+(\w+)");
    let haystack = b"aa bb cc dd";

    // Returning `true` keeps the iteration going, so every match is seen.
    let mut all = vec![];
    re.find_iter(haystack, |hit| {
        all.push(hit);
        true
    }).unwrap();
    assert_eq!(all, vec![m(0, 5), m(6, 11)]);

    // Test that find_iter respects short circuiting.
    let mut first_only = vec![];
    re.find_iter(haystack, |hit| {
        first_only.push(hit);
        false
    }).unwrap();
    assert_eq!(first_only, vec![m(0, 5)]);
}
|
||||
|
||||
#[test]
fn try_find_iter() {
    #[derive(Clone, Debug, Eq, PartialEq)]
    struct MyError;

    let re = matcher(r"(\w+)\s+(\w+)");
    let mut seen = vec![];
    // Accept the first match, then fail on the second one.
    let outcome = re.try_find_iter(b"aa bb cc dd", |hit| {
        if !seen.is_empty() {
            return Err(MyError);
        }
        seen.push(hit);
        Ok(true)
    });
    // The matcher itself succeeds; the callback's error is the inner result.
    let err = outcome.unwrap().unwrap_err();
    assert_eq!(seen, vec![m(0, 5)]);
    assert_eq!(err, MyError);
}
|
||||
|
||||
#[test]
fn shortest_match() {
    let wrapper = matcher(r"a+");
    // The trait's default impl isn't doing anything smart: it simply defers
    // to `find`, so it reports the end of the full match.
    let via_trait = wrapper.shortest_match(b"aaa").unwrap();
    assert_eq!(via_trait, Some(3));
    // The actual underlying regex is smarter.
    let via_regex = wrapper.re.shortest_match(b"aaa");
    assert_eq!(via_regex, Some(1));
}
|
||||
|
||||
#[test]
fn captures() {
    let re = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");

    // Two named groups plus the implicit group for the whole match.
    assert_eq!(re.capture_count(), 3);
    assert_eq!(re.capture_index("a"), Some(1));
    assert_eq!(re.capture_index("b"), Some(2));
    assert_eq!(re.capture_index("nada"), None);

    let mut caps = re.new_captures().unwrap();
    let matched = re.captures(b" homer simpson ", &mut caps).unwrap();
    assert!(matched);

    let spans: Vec<_> = (0..3).map(|i| caps.get(i)).collect();
    assert_eq!(spans, vec![Some(m(1, 14)), Some(m(1, 6)), Some(m(7, 14))]);
}
|
||||
|
||||
#[test]
fn captures_iter() {
    let re = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
    let haystack = b"aa bb cc dd";
    let mut caps = re.new_captures().unwrap();

    // For every match, record the overall span followed by both groups.
    let mut all = vec![];
    re.captures_iter(haystack, &mut caps, |caps| {
        all.extend((0..3).map(|i| caps.get(i).unwrap()));
        true
    }).unwrap();
    assert_eq!(all, vec![
        m(0, 5), m(0, 2), m(3, 5),
        m(6, 11), m(6, 8), m(9, 11),
    ]);

    // Test that captures_iter respects short circuiting.
    let mut first_only = vec![];
    re.captures_iter(haystack, &mut caps, |caps| {
        first_only.extend((0..3).map(|i| caps.get(i).unwrap()));
        false
    }).unwrap();
    assert_eq!(first_only, vec![m(0, 5), m(0, 2), m(3, 5)]);
}
|
||||
|
||||
#[test]
fn try_captures_iter() {
    #[derive(Clone, Debug, Eq, PartialEq)]
    struct MyError;

    let re = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
    let mut caps = re.new_captures().unwrap();
    let mut seen = vec![];
    // Record the groups of the first match, then fail on the second one.
    let outcome = re.try_captures_iter(b"aa bb cc dd", &mut caps, |caps| {
        if !seen.is_empty() {
            return Err(MyError);
        }
        seen.extend((0..3).map(|i| caps.get(i).unwrap()));
        Ok(true)
    });
    // The matcher itself succeeds; the callback's error is the inner result.
    let err = outcome.unwrap().unwrap_err();
    assert_eq!(seen, vec![m(0, 5), m(0, 2), m(3, 5)]);
    assert_eq!(err, MyError);
}
|
||||
|
||||
// Checks the default impls of the capturing APIs: when the underlying matcher
// doesn't support capturing, every capture related method must fail fast
// instead of reporting anything.
#[test]
fn no_captures() {
    let re = matcher_no_caps(r"(?P<a>\w+)\s+(?P<b>\w+)");

    // The pattern has groups, but the matcher advertises none of them.
    assert_eq!(re.capture_count(), 0);
    for name in &["a", "b", "nada"] {
        assert_eq!(re.capture_index(name), None);
    }

    let mut caps = re.new_captures().unwrap();
    let matched = re.captures(b"homer simpson", &mut caps).unwrap();
    assert!(!matched);

    // The callback must never run.
    let mut called = false;
    re.captures_iter(b"homer simpson", &mut caps, |_| {
        called = true;
        true
    }).unwrap();
    assert!(!called);
}
|
||||
|
||||
#[test]
fn replace() {
    let re = matcher(r"(\w+)\s+(\w+)");
    let haystack = b"aa bb cc dd";

    // Every match is replaced by a single `z`; text between matches is kept.
    let mut every = vec![];
    re.replace(haystack, &mut every, |_, out| {
        out.push(b'z');
        true
    }).unwrap();
    assert_eq!(every, b"z z");

    // Test that replacements respect short circuiting.
    let mut first_only = vec![];
    re.replace(haystack, &mut first_only, |_, out| {
        out.push(b'z');
        false
    }).unwrap();
    assert_eq!(first_only, b"z cc dd");
}
|
||||
|
||||
#[test]
fn replace_with_captures() {
    let re = matcher(r"(\w+)\s+(\w+)");
    let haystack = b"aa bb cc dd";
    // Swaps the two words of each match.
    let swap = b"$2 $1";
    let mut caps = re.new_captures().unwrap();

    let mut every = vec![];
    re.replace_with_captures(haystack, &mut caps, &mut every, |caps, out| {
        caps.interpolate(|name| re.capture_index(name), haystack, swap, out);
        true
    }).unwrap();
    assert_eq!(every, b"bb aa dd cc");

    // Test that replacements respect short circuiting.
    let mut first_only = vec![];
    re.replace_with_captures(
        haystack,
        &mut caps,
        &mut first_only,
        |caps, out| {
            caps.interpolate(
                |name| re.capture_index(name),
                haystack,
                swap,
                out,
            );
            false
        },
    ).unwrap();
    assert_eq!(first_only, b"bb aa cc dd");
}
|
||||
@@ -1,6 +0,0 @@
|
||||
extern crate grep_matcher;
|
||||
extern crate regex;
|
||||
|
||||
mod util;
|
||||
|
||||
mod test_matcher;
|
||||
@@ -1,104 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::result;
|
||||
|
||||
use grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError};
|
||||
use regex::bytes::{CaptureLocations, Regex};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RegexMatcher {
|
||||
pub re: Regex,
|
||||
pub names: HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl RegexMatcher {
|
||||
pub fn new(re: Regex) -> RegexMatcher {
|
||||
let mut names = HashMap::new();
|
||||
for (i, optional_name) in re.capture_names().enumerate() {
|
||||
if let Some(name) = optional_name {
|
||||
names.insert(name.to_string(), i);
|
||||
}
|
||||
}
|
||||
RegexMatcher {
|
||||
re: re,
|
||||
names: names,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type Result<T> = result::Result<T, NoError>;
|
||||
|
||||
impl Matcher for RegexMatcher {
|
||||
type Captures = RegexCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>> {
|
||||
Ok(self.re
|
||||
.find_at(haystack, at)
|
||||
.map(|m| Match::new(m.start(), m.end())))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures> {
|
||||
Ok(RegexCaptures(self.re.capture_locations()))
|
||||
}
|
||||
|
||||
fn captures_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool> {
|
||||
Ok(self.re.captures_read_at(&mut caps.0, haystack, at).is_some())
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
self.re.captures_len()
|
||||
}
|
||||
|
||||
fn capture_index(&self, name: &str) -> Option<usize> {
|
||||
self.names.get(name).map(|i| *i)
|
||||
}
|
||||
|
||||
// We purposely don't implement any other methods, so that we test the
|
||||
// default impls. The "real" Regex impl for Matcher provides a few more
|
||||
// impls. e.g., Its `find_iter` impl is faster than what we can do here,
|
||||
// since the regex crate avoids synchronization overhead.
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RegexMatcherNoCaps(pub Regex);
|
||||
|
||||
impl Matcher for RegexMatcherNoCaps {
|
||||
type Captures = NoCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>> {
|
||||
Ok(self.0
|
||||
.find_at(haystack, at)
|
||||
.map(|m| Match::new(m.start(), m.end())))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<NoCaptures> {
|
||||
Ok(NoCaptures::new())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexCaptures(CaptureLocations);
|
||||
|
||||
impl Captures for RegexCaptures {
|
||||
fn len(&self) -> usize {
|
||||
self.0.len()
|
||||
}
|
||||
|
||||
fn get(&self, i: usize) -> Option<Match> {
|
||||
self.0.pos(i).map(|(s, e)| Match::new(s, e))
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
[package]
|
||||
name = "grep-pcre2"
|
||||
version = "0.1.3" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Use PCRE2 with the 'grep' crate.
|
||||
"""
|
||||
documentation = "https://docs.rs/grep-pcre2"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "pcre", "backreference", "look"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[dependencies]
|
||||
grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
|
||||
pcre2 = "0.2.0"
|
||||
@@ -1,21 +0,0 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Andrew Gallant
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -1,39 +0,0 @@
|
||||
grep-pcre2
|
||||
----------
|
||||
The `grep-pcre2` crate provides an implementation of the `Matcher` trait from
|
||||
the `grep-matcher` crate. This implementation permits PCRE2 to be used in the
|
||||
`grep` crate for fast line oriented searching.
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/grep-pcre2)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/grep-pcre2](https://docs.rs/grep-pcre2)
|
||||
|
||||
**NOTE:** You probably don't want to use this crate directly. Instead, you
|
||||
should prefer the facade defined in the
|
||||
[`grep`](https://docs.rs/grep)
|
||||
crate.
|
||||
|
||||
If you're looking to just use PCRE2 from Rust, then you probably want the
|
||||
[`pcre2`](https://docs.rs/pcre2)
|
||||
crate, which provides high level safe bindings to PCRE2.
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
grep-pcre2 = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_pcre2;
|
||||
```
|
||||
@@ -1,24 +0,0 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
||||
@@ -1,59 +0,0 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
|
||||
/// An error that can occur in this crate.
|
||||
///
|
||||
/// Generally, this error corresponds to problems building a regular
|
||||
/// expression, whether it's in parsing, compilation or a problem with
|
||||
/// guaranteeing a configured optimization.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Error {
|
||||
kind: ErrorKind,
|
||||
}
|
||||
|
||||
impl Error {
|
||||
pub(crate) fn regex<E: error::Error>(err: E) -> Error {
|
||||
Error { kind: ErrorKind::Regex(err.to_string()) }
|
||||
}
|
||||
|
||||
/// Return the kind of this error.
|
||||
pub fn kind(&self) -> &ErrorKind {
|
||||
&self.kind
|
||||
}
|
||||
}
|
||||
|
||||
/// The category of an `Error`.
#[derive(Clone, Debug)]
pub enum ErrorKind {
    /// A regular expression could not be built. The cause is either a syntax
    /// error or a pattern that is too big to compile.
    ///
    /// The payload is the underlying error rendered as a string.
    Regex(String),
    /// Hints that destructuring should not be exhaustive.
    ///
    /// More variants may be added to this enum over time. This variant exists
    /// so that clients can't rely on exhaustive matching, which would
    /// otherwise turn every new variant into a breaking change.
    #[doc(hidden)]
    __Nonexhaustive,
}
|
||||
|
||||
impl error::Error for Error {
|
||||
fn description(&self) -> &str {
|
||||
match self.kind {
|
||||
ErrorKind::Regex(_) => "regex error",
|
||||
ErrorKind::__Nonexhaustive => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.kind {
|
||||
ErrorKind::Regex(ref s) => write!(f, "{}", s),
|
||||
ErrorKind::__Nonexhaustive => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
/*!
|
||||
An implementation of `grep-matcher`'s `Matcher` trait for
|
||||
[PCRE2](https://www.pcre.org/).
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate grep_matcher;
|
||||
extern crate pcre2;
|
||||
|
||||
pub use error::{Error, ErrorKind};
|
||||
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||
pub use pcre2::{is_jit_available, version};
|
||||
|
||||
mod error;
|
||||
mod matcher;
|
||||
@@ -1,464 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use grep_matcher::{Captures, Match, Matcher};
|
||||
use pcre2::bytes::{CaptureLocations, Regex, RegexBuilder};
|
||||
|
||||
use error::Error;
|
||||
|
||||
/// A builder for configuring the compilation of a PCRE2 regex.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexMatcherBuilder {
|
||||
builder: RegexBuilder,
|
||||
case_smart: bool,
|
||||
word: bool,
|
||||
}
|
||||
|
||||
impl RegexMatcherBuilder {
|
||||
/// Create a new matcher builder with a default configuration.
|
||||
pub fn new() -> RegexMatcherBuilder {
|
||||
RegexMatcherBuilder {
|
||||
builder: RegexBuilder::new(),
|
||||
case_smart: false,
|
||||
word: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Compile the given pattern into a PCRE matcher using the current
|
||||
/// configuration.
|
||||
///
|
||||
/// If there was a problem compiling the pattern, then an error is
|
||||
/// returned.
|
||||
pub fn build(&self, pattern: &str) -> Result<RegexMatcher, Error> {
|
||||
let mut builder = self.builder.clone();
|
||||
if self.case_smart && !has_uppercase_literal(pattern) {
|
||||
builder.caseless(true);
|
||||
}
|
||||
let res =
|
||||
if self.word {
|
||||
let pattern = format!(r"(?<!\w)(?:{})(?!\w)", pattern);
|
||||
builder.build(&pattern)
|
||||
} else {
|
||||
builder.build(pattern)
|
||||
};
|
||||
res.map_err(Error::regex).map(|regex| {
|
||||
let mut names = HashMap::new();
|
||||
for (i, name) in regex.capture_names().iter().enumerate() {
|
||||
if let Some(ref name) = *name {
|
||||
names.insert(name.to_string(), i);
|
||||
}
|
||||
}
|
||||
RegexMatcher { regex, names }
|
||||
})
|
||||
}
|
||||
|
||||
/// Enables case insensitive matching.
|
||||
///
|
||||
/// If the `utf` option is also set, then Unicode case folding is used
|
||||
/// to determine case insensitivity. When the `utf` option is not set,
|
||||
/// then only standard ASCII case insensitivity is considered.
|
||||
///
|
||||
/// This option corresponds to the `i` flag.
|
||||
pub fn caseless(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.caseless(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to enable "smart case" or not.
|
||||
///
|
||||
/// When smart case is enabled, the builder will automatically enable
|
||||
/// case insensitive matching based on how the pattern is written. Namely,
|
||||
/// case insensitive mode is enabled when both of the following things
|
||||
/// are believed to be true:
|
||||
///
|
||||
/// 1. The pattern contains at least one literal character. For example,
|
||||
/// `a\w` contains a literal (`a`) but `\w` does not.
|
||||
/// 2. Of the literals in the pattern, none of them are considered to be
|
||||
/// uppercase according to Unicode. For example, `foo\pL` has no
|
||||
/// uppercase literals but `Foo\pL` does.
|
||||
///
|
||||
/// Note that the implementation of this is not perfect. Namely, `\p{Ll}`
|
||||
/// will prevent case insensitive matching even though it is part of a meta
|
||||
/// sequence. This bug will probably never be fixed.
|
||||
pub fn case_smart(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.case_smart = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables "dot all" matching.
|
||||
///
|
||||
/// When enabled, the `.` metacharacter in the pattern matches any
|
||||
/// character, include `\n`. When disabled (the default), `.` will match
|
||||
/// any character except for `\n`.
|
||||
///
|
||||
/// This option corresponds to the `s` flag.
|
||||
pub fn dotall(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.dotall(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable "extended" mode in the pattern, where whitespace is ignored.
|
||||
///
|
||||
/// This option corresponds to the `x` flag.
|
||||
pub fn extended(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.extended(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable multiline matching mode.
|
||||
///
|
||||
/// When enabled, the `^` and `$` anchors will match both at the beginning
|
||||
/// and end of a subject string, in addition to matching at the start of
|
||||
/// a line and the end of a line. When disabled, the `^` and `$` anchors
|
||||
/// will only match at the beginning and end of a subject string.
|
||||
///
|
||||
/// This option corresponds to the `m` flag.
|
||||
pub fn multi_line(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.multi_line(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable matching of CRLF as a line terminator.
|
||||
///
|
||||
/// When enabled, anchors such as `^` and `$` will match any of the
|
||||
/// following as a line terminator: `\r`, `\n` or `\r\n`.
|
||||
///
|
||||
/// This is disabled by default, in which case, only `\n` is recognized as
|
||||
/// a line terminator.
|
||||
pub fn crlf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.crlf(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Require that all matches occur on word boundaries.
|
||||
///
|
||||
/// Enabling this option is subtly different than putting `\b` assertions
|
||||
/// on both sides of your pattern. In particular, a `\b` assertion requires
|
||||
/// that one side of it match a word character while the other match a
|
||||
/// non-word character. This option, in contrast, merely requires that
|
||||
/// one side match a non-word character.
|
||||
///
|
||||
/// For example, `\b-2\b` will not match `foo -2 bar` since `-` is not a
|
||||
/// word character. However, `-2` with this `word` option enabled will
|
||||
/// match the `-2` in `foo -2 bar`.
|
||||
pub fn word(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.word = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable Unicode matching mode.
|
||||
///
|
||||
/// When enabled, the following patterns become Unicode aware: `\b`, `\B`,
|
||||
/// `\d`, `\D`, `\s`, `\S`, `\w`, `\W`.
|
||||
///
|
||||
/// When set, this implies UTF matching mode. It is not possible to enable
|
||||
/// Unicode matching mode without enabling UTF matching mode.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn ucp(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.ucp(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable UTF matching mode.
|
||||
///
|
||||
/// When enabled, characters are treated as sequences of code units that
|
||||
/// make up a single codepoint instead of as single bytes. For example,
|
||||
/// this will cause `.` to match any single UTF-8 encoded codepoint, where
|
||||
/// as when this is disabled, `.` will any single byte (except for `\n` in
|
||||
/// both cases, unless "dot all" mode is enabled).
|
||||
///
|
||||
/// Note that when UTF matching mode is enabled, every search performed
|
||||
/// will do a UTF-8 validation check, which can impact performance. The
|
||||
/// UTF-8 check can be disabled via the `disable_utf_check` option, but it
|
||||
/// is undefined behavior to enable UTF matching mode and search invalid
|
||||
/// UTF-8.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn utf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.utf(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// When UTF matching mode is enabled, this will disable the UTF checking
|
||||
/// that PCRE2 will normally perform automatically. If UTF matching mode
|
||||
/// is not enabled, then this has no effect.
|
||||
///
|
||||
/// UTF checking is enabled by default when UTF matching mode is enabled.
|
||||
/// If UTF matching mode is enabled and UTF checking is enabled, then PCRE2
|
||||
/// will return an error if you attempt to search a subject string that is
|
||||
/// not valid UTF-8.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// It is undefined behavior to disable the UTF check in UTF matching mode
|
||||
/// and search a subject string that is not valid UTF-8. When the UTF check
|
||||
/// is disabled, callers must guarantee that the subject string is valid
|
||||
/// UTF-8.
|
||||
pub unsafe fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
|
||||
self.builder.disable_utf_check();
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable PCRE2's JIT and return an error if it's not available.
|
||||
///
|
||||
/// This generally speeds up matching quite a bit. The downside is that it
|
||||
/// can increase the time it takes to compile a pattern.
|
||||
///
|
||||
/// If the JIT isn't available or if JIT compilation returns an error, then
|
||||
/// regex compilation will fail with the corresponding error.
|
||||
///
|
||||
/// This is disabled by default, and always overrides `jit_if_available`.
|
||||
pub fn jit(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.jit(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable PCRE2's JIT if it's available.
|
||||
///
|
||||
/// This generally speeds up matching quite a bit. The downside is that it
|
||||
/// can increase the time it takes to compile a pattern.
|
||||
///
|
||||
/// If the JIT isn't available or if JIT compilation returns an error,
|
||||
/// then a debug message with the error will be emitted and the regex will
|
||||
/// otherwise silently fall back to non-JIT matching.
|
||||
///
|
||||
/// This is disabled by default, and always overrides `jit`.
|
||||
pub fn jit_if_available(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.jit_if_available(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the maximum size of PCRE2's JIT stack, in bytes. If the JIT is
|
||||
/// not enabled, then this has no effect.
|
||||
///
|
||||
/// When `None` is given, no custom JIT stack will be created, and instead,
|
||||
/// the default JIT stack is used. When the default is used, its maximum
|
||||
/// size is 32 KB.
|
||||
///
|
||||
/// When this is set, then a new JIT stack will be created with the given
|
||||
/// maximum size as its limit.
|
||||
///
|
||||
/// Increasing the stack size can be useful for larger regular expressions.
|
||||
///
|
||||
/// By default, this is set to `None`.
|
||||
pub fn max_jit_stack_size(
|
||||
&mut self,
|
||||
bytes: Option<usize>,
|
||||
) -> &mut RegexMatcherBuilder {
|
||||
self.builder.max_jit_stack_size(bytes);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// An implementation of the `Matcher` trait using PCRE2.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexMatcher {
|
||||
regex: Regex,
|
||||
names: HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl RegexMatcher {
|
||||
/// Create a new matcher from the given pattern using the default
|
||||
/// configuration.
|
||||
pub fn new(pattern: &str) -> Result<RegexMatcher, Error> {
|
||||
RegexMatcherBuilder::new().build(pattern)
|
||||
}
|
||||
}
|
||||
|
||||
impl Matcher for RegexMatcher {
|
||||
type Captures = RegexCaptures;
|
||||
type Error = Error;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>, Error> {
|
||||
Ok(self.regex
|
||||
.find_at(haystack, at)
|
||||
.map_err(Error::regex)?
|
||||
.map(|m| Match::new(m.start(), m.end())))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures, Error> {
|
||||
Ok(RegexCaptures::new(self.regex.capture_locations()))
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
self.regex.captures_len()
|
||||
}
|
||||
|
||||
fn capture_index(&self, name: &str) -> Option<usize> {
|
||||
self.names.get(name).map(|i| *i)
|
||||
}
|
||||
|
||||
fn try_find_iter<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
mut matched: F,
|
||||
) -> Result<Result<(), E>, Error>
|
||||
where F: FnMut(Match) -> Result<bool, E>
|
||||
{
|
||||
for result in self.regex.find_iter(haystack) {
|
||||
let m = result.map_err(Error::regex)?;
|
||||
match matched(Match::new(m.start(), m.end())) {
|
||||
Ok(true) => continue,
|
||||
Ok(false) => return Ok(Ok(())),
|
||||
Err(err) => return Ok(Err(err)),
|
||||
}
|
||||
}
|
||||
Ok(Ok(()))
|
||||
}
|
||||
|
||||
fn captures_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool, Error> {
|
||||
Ok(self.regex
|
||||
.captures_read_at(&mut caps.locs, haystack, at)
|
||||
.map_err(Error::regex)?
|
||||
.is_some())
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents the match offsets of each capturing group in a match.
|
||||
///
|
||||
/// The first, or `0`th capture group, always corresponds to the entire match
|
||||
/// and is guaranteed to be present when a match occurs. The next capture
|
||||
/// group, at index `1`, corresponds to the first capturing group in the regex,
|
||||
/// ordered by the position at which the left opening parenthesis occurs.
|
||||
///
|
||||
/// Note that not all capturing groups are guaranteed to be present in a match.
|
||||
/// For example, in the regex, `(?P<foo>\w)|(?P<bar>\W)`, only one of `foo`
|
||||
/// or `bar` will ever be set in any given match.
|
||||
///
|
||||
/// In order to access a capture group by name, you'll need to first find the
|
||||
/// index of the group using the corresponding matcher's `capture_index`
|
||||
/// method, and then use that index with `RegexCaptures::get`.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexCaptures {
|
||||
/// Where the locations are stored.
|
||||
locs: CaptureLocations,
|
||||
}
|
||||
|
||||
impl Captures for RegexCaptures {
|
||||
fn len(&self) -> usize {
|
||||
self.locs.len()
|
||||
}
|
||||
|
||||
fn get(&self, i: usize) -> Option<Match> {
|
||||
self.locs.get(i).map(|(s, e)| Match::new(s, e))
|
||||
}
|
||||
}
|
||||
|
||||
impl RegexCaptures {
|
||||
pub(crate) fn new(locs: CaptureLocations) -> RegexCaptures {
|
||||
RegexCaptures { locs }
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine whether the pattern contains an uppercase character which should
/// negate the effect of the smart-case option.
///
/// Ideally we would be able to check the AST in order to correctly handle
/// things like '\p{Ll}' and '\p{Lu}' (which should be treated as explicitly
/// cased), but PCRE doesn't expose enough details for that kind of analysis.
/// For now, our 'good enough' solution is to simply perform a semi-naïve
/// scan of the input pattern and ignore the character immediately following
/// each '\'. This at least lets us support the most common cases, like
/// 'foo\w' and 'foo\S', in an intuitive manner.
fn has_uppercase_literal(pattern: &str) -> bool {
    let mut chars = pattern.chars();
    // A `for` loop cannot be used here because the escape branch needs to
    // advance the same iterator by one extra character.
    while let Some(c) = chars.next() {
        if c == '\\' {
            // Skip the escaped character so that e.g. `\S` is not treated
            // as an uppercase literal.
            chars.next();
        } else if c.is_uppercase() {
            return true;
        }
    }
    false
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use grep_matcher::{LineMatchKind, Matcher};
    use super::*;

    // Test that enabling word matches does the right thing and demonstrate
    // the difference between it and surrounding the regex in `\b`.
    #[test]
    fn word() {
        let matcher = RegexMatcherBuilder::new()
            .word(true)
            .build(r"-2")
            .unwrap();
        assert!(matcher.is_match(b"abc -2 foo").unwrap());

        let matcher = RegexMatcherBuilder::new()
            .word(false)
            .build(r"\b-2\b")
            .unwrap();
        assert!(!matcher.is_match(b"abc -2 foo").unwrap());
    }

    // Test that enabling CRLF permits `$` to match at the end of a line.
    #[test]
    fn line_terminator_crlf() {
        // Test normal use of `$` with a `\n` line terminator.
        let matcher = RegexMatcherBuilder::new()
            .multi_line(true)
            .build(r"abc$")
            .unwrap();
        assert!(matcher.is_match(b"abc\n").unwrap());

        // Test that `$` doesn't match at `\r\n` boundary normally.
        let matcher = RegexMatcherBuilder::new()
            .multi_line(true)
            .build(r"abc$")
            .unwrap();
        assert!(!matcher.is_match(b"abc\r\n").unwrap());

        // Now check the CRLF handling.
        let matcher = RegexMatcherBuilder::new()
            .multi_line(true)
            .crlf(true)
            .build(r"abc$")
            .unwrap();
        assert!(matcher.is_match(b"abc\r\n").unwrap());
    }

    // Test that smart case works.
    #[test]
    fn case_smart() {
        let matcher = RegexMatcherBuilder::new()
            .case_smart(true)
            .build(r"abc")
            .unwrap();
        assert!(matcher.is_match(b"ABC").unwrap());

        let matcher = RegexMatcherBuilder::new()
            .case_smart(true)
            .build(r"aBc")
            .unwrap();
        assert!(!matcher.is_match(b"ABC").unwrap());
    }

    // Test that finding candidate lines works as expected.
    #[test]
    fn candidate_lines() {
        fn is_confirmed(m: LineMatchKind) -> bool {
            match m {
                LineMatchKind::Confirmed(_) => true,
                _ => false,
            }
        }

        let matcher = RegexMatcherBuilder::new()
            .build(r"\wfoo\s")
            .unwrap();
        let m = matcher.find_candidate_line(b"afoo ").unwrap().unwrap();
        assert!(is_confirmed(m));
    }
}
|
||||
@@ -1,31 +0,0 @@
|
||||
[package]
|
||||
name = "grep-printer"
|
||||
version = "0.1.1" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
An implementation of the grep crate's Sink trait that provides standard
|
||||
printing of search results, similar to grep itself.
|
||||
"""
|
||||
documentation = "https://docs.rs/grep-printer"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["grep", "pattern", "print", "printer", "sink"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[features]
|
||||
default = ["serde1"]
|
||||
serde1 = ["base64", "serde", "serde_derive", "serde_json"]
|
||||
|
||||
[dependencies]
|
||||
base64 = { version = "0.10.0", optional = true }
|
||||
bstr = "0.1.2"
|
||||
grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
|
||||
grep-searcher = { version = "0.1.4", path = "../grep-searcher" }
|
||||
termcolor = "1.0.4"
|
||||
serde = { version = "1.0.77", optional = true }
|
||||
serde_derive = { version = "1.0.77", optional = true }
|
||||
serde_json = { version = "1.0.27", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
grep-regex = { version = "0.1.3", path = "../grep-regex" }
|
||||
@@ -1,21 +0,0 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Andrew Gallant
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -1,35 +0,0 @@
|
||||
grep-printer
|
||||
------------
|
||||
Print results from line oriented searching in a human readable, aggregate or
|
||||
JSON Lines format.
|
||||
|
||||
[Travis CI build status](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[AppVeyor build status](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[grep-printer on crates.io](https://crates.io/crates/grep-printer)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/grep-printer](https://docs.rs/grep-printer)
|
||||
|
||||
**NOTE:** You probably don't want to use this crate directly. Instead, you
|
||||
should prefer the facade defined in the
|
||||
[`grep`](https://docs.rs/grep)
|
||||
crate.
|
||||
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
grep-printer = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_printer;
|
||||
```
|
||||
@@ -1,24 +0,0 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
||||
@@ -1,394 +0,0 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
use termcolor::{Color, ColorSpec, ParseColorError};
|
||||
|
||||
/// Returns a default set of color specifications.
|
||||
///
|
||||
/// This may change over time, but the color choices are meant to be fairly
|
||||
/// conservative that work across terminal themes.
|
||||
///
|
||||
/// Additional color specifications can be added to the list returned. More
|
||||
/// recently added specifications override previously added specifications.
|
||||
pub fn default_color_specs() -> Vec<UserColorSpec> {
|
||||
vec![
|
||||
#[cfg(unix)]
|
||||
"path:fg:magenta".parse().unwrap(),
|
||||
#[cfg(windows)]
|
||||
"path:fg:cyan".parse().unwrap(),
|
||||
"line:fg:green".parse().unwrap(),
|
||||
"match:fg:red".parse().unwrap(),
|
||||
"match:style:bold".parse().unwrap(),
|
||||
]
|
||||
}
|
||||
|
||||
/// An error that can occur when parsing color specifications.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ColorError {
    /// This occurs when an unrecognized output type is used.
    UnrecognizedOutType(String),
    /// This occurs when an unrecognized spec type is used.
    UnrecognizedSpecType(String),
    /// This occurs when an unrecognized color name is used.
    ///
    /// The first field is the invalid color name that was given and the
    /// second field is the rendered message of the underlying parse error,
    /// which is what this error's `Display` implementation prints.
    UnrecognizedColor(String, String),
    /// This occurs when an unrecognized style attribute is used.
    UnrecognizedStyle(String),
    /// This occurs when the format of a color specification is invalid.
    InvalidFormat(String),
}
|
||||
|
||||
impl error::Error for ColorError {
|
||||
fn description(&self) -> &str {
|
||||
match *self {
|
||||
ColorError::UnrecognizedOutType(_) => "unrecognized output type",
|
||||
ColorError::UnrecognizedSpecType(_) => "unrecognized spec type",
|
||||
ColorError::UnrecognizedColor(_, _) => "unrecognized color name",
|
||||
ColorError::UnrecognizedStyle(_) => "unrecognized style attribute",
|
||||
ColorError::InvalidFormat(_) => "invalid color spec",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ColorError {
|
||||
fn from_parse_error(err: ParseColorError) -> ColorError {
|
||||
ColorError::UnrecognizedColor(
|
||||
err.invalid().to_string(),
|
||||
err.to_string(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for ColorError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
ColorError::UnrecognizedOutType(ref name) => {
|
||||
write!(
|
||||
f,
|
||||
"unrecognized output type '{}'. Choose from: \
|
||||
path, line, column, match.",
|
||||
name,
|
||||
)
|
||||
}
|
||||
ColorError::UnrecognizedSpecType(ref name) => {
|
||||
write!(
|
||||
f,
|
||||
"unrecognized spec type '{}'. Choose from: \
|
||||
fg, bg, style, none.",
|
||||
name,
|
||||
)
|
||||
}
|
||||
ColorError::UnrecognizedColor(_, ref msg) => {
|
||||
write!(f, "{}", msg)
|
||||
}
|
||||
ColorError::UnrecognizedStyle(ref name) => {
|
||||
write!(
|
||||
f,
|
||||
"unrecognized style attribute '{}'. Choose from: \
|
||||
nobold, bold, nointense, intense, nounderline, \
|
||||
underline.",
|
||||
name,
|
||||
)
|
||||
}
|
||||
ColorError::InvalidFormat(ref original) => {
|
||||
write!(
|
||||
f,
|
||||
"invalid color spec format: '{}'. Valid format \
|
||||
is '(path|line|column|match):(fg|bg|style):(value)'.",
|
||||
original,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A merged set of color specifications.
|
||||
///
|
||||
/// This set of color specifications represents the various color types that
|
||||
/// are supported by the printers in this crate. A set of color specifications
|
||||
/// can be created from a sequence of
|
||||
/// [`UserColorSpec`s](struct.UserColorSpec.html).
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq)]
|
||||
pub struct ColorSpecs {
|
||||
path: ColorSpec,
|
||||
line: ColorSpec,
|
||||
column: ColorSpec,
|
||||
matched: ColorSpec,
|
||||
}
|
||||
|
||||
/// A single color specification provided by the user.
|
||||
///
|
||||
/// ## Format
|
||||
///
|
||||
/// The format of a `Spec` is a triple: `{type}:{attribute}:{value}`. Each
|
||||
/// component is defined as follows:
|
||||
///
|
||||
/// * `{type}` can be one of `path`, `line`, `column` or `match`.
|
||||
/// * `{attribute}` can be one of `fg`, `bg` or `style`. `{attribute}` may also
|
||||
/// be the special value `none`, in which case, `{value}` can be omitted.
|
||||
/// * `{value}` is either a color name (for `fg`/`bg`) or a style instruction.
|
||||
///
|
||||
/// `{type}` controls which part of the output should be styled.
|
||||
///
|
||||
/// When `{attribute}` is `none`, then this should cause any existing style
|
||||
/// settings to be cleared for the specified `type`.
|
||||
///
|
||||
/// `{value}` should be a color when `{attribute}` is `fg` or `bg`, or it
|
||||
/// should be a style instruction when `{attribute}` is `style`. When
|
||||
/// `{attribute}` is `none`, `{value}` must be omitted.
|
||||
///
|
||||
/// Valid colors are `black`, `blue`, `green`, `red`, `cyan`, `magenta`,
|
||||
/// `yellow`, `white`. Extended colors can also be specified, and are formatted
|
||||
/// as `x` (for 256-bit colors) or `x,x,x` (for 24-bit true color), where
|
||||
/// `x` is a number between 0 and 255 inclusive. `x` may be given as a normal
|
||||
/// decimal number of a hexadecimal number, where the latter is prefixed by
|
||||
/// `0x`.
|
||||
///
|
||||
/// Valid style instructions are `nobold`, `bold`, `intense`, `nointense`,
|
||||
/// `underline`, `nounderline`.
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
/// The standard way to build a `UserColorSpec` is to parse it from a string.
|
||||
/// Once multiple `UserColorSpec`s have been constructed, they can be provided
|
||||
/// to the standard printer where they will automatically be applied to the
|
||||
/// output.
|
||||
///
|
||||
/// A `UserColorSpec` can also be converted to a `termcolor::ColorSpec`:
|
||||
///
|
||||
/// ```rust
|
||||
/// extern crate grep_printer;
|
||||
/// extern crate termcolor;
|
||||
///
|
||||
/// # fn main() {
|
||||
/// use termcolor::{Color, ColorSpec};
|
||||
/// use grep_printer::UserColorSpec;
|
||||
///
|
||||
/// let user_spec1: UserColorSpec = "path:fg:blue".parse().unwrap();
|
||||
/// let user_spec2: UserColorSpec = "match:bg:0xff,0x7f,0x00".parse().unwrap();
|
||||
///
|
||||
/// let spec1 = user_spec1.to_color_spec();
|
||||
/// let spec2 = user_spec2.to_color_spec();
|
||||
///
|
||||
/// assert_eq!(spec1.fg(), Some(&Color::Blue));
|
||||
/// assert_eq!(spec2.bg(), Some(&Color::Rgb(0xFF, 0x7F, 0x00)));
|
||||
/// # }
|
||||
/// ```
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct UserColorSpec {
|
||||
ty: OutType,
|
||||
value: SpecValue,
|
||||
}
|
||||
|
||||
impl UserColorSpec {
|
||||
/// Convert this user provided color specification to a specification that
|
||||
/// can be used with `termcolor`. This drops the type of this specification
|
||||
/// (where the type indicates where the color is applied in the standard
|
||||
/// printer, e.g., to the file path or the line numbers, etc.).
|
||||
pub fn to_color_spec(&self) -> ColorSpec {
|
||||
let mut spec = ColorSpec::default();
|
||||
self.value.merge_into(&mut spec);
|
||||
spec
|
||||
}
|
||||
}
|
||||
|
||||
/// The actual value given by the specification.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
enum SpecValue {
|
||||
None,
|
||||
Fg(Color),
|
||||
Bg(Color),
|
||||
Style(Style),
|
||||
}
|
||||
|
||||
/// The set of configurable portions of ripgrep's output.
#[derive(Clone, Debug, Eq, PartialEq)]
enum OutType {
    /// File paths.
    Path,
    /// Line numbers.
    Line,
    /// Column numbers.
    Column,
    /// Matched text.
    Match,
}
|
||||
|
||||
/// The specification type, i.e., the `{attribute}` component of a user
/// provided color specification.
#[derive(Clone, Debug, Eq, PartialEq)]
enum SpecType {
    /// A foreground color follows.
    Fg,
    /// A background color follows.
    Bg,
    /// A style instruction follows.
    Style,
    /// No value follows; existing settings are cleared.
    None,
}
|
||||
|
||||
/// The set of available styles for use in the terminal.
///
/// Each attribute comes as a pair: one variant to turn it on and one to turn
/// it off.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Style {
    Bold,
    NoBold,
    Intense,
    NoIntense,
    Underline,
    NoUnderline,
}
|
||||
|
||||
impl ColorSpecs {
|
||||
/// Create color specifications from a list of user supplied
|
||||
/// specifications.
|
||||
pub fn new(specs: &[UserColorSpec]) -> ColorSpecs {
|
||||
let mut merged = ColorSpecs::default();
|
||||
for spec in specs {
|
||||
match spec.ty {
|
||||
OutType::Path => spec.merge_into(&mut merged.path),
|
||||
OutType::Line => spec.merge_into(&mut merged.line),
|
||||
OutType::Column => spec.merge_into(&mut merged.column),
|
||||
OutType::Match => spec.merge_into(&mut merged.matched),
|
||||
}
|
||||
}
|
||||
merged
|
||||
}
|
||||
|
||||
/// Create a default set of specifications that have color.
|
||||
///
|
||||
/// This is distinct from `ColorSpecs`'s `Default` implementation in that
|
||||
/// this provides a set of default color choices, where as the `Default`
|
||||
/// implementation provides no color choices.
|
||||
pub fn default_with_color() -> ColorSpecs {
|
||||
ColorSpecs::new(&default_color_specs())
|
||||
}
|
||||
|
||||
/// Return the color specification for coloring file paths.
|
||||
pub fn path(&self) -> &ColorSpec {
|
||||
&self.path
|
||||
}
|
||||
|
||||
/// Return the color specification for coloring line numbers.
|
||||
pub fn line(&self) -> &ColorSpec {
|
||||
&self.line
|
||||
}
|
||||
|
||||
/// Return the color specification for coloring column numbers.
|
||||
pub fn column(&self) -> &ColorSpec {
|
||||
&self.column
|
||||
}
|
||||
|
||||
/// Return the color specification for coloring matched text.
|
||||
pub fn matched(&self) -> &ColorSpec {
|
||||
&self.matched
|
||||
}
|
||||
}
|
||||
|
||||
impl UserColorSpec {
|
||||
/// Merge this spec into the given color specification.
|
||||
fn merge_into(&self, cspec: &mut ColorSpec) {
|
||||
self.value.merge_into(cspec);
|
||||
}
|
||||
}
|
||||
|
||||
impl SpecValue {
|
||||
/// Merge this spec value into the given color specification.
|
||||
fn merge_into(&self, cspec: &mut ColorSpec) {
|
||||
match *self {
|
||||
SpecValue::None => cspec.clear(),
|
||||
SpecValue::Fg(ref color) => { cspec.set_fg(Some(color.clone())); }
|
||||
SpecValue::Bg(ref color) => { cspec.set_bg(Some(color.clone())); }
|
||||
SpecValue::Style(ref style) => {
|
||||
match *style {
|
||||
Style::Bold => { cspec.set_bold(true); }
|
||||
Style::NoBold => { cspec.set_bold(false); }
|
||||
Style::Intense => { cspec.set_intense(true); }
|
||||
Style::NoIntense => { cspec.set_intense(false); }
|
||||
Style::Underline => { cspec.set_underline(true); }
|
||||
Style::NoUnderline => { cspec.set_underline(false); }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for UserColorSpec {
|
||||
type Err = ColorError;
|
||||
|
||||
fn from_str(s: &str) -> Result<UserColorSpec, ColorError> {
|
||||
let pieces: Vec<&str> = s.split(':').collect();
|
||||
if pieces.len() <= 1 || pieces.len() > 3 {
|
||||
return Err(ColorError::InvalidFormat(s.to_string()));
|
||||
}
|
||||
let otype: OutType = pieces[0].parse()?;
|
||||
match pieces[1].parse()? {
|
||||
SpecType::None => {
|
||||
Ok(UserColorSpec {
|
||||
ty: otype,
|
||||
value: SpecValue::None,
|
||||
})
|
||||
}
|
||||
SpecType::Style => {
|
||||
if pieces.len() < 3 {
|
||||
return Err(ColorError::InvalidFormat(s.to_string()));
|
||||
}
|
||||
let style: Style = pieces[2].parse()?;
|
||||
Ok(UserColorSpec { ty: otype, value: SpecValue::Style(style) })
|
||||
}
|
||||
SpecType::Fg => {
|
||||
if pieces.len() < 3 {
|
||||
return Err(ColorError::InvalidFormat(s.to_string()));
|
||||
}
|
||||
let color: Color = pieces[2]
|
||||
.parse()
|
||||
.map_err(ColorError::from_parse_error)?;
|
||||
Ok(UserColorSpec { ty: otype, value: SpecValue::Fg(color) })
|
||||
}
|
||||
SpecType::Bg => {
|
||||
if pieces.len() < 3 {
|
||||
return Err(ColorError::InvalidFormat(s.to_string()));
|
||||
}
|
||||
let color: Color = pieces[2]
|
||||
.parse()
|
||||
.map_err(ColorError::from_parse_error)?;
|
||||
Ok(UserColorSpec { ty: otype, value: SpecValue::Bg(color) })
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for OutType {
|
||||
type Err = ColorError;
|
||||
|
||||
fn from_str(s: &str) -> Result<OutType, ColorError> {
|
||||
match &*s.to_lowercase() {
|
||||
"path" => Ok(OutType::Path),
|
||||
"line" => Ok(OutType::Line),
|
||||
"column" => Ok(OutType::Column),
|
||||
"match" => Ok(OutType::Match),
|
||||
_ => Err(ColorError::UnrecognizedOutType(s.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for SpecType {
|
||||
type Err = ColorError;
|
||||
|
||||
fn from_str(s: &str) -> Result<SpecType, ColorError> {
|
||||
match &*s.to_lowercase() {
|
||||
"fg" => Ok(SpecType::Fg),
|
||||
"bg" => Ok(SpecType::Bg),
|
||||
"style" => Ok(SpecType::Style),
|
||||
"none" => Ok(SpecType::None),
|
||||
_ => Err(ColorError::UnrecognizedSpecType(s.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Style {
|
||||
type Err = ColorError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Style, ColorError> {
|
||||
match &*s.to_lowercase() {
|
||||
"bold" => Ok(Style::Bold),
|
||||
"nobold" => Ok(Style::NoBold),
|
||||
"intense" => Ok(Style::Intense),
|
||||
"nointense" => Ok(Style::NoIntense),
|
||||
"underline" => Ok(Style::Underline),
|
||||
"nounderline" => Ok(Style::NoUnderline),
|
||||
_ => Err(ColorError::UnrecognizedStyle(s.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,90 +0,0 @@
|
||||
use std::io::{self, Write};
|
||||
|
||||
use termcolor::{ColorSpec, WriteColor};
|
||||
|
||||
/// A writer that counts the number of bytes that have been successfully
/// written.
#[derive(Clone, Debug)]
pub struct CounterWriter<W> {
    /// The underlying writer.
    wtr: W,
    /// Bytes written since construction or the last `reset_count`/`clear`.
    count: u64,
    /// Bytes accumulated by previous calls to `reset_count`.
    total_count: u64,
}

impl<W: Write> CounterWriter<W> {
    /// Wrap the given writer with both counters starting at `0`.
    pub fn new(wtr: W) -> CounterWriter<W> {
        CounterWriter { wtr, count: 0, total_count: 0 }
    }
}

impl<W> CounterWriter<W> {
    /// Returns the total number of bytes written since construction or the
    /// last time `reset_count` was called.
    pub fn count(&self) -> u64 {
        self.count
    }

    /// Returns the total number of bytes written since construction (or
    /// since the last call to `clear`).
    pub fn total_count(&self) -> u64 {
        self.total_count + self.count
    }

    /// Resets the number of bytes written to `0`.
    ///
    /// The current count is folded into the running total first, so
    /// `total_count` is unaffected.
    pub fn reset_count(&mut self) {
        self.total_count += self.count;
        self.count = 0;
    }

    /// Clear resets all counting related state for this writer.
    ///
    /// After this call, the total count of bytes written to the underlying
    /// writer is erased and reset.
    #[allow(dead_code)]
    pub fn clear(&mut self) {
        self.count = 0;
        self.total_count = 0;
    }

    /// Return a shared reference to the underlying writer.
    #[allow(dead_code)]
    pub fn get_ref(&self) -> &W {
        &self.wtr
    }

    /// Return a mutable reference to the underlying writer.
    ///
    /// Bytes written directly through this reference are not counted.
    pub fn get_mut(&mut self) -> &mut W {
        &mut self.wtr
    }

    /// Consume this counter and return the underlying writer.
    pub fn into_inner(self) -> W {
        self.wtr
    }
}

impl<W: Write> Write for CounterWriter<W> {
    /// Write to the underlying writer and count only the bytes it reports
    /// as written. A failed write leaves the counters unchanged.
    fn write(&mut self, buf: &[u8]) -> Result<usize, io::Error> {
        let n = self.wtr.write(buf)?;
        self.count += n as u64;
        Ok(n)
    }

    /// Flush the underlying writer.
    fn flush(&mut self) -> Result<(), io::Error> {
        self.wtr.flush()
    }
}
|
||||
|
||||
impl<W: WriteColor> WriteColor for CounterWriter<W> {
|
||||
fn supports_color(&self) -> bool {
|
||||
self.wtr.supports_color()
|
||||
}
|
||||
|
||||
fn set_color(&mut self, spec: &ColorSpec) -> io::Result<()> {
|
||||
self.wtr.set_color(spec)
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> io::Result<()> {
|
||||
self.wtr.reset()
|
||||
}
|
||||
|
||||
fn is_synchronous(&self) -> bool {
|
||||
self.wtr.is_synchronous()
|
||||
}
|
||||
}
|
||||
@@ -1,963 +0,0 @@
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::time::Instant;
|
||||
|
||||
use grep_matcher::{Match, Matcher};
|
||||
use grep_searcher::{
|
||||
Searcher,
|
||||
Sink, SinkError, SinkContext, SinkContextKind, SinkFinish, SinkMatch,
|
||||
};
|
||||
use serde_json as json;
|
||||
|
||||
use counter::CounterWriter;
|
||||
use jsont;
|
||||
use stats::Stats;
|
||||
|
||||
/// The configuration for the JSON printer.
///
/// This is manipulated by the JSONBuilder and then referenced by the actual
/// implementation. Once a printer is built, the configuration is frozen and
/// cannot be changed.
#[derive(Debug, Clone)]
struct Config {
    /// Whether to pretty print the JSON output.
    pretty: bool,
    /// The maximum number of matches to print, if any.
    max_matches: Option<u64>,
    /// Whether `begin`/`end` messages are emitted even without a match.
    always_begin_end: bool,
}
|
||||
|
||||
impl Default for Config {
|
||||
fn default() -> Config {
|
||||
Config {
|
||||
pretty: false,
|
||||
max_matches: None,
|
||||
always_begin_end: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A builder for a JSON lines printer.
|
||||
///
|
||||
/// The builder permits configuring how the printer behaves. The JSON printer
|
||||
/// has fewer configuration options than the standard printer because it is
|
||||
/// a structured format, and the printer always attempts to find the most
|
||||
/// information possible.
|
||||
///
|
||||
/// Some configuration options, such as whether line numbers are included or
|
||||
/// whether contextual lines are shown, are drawn directly from the
|
||||
/// `grep_searcher::Searcher`'s configuration.
|
||||
///
|
||||
/// Once a `JSON` printer is built, its configuration cannot be changed.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct JSONBuilder {
|
||||
config: Config,
|
||||
}
|
||||
|
||||
impl JSONBuilder {
|
||||
/// Return a new builder for configuring the JSON printer.
|
||||
pub fn new() -> JSONBuilder {
|
||||
JSONBuilder { config: Config::default() }
|
||||
}
|
||||
|
||||
/// Create a JSON printer that writes results to the given writer.
|
||||
pub fn build<W: io::Write>(&self, wtr: W) -> JSON<W> {
|
||||
JSON {
|
||||
config: self.config.clone(),
|
||||
wtr: CounterWriter::new(wtr),
|
||||
matches: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Print JSON in a pretty printed format.
|
||||
///
|
||||
/// Enabling this will no longer produce a "JSON lines" format, in that
|
||||
/// each JSON object printed may span multiple lines.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn pretty(&mut self, yes: bool) -> &mut JSONBuilder {
|
||||
self.config.pretty = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the maximum amount of matches that are printed.
|
||||
///
|
||||
/// If multi line search is enabled and a match spans multiple lines, then
|
||||
/// that match is counted exactly once for the purposes of enforcing this
|
||||
/// limit, regardless of how many lines it spans.
|
||||
pub fn max_matches(&mut self, limit: Option<u64>) -> &mut JSONBuilder {
|
||||
self.config.max_matches = limit;
|
||||
self
|
||||
}
|
||||
|
||||
/// When enabled, the `begin` and `end` messages are always emitted, even
|
||||
/// when no match is found.
|
||||
///
|
||||
/// When disabled, the `begin` and `end` messages are only shown if there
|
||||
/// is at least one `match` or `context` message.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn always_begin_end(&mut self, yes: bool) -> &mut JSONBuilder {
|
||||
self.config.always_begin_end = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// The JSON printer, which emits results in a JSON lines format.
|
||||
///
|
||||
/// This type is generic over `W`, which represents any implementation of
|
||||
/// the standard library `io::Write` trait.
|
||||
///
|
||||
/// # Format
|
||||
///
|
||||
/// This section describes the JSON format used by this printer.
|
||||
///
|
||||
/// To skip the rigamarole, take a look at the
|
||||
/// [example](#example)
|
||||
/// at the end.
|
||||
///
|
||||
/// ## Overview
|
||||
///
|
||||
/// The format of this printer is the [JSON Lines](http://jsonlines.org/)
|
||||
/// format. Specifically, this printer emits a sequence of messages, where
|
||||
/// each message is encoded as a single JSON value on a single line. There are
|
||||
/// four different types of messages (and this number may expand over time):
|
||||
///
|
||||
/// * **begin** - A message that indicates a file is being searched.
|
||||
/// * **end** - A message that indicates a file is done being searched. This
|
||||
/// message also includes summary statistics about the search.
|
||||
/// * **match** - A message that indicates a match was found. This includes
|
||||
/// the text and offsets of the match.
|
||||
/// * **context** - A message that indicates a contextual line was found.
|
||||
/// This includes the text of the line, along with any match information if
|
||||
/// the search was inverted.
|
||||
///
|
||||
/// Every message is encoded in the same envelope format, which includes a tag
|
||||
/// indicating the message type along with an object for the payload:
|
||||
///
|
||||
/// ```json
|
||||
/// {
|
||||
/// "type": "{begin|end|match|context}",
|
||||
/// "data": { ... }
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// The message itself is encoded in the envelope's `data` key.
|
||||
///
|
||||
/// ## Text encoding
|
||||
///
|
||||
/// Before describing each message format, we first must briefly discuss text
|
||||
/// encoding, since it factors into every type of message. In particular, JSON
|
||||
/// may only be encoded in UTF-8, UTF-16 or UTF-32. For the purposes of this
|
||||
/// printer, we need only worry about UTF-8. The problem here is that searching
|
||||
/// is not limited to UTF-8 exclusively, which in turn implies that matches
|
||||
/// may be reported that contain invalid UTF-8. Moreover, this printer may
|
||||
/// also print file paths, and the encoding of file paths is itself not
|
||||
/// guaranteed to be valid UTF-8. Therefore, this printer must deal with the
|
||||
/// presence of invalid UTF-8 somehow. The printer could silently ignore such
|
||||
/// things completely, or even lossily transcode invalid UTF-8 to valid UTF-8
|
||||
/// by replacing all invalid sequences with the Unicode replacement character.
|
||||
/// However, this would prevent consumers of this format from accessing the
|
||||
/// original data in a non-lossy way.
|
||||
///
|
||||
/// Therefore, this printer will emit valid UTF-8 encoded bytes as normal
|
||||
/// JSON strings and otherwise base64 encode data that isn't valid UTF-8. To
|
||||
/// communicate whether this process occurs or not, strings are keyed by the
|
||||
/// name `text` whereas arbitrary bytes are keyed by `bytes`.
|
||||
///
|
||||
/// For example, when a path is included in a message, it is formatted like so,
|
||||
/// if and only if the path is valid UTF-8:
|
||||
///
|
||||
/// ```json
|
||||
/// {
|
||||
/// "path": {
|
||||
/// "text": "/home/ubuntu/lib.rs"
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// If instead our path was `/home/ubuntu/lib\xFF.rs`, where the `\xFF` byte
|
||||
/// makes it invalid UTF-8, the path would instead be encoded like so:
|
||||
///
|
||||
/// ```json
|
||||
/// {
|
||||
/// "path": {
|
||||
/// "bytes": "L2hvbWUvdWJ1bnR1L2xpYv8ucnM="
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// This same representation is used for reporting matches as well.
|
||||
///
|
||||
/// The printer guarantees that the `text` field is used whenever the
|
||||
/// underlying bytes are valid UTF-8.
|
||||
///
|
||||
/// ## Wire format
|
||||
///
|
||||
/// This section documents the wire format emitted by this printer, starting
|
||||
/// with the four types of messages.
|
||||
///
|
||||
/// Each message has its own format, and is contained inside an envelope that
|
||||
/// indicates the type of message. The envelope has these fields:
|
||||
///
|
||||
/// * **type** - A string indicating the type of this message. It may be one
|
||||
/// of four possible strings: `begin`, `end`, `match` or `context`. This
|
||||
/// list may expand over time.
|
||||
/// * **data** - The actual message data. The format of this field depends on
|
||||
/// the value of `type`. The possible message formats are
|
||||
/// [`begin`](#message-begin),
|
||||
/// [`end`](#message-end),
|
||||
/// [`match`](#message-match),
|
||||
/// [`context`](#message-context).
|
||||
///
|
||||
/// #### Message: **begin**
|
||||
///
|
||||
/// This message indicates that a search has begun. It has these fields:
|
||||
///
|
||||
/// * **path** - An
|
||||
/// [arbitrary data object](#object-arbitrary-data)
|
||||
/// representing the file path corresponding to the search, if one is
|
||||
/// present. If no file path is available, then this field is `null`.
|
||||
///
|
||||
/// #### Message: **end**
|
||||
///
|
||||
/// This message indicates that a search has finished. It has these fields:
|
||||
///
|
||||
/// * **path** - An
|
||||
/// [arbitrary data object](#object-arbitrary-data)
|
||||
/// representing the file path corresponding to the search, if one is
|
||||
/// present. If no file path is available, then this field is `null`.
|
||||
/// * **binary_offset** - The absolute offset in the data searched
|
||||
/// corresponding to the place at which binary data was detected. If no
|
||||
/// binary data was detected (or if binary detection was disabled), then this
|
||||
/// field is `null`.
|
||||
/// * **stats** - A [`stats` object](#object-stats) that contains summary
|
||||
/// statistics for the previous search.
|
||||
///
|
||||
/// #### Message: **match**
|
||||
///
|
||||
/// This message indicates that a match has been found. A match generally
|
||||
/// corresponds to a single line of text, although it may correspond to
|
||||
/// multiple lines if the search can emit matches over multiple lines. It
|
||||
/// has these fields:
|
||||
///
|
||||
/// * **path** - An
|
||||
/// [arbitrary data object](#object-arbitrary-data)
|
||||
/// representing the file path corresponding to the search, if one is
|
||||
/// present. If no file path is available, then this field is `null`.
|
||||
/// * **lines** - An
|
||||
/// [arbitrary data object](#object-arbitrary-data)
|
||||
/// representing one or more lines contained in this match.
|
||||
/// * **line_number** - If the searcher has been configured to report line
|
||||
/// numbers, then this corresponds to the line number of the first line
|
||||
/// in `lines`. If no line numbers are available, then this is `null`.
|
||||
/// * **absolute_offset** - The absolute byte offset corresponding to the start
|
||||
/// of `lines` in the data being searched.
|
||||
/// * **submatches** - An array of [`submatch` objects](#object-submatch)
|
||||
/// corresponding to matches in `lines`. The offsets included in each
|
||||
/// `submatch` correspond to byte offsets into `lines`. (If `lines` is base64
|
||||
/// encoded, then the byte offsets correspond to the data after base64
|
||||
/// decoding.) The `submatch` objects are guaranteed to be sorted by their
|
||||
/// starting offsets. Note that it is possible for this array to be empty,
|
||||
/// for example, when searching reports inverted matches.
|
||||
///
|
||||
/// #### Message: **context**
|
||||
///
|
||||
/// This message indicates that a contextual line has been found. A contextual
|
||||
/// line is a line that doesn't contain a match, but is generally adjacent to
|
||||
/// a line that does contain a match. The precise way in which contextual lines
|
||||
/// are reported is determined by the searcher. It has these fields, which are
|
||||
/// exactly the same fields found in a [`match`](#message-match):
|
||||
///
|
||||
/// * **path** - An
|
||||
/// [arbitrary data object](#object-arbitrary-data)
|
||||
/// representing the file path corresponding to the search, if one is
|
||||
/// present. If no file path is available, then this field is `null`.
|
||||
/// * **lines** - An
|
||||
/// [arbitrary data object](#object-arbitrary-data)
|
||||
/// representing one or more lines contained in this context. This includes
|
||||
/// line terminators, if they're present.
|
||||
/// * **line_number** - If the searcher has been configured to report line
|
||||
/// numbers, then this corresponds to the line number of the first line
|
||||
/// in `lines`. If no line numbers are available, then this is `null`.
|
||||
/// * **absolute_offset** - The absolute byte offset corresponding to the start
|
||||
/// of `lines` in the data being searched.
|
||||
/// * **submatches** - An array of [`submatch` objects](#object-submatch)
|
||||
/// corresponding to matches in `lines`. The offsets included in each
|
||||
/// `submatch` correspond to byte offsets into `lines`. (If `lines` is base64
|
||||
/// encoded, then the byte offsets correspond to the data after base64
|
||||
/// decoding.) The `submatch` objects are guaranteed to be sorted by
|
||||
/// their starting offsets. Note that it is possible for this array to be
|
||||
/// non-empty, for example, when searching reports inverted matches such that
|
||||
/// the original matcher could match things in the contextual lines.
|
||||
///
|
||||
/// #### Object: **submatch**
|
||||
///
|
||||
/// This object describes submatches found within `match` or `context`
|
||||
/// messages. The `start` and `end` fields indicate the half-open interval on
|
||||
/// which the match occurs (`start` is included, but `end` is not). It is
|
||||
/// guaranteed that `start <= end`. It has these fields:
|
||||
///
|
||||
/// * **match** - An
|
||||
/// [arbitrary data object](#object-arbitrary-data)
|
||||
/// corresponding to the text in this submatch.
|
||||
/// * **start** - A byte offset indicating the start of this match. This offset
|
||||
/// is generally reported in terms of the parent object's data. For example,
|
||||
/// the `lines` field in the
|
||||
/// [`match`](#message-match) or [`context`](#message-context)
|
||||
/// messages.
|
||||
/// * **end** - A byte offset indicating the end of this match. This offset
|
||||
/// is generally reported in terms of the parent object's data. For example,
|
||||
/// the `lines` field in the
|
||||
/// [`match`](#message-match) or [`context`](#message-context)
|
||||
/// messages.
|
||||
///
|
||||
/// #### Object: **stats**
|
||||
///
|
||||
/// This object is included in messages and contains summary statistics about
|
||||
/// a search. It has these fields:
|
||||
///
|
||||
/// * **elapsed** - A [`duration` object](#object-duration) describing the
|
||||
/// length of time that elapsed while performing the search.
|
||||
/// * **searches** - The number of searches that have run. For this printer,
|
||||
/// this value is always `1`. (Implementations may emit additional message
|
||||
/// types that use this same `stats` object that represents summary
|
||||
/// statistics over multiple searches.)
|
||||
/// * **searches_with_match** - The number of searches that have run that have
|
||||
/// found at least one match. This is never more than `searches`.
|
||||
/// * **bytes_searched** - The total number of bytes that have been searched.
|
||||
/// * **bytes_printed** - The total number of bytes that have been printed.
|
||||
/// This includes everything emitted by this printer.
|
||||
/// * **matched_lines** - The total number of lines that participated in a
|
||||
/// match. When matches may contain multiple lines, then this includes every
|
||||
/// line that is part of every match.
|
||||
/// * **matches** - The total number of matches. There may be multiple matches
|
||||
/// per line. When matches may contain multiple lines, each match is counted
|
||||
/// only once, regardless of how many lines it spans.
|
||||
///
|
||||
/// #### Object: **duration**
|
||||
///
|
||||
/// This object includes a few fields for describing a duration. Two of its
|
||||
/// fields, `secs` and `nanos`, can be combined to give nanosecond precision
|
||||
/// on systems that support it. It has these fields:
|
||||
///
|
||||
/// * **secs** - A whole number of seconds indicating the length of this
|
||||
/// duration.
|
||||
/// * **nanos** - A fractional part of this duration represented by nanoseconds.
|
||||
/// If nanosecond precision isn't supported, then this is typically rounded
|
||||
/// up to the nearest number of nanoseconds.
|
||||
/// * **human** - A human readable string describing the length of the
|
||||
/// duration. The format of the string is itself unspecified.
|
||||
///
|
||||
/// #### Object: **arbitrary data**
|
||||
///
|
||||
/// This object is used whenever arbitrary data needs to be represented as a
|
||||
/// JSON value. This object contains two fields, where generally only one of
|
||||
/// the fields is present:
|
||||
///
|
||||
/// * **text** - A normal JSON string that is UTF-8 encoded. This field is
|
||||
/// populated if and only if the underlying data is valid UTF-8.
|
||||
/// * **bytes** - A normal JSON string that is a base64 encoding of the
|
||||
/// underlying bytes.
|
||||
///
|
||||
/// More information on the motivation for this representation can be seen in
|
||||
/// the section [text encoding](#text-encoding) above.
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
/// This section shows a small example that includes all message types.
|
||||
///
|
||||
/// Here's the file we want to search, located at `/home/andrew/sherlock`:
|
||||
///
|
||||
/// ```text
|
||||
/// For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
/// Holmeses, success in the province of detective work must always
|
||||
/// be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
/// can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
/// but Doctor Watson has to have it taken out for him and dusted,
|
||||
/// and exhibited clearly, with a label attached.
|
||||
/// ```
|
||||
///
|
||||
/// Searching for `Watson` with a `before_context` of `1` with line numbers
|
||||
/// enabled shows something like this using the standard printer:
|
||||
///
|
||||
/// ```text
|
||||
/// sherlock:1:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
/// --
|
||||
/// sherlock-4-can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
/// sherlock:5:but Doctor Watson has to have it taken out for him and dusted,
|
||||
/// ```
|
||||
///
|
||||
/// Here's what the same search looks like using the JSON wire format described
|
||||
/// above, wherein we show semi-prettified JSON (instead of a strict JSON
|
||||
/// Lines format), for illustrative purposes:
|
||||
///
|
||||
/// ```json
|
||||
/// {
|
||||
/// "type": "begin",
|
||||
/// "data": {
|
||||
/// "path": {"text": "/home/andrew/sherlock"}
|
||||
/// }
|
||||
/// }
|
||||
/// {
|
||||
/// "type": "match",
|
||||
/// "data": {
|
||||
/// "path": {"text": "/home/andrew/sherlock"},
|
||||
/// "lines": {"text": "For the Doctor Watsons of this world, as opposed to the Sherlock\n"},
|
||||
/// "line_number": 1,
|
||||
/// "absolute_offset": 0,
|
||||
/// "submatches": [
|
||||
/// {"match": {"text": "Watson"}, "start": 15, "end": 21}
|
||||
/// ]
|
||||
/// }
|
||||
/// }
|
||||
/// {
|
||||
/// "type": "context",
|
||||
/// "data": {
|
||||
/// "path": {"text": "/home/andrew/sherlock"},
|
||||
/// "lines": {"text": "can extract a clew from a wisp of straw or a flake of cigar ash;\n"},
|
||||
/// "line_number": 4,
|
||||
/// "absolute_offset": 193,
|
||||
/// "submatches": []
|
||||
/// }
|
||||
/// }
|
||||
/// {
|
||||
/// "type": "match",
|
||||
/// "data": {
|
||||
/// "path": {"text": "/home/andrew/sherlock"},
|
||||
/// "lines": {"text": "but Doctor Watson has to have it taken out for him and dusted,\n"},
|
||||
/// "line_number": 5,
|
||||
/// "absolute_offset": 258,
|
||||
/// "submatches": [
|
||||
/// {"match": {"text": "Watson"}, "start": 11, "end": 17}
|
||||
/// ]
|
||||
/// }
|
||||
/// }
|
||||
/// {
|
||||
/// "type": "end",
|
||||
/// "data": {
|
||||
/// "path": {"text": "/home/andrew/sherlock"},
|
||||
/// "binary_offset": null,
|
||||
/// "stats": {
|
||||
/// "elapsed": {"secs": 0, "nanos": 36296, "human": "0.0000s"},
|
||||
/// "searches": 1,
|
||||
/// "searches_with_match": 1,
|
||||
/// "bytes_searched": 367,
|
||||
/// "bytes_printed": 1151,
|
||||
/// "matched_lines": 2,
|
||||
/// "matches": 2
|
||||
/// }
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
pub struct JSON<W> {
|
||||
config: Config,
|
||||
wtr: CounterWriter<W>,
|
||||
matches: Vec<Match>,
|
||||
}
|
||||
|
||||
impl<W: io::Write> JSON<W> {
|
||||
/// Return a JSON lines printer with a default configuration that writes
|
||||
/// matches to the given writer.
|
||||
pub fn new(wtr: W) -> JSON<W> {
|
||||
JSONBuilder::new().build(wtr)
|
||||
}
|
||||
|
||||
/// Return an implementation of `Sink` for the JSON printer.
|
||||
///
|
||||
/// This does not associate the printer with a file path, which means this
|
||||
/// implementation will never print a file path along with the matches.
|
||||
pub fn sink<'s, M: Matcher>(
|
||||
&'s mut self,
|
||||
matcher: M,
|
||||
) -> JSONSink<'static, 's, M, W> {
|
||||
JSONSink {
|
||||
matcher: matcher,
|
||||
json: self,
|
||||
path: None,
|
||||
start_time: Instant::now(),
|
||||
match_count: 0,
|
||||
after_context_remaining: 0,
|
||||
binary_byte_offset: None,
|
||||
begin_printed: false,
|
||||
stats: Stats::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an implementation of `Sink` associated with a file path.
|
||||
///
|
||||
/// When the printer is associated with a path, then it may, depending on
|
||||
/// its configuration, print the path along with the matches found.
|
||||
pub fn sink_with_path<'p, 's, M, P>(
|
||||
&'s mut self,
|
||||
matcher: M,
|
||||
path: &'p P,
|
||||
) -> JSONSink<'p, 's, M, W>
|
||||
where M: Matcher,
|
||||
P: ?Sized + AsRef<Path>,
|
||||
{
|
||||
JSONSink {
|
||||
matcher: matcher,
|
||||
json: self,
|
||||
path: Some(path.as_ref()),
|
||||
start_time: Instant::now(),
|
||||
match_count: 0,
|
||||
after_context_remaining: 0,
|
||||
binary_byte_offset: None,
|
||||
begin_printed: false,
|
||||
stats: Stats::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Write the given message followed by a new line. The new line is
|
||||
/// determined from the configuration of the given searcher.
|
||||
fn write_message(&mut self, message: &jsont::Message) -> io::Result<()> {
|
||||
if self.config.pretty {
|
||||
json::to_writer_pretty(&mut self.wtr, message)?;
|
||||
} else {
|
||||
json::to_writer(&mut self.wtr, message)?;
|
||||
}
|
||||
self.wtr.write(&[b'\n'])?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<W> JSON<W> {
|
||||
/// Returns true if and only if this printer has written at least one byte
|
||||
/// to the underlying writer during any of the previous searches.
|
||||
pub fn has_written(&self) -> bool {
|
||||
self.wtr.total_count() > 0
|
||||
}
|
||||
|
||||
/// Return a mutable reference to the underlying writer.
|
||||
pub fn get_mut(&mut self) -> &mut W {
|
||||
self.wtr.get_mut()
|
||||
}
|
||||
|
||||
/// Consume this printer and return back ownership of the underlying
|
||||
/// writer.
|
||||
pub fn into_inner(self) -> W {
|
||||
self.wtr.into_inner()
|
||||
}
|
||||
}
|
||||
|
||||
/// An implementation of `Sink` associated with a matcher and an optional file
|
||||
/// path for the JSON printer.
|
||||
///
|
||||
/// This type is generic over a few type parameters:
|
||||
///
|
||||
/// * `'p` refers to the lifetime of the file path, if one is provided. When
|
||||
/// no file path is given, then this is `'static`.
|
||||
/// * `'s` refers to the lifetime of the
|
||||
/// [`JSON`](struct.JSON.html)
|
||||
/// printer that this type borrows.
|
||||
/// * `M` refers to the type of matcher used by
|
||||
/// `grep_searcher::Searcher` that is reporting results to this sink.
|
||||
/// * `W` refers to the underlying writer that this printer is writing its
|
||||
/// output to.
|
||||
#[derive(Debug)]
|
||||
pub struct JSONSink<'p, 's, M: Matcher, W: 's> {
|
||||
matcher: M,
|
||||
json: &'s mut JSON<W>,
|
||||
path: Option<&'p Path>,
|
||||
start_time: Instant,
|
||||
match_count: u64,
|
||||
after_context_remaining: u64,
|
||||
binary_byte_offset: Option<u64>,
|
||||
begin_printed: bool,
|
||||
stats: Stats,
|
||||
}
|
||||
|
||||
impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
|
||||
/// Returns true if and only if this printer received a match in the
|
||||
/// previous search.
|
||||
///
|
||||
/// This is unaffected by the result of searches before the previous
|
||||
/// search.
|
||||
pub fn has_match(&self) -> bool {
|
||||
self.match_count > 0
|
||||
}
|
||||
|
||||
/// Return the total number of matches reported to this sink.
|
||||
///
|
||||
/// This corresponds to the number of times `Sink::matched` is called.
|
||||
pub fn match_count(&self) -> u64 {
|
||||
self.match_count
|
||||
}
|
||||
|
||||
/// If binary data was found in the previous search, this returns the
|
||||
/// offset at which the binary data was first detected.
|
||||
///
|
||||
/// The offset returned is an absolute offset relative to the entire
|
||||
/// set of bytes searched.
|
||||
///
|
||||
/// This is unaffected by the result of searches before the previous
|
||||
/// search. e.g., If the search prior to the previous search found binary
|
||||
/// data but the previous search found no binary data, then this will
|
||||
/// return `None`.
|
||||
pub fn binary_byte_offset(&self) -> Option<u64> {
|
||||
self.binary_byte_offset
|
||||
}
|
||||
|
||||
/// Return a reference to the stats produced by the printer for all
|
||||
/// searches executed on this sink.
|
||||
pub fn stats(&self) -> &Stats {
|
||||
&self.stats
|
||||
}
|
||||
|
||||
/// Execute the matcher over the given bytes and record the match
|
||||
/// locations if the current configuration demands match granularity.
|
||||
fn record_matches(&mut self, bytes: &[u8]) -> io::Result<()> {
|
||||
self.json.matches.clear();
|
||||
// If printing requires knowing the location of each individual match,
|
||||
// then compute and stored those right now for use later. While this
|
||||
// adds an extra copy for storing the matches, we do amortize the
|
||||
// allocation for it and this greatly simplifies the printing logic to
|
||||
// the extent that it's easy to ensure that we never do more than
|
||||
// one search to find the matches.
|
||||
let matches = &mut self.json.matches;
|
||||
self.matcher.find_iter(bytes, |m| {
|
||||
matches.push(m);
|
||||
true
|
||||
}).map_err(io::Error::error_message)?;
|
||||
// Don't report empty matches appearing at the end of the bytes.
|
||||
if !matches.is_empty()
|
||||
&& matches.last().unwrap().is_empty()
|
||||
&& matches.last().unwrap().start() >= bytes.len()
|
||||
{
|
||||
matches.pop().unwrap();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns true if this printer should quit.
|
||||
///
|
||||
/// This implements the logic for handling quitting after seeing a certain
|
||||
/// amount of matches. In most cases, the logic is simple, but we must
|
||||
/// permit all "after" contextual lines to print after reaching the limit.
|
||||
fn should_quit(&self) -> bool {
|
||||
let limit = match self.json.config.max_matches {
|
||||
None => return false,
|
||||
Some(limit) => limit,
|
||||
};
|
||||
if self.match_count < limit {
|
||||
return false;
|
||||
}
|
||||
self.after_context_remaining == 0
|
||||
}
|
||||
|
||||
/// Write the "begin" message.
|
||||
fn write_begin_message(&mut self) -> io::Result<()> {
|
||||
if self.begin_printed {
|
||||
return Ok(());
|
||||
}
|
||||
let msg = jsont::Message::Begin(jsont::Begin {
|
||||
path: self.path,
|
||||
});
|
||||
self.json.write_message(&msg)?;
|
||||
self.begin_printed = true;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
|
||||
type Error = io::Error;
|
||||
|
||||
fn matched(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, io::Error> {
|
||||
self.write_begin_message()?;
|
||||
|
||||
self.match_count += 1;
|
||||
self.after_context_remaining = searcher.after_context() as u64;
|
||||
self.record_matches(mat.bytes())?;
|
||||
self.stats.add_matches(self.json.matches.len() as u64);
|
||||
self.stats.add_matched_lines(mat.lines().count() as u64);
|
||||
|
||||
let submatches = SubMatches::new(mat.bytes(), &self.json.matches);
|
||||
let msg = jsont::Message::Match(jsont::Match {
|
||||
path: self.path,
|
||||
lines: mat.bytes(),
|
||||
line_number: mat.line_number(),
|
||||
absolute_offset: mat.absolute_byte_offset(),
|
||||
submatches: submatches.as_slice(),
|
||||
});
|
||||
self.json.write_message(&msg)?;
|
||||
Ok(!self.should_quit())
|
||||
}
|
||||
|
||||
fn context(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
ctx: &SinkContext,
|
||||
) -> Result<bool, io::Error> {
|
||||
self.write_begin_message()?;
|
||||
self.json.matches.clear();
|
||||
|
||||
if ctx.kind() == &SinkContextKind::After {
|
||||
self.after_context_remaining =
|
||||
self.after_context_remaining.saturating_sub(1);
|
||||
}
|
||||
let submatches =
|
||||
if searcher.invert_match() {
|
||||
self.record_matches(ctx.bytes())?;
|
||||
SubMatches::new(ctx.bytes(), &self.json.matches)
|
||||
} else {
|
||||
SubMatches::empty()
|
||||
};
|
||||
let msg = jsont::Message::Context(jsont::Context {
|
||||
path: self.path,
|
||||
lines: ctx.bytes(),
|
||||
line_number: ctx.line_number(),
|
||||
absolute_offset: ctx.absolute_byte_offset(),
|
||||
submatches: submatches.as_slice(),
|
||||
});
|
||||
self.json.write_message(&msg)?;
|
||||
Ok(!self.should_quit())
|
||||
}
|
||||
|
||||
fn begin(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
) -> Result<bool, io::Error> {
|
||||
self.json.wtr.reset_count();
|
||||
self.start_time = Instant::now();
|
||||
self.match_count = 0;
|
||||
self.after_context_remaining = 0;
|
||||
self.binary_byte_offset = None;
|
||||
if self.json.config.max_matches == Some(0) {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
if !self.json.config.always_begin_end {
|
||||
return Ok(true);
|
||||
}
|
||||
self.write_begin_message()?;
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn finish(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
finish: &SinkFinish,
|
||||
) -> Result<(), io::Error> {
|
||||
if !self.begin_printed {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
self.binary_byte_offset = finish.binary_byte_offset();
|
||||
self.stats.add_elapsed(self.start_time.elapsed());
|
||||
self.stats.add_searches(1);
|
||||
if self.match_count > 0 {
|
||||
self.stats.add_searches_with_match(1);
|
||||
}
|
||||
self.stats.add_bytes_searched(finish.byte_count());
|
||||
self.stats.add_bytes_printed(self.json.wtr.count());
|
||||
|
||||
let msg = jsont::Message::End(jsont::End {
|
||||
path: self.path,
|
||||
binary_offset: finish.binary_byte_offset(),
|
||||
stats: self.stats.clone(),
|
||||
});
|
||||
self.json.write_message(&msg)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// SubMatches represents a set of matches in a contiguous range of bytes.
|
||||
///
|
||||
/// A simpler representation for this would just simply be `Vec<SubMatch>`,
|
||||
/// but the common case is exactly one match per range of bytes, which we
|
||||
/// specialize here using a fixed size array without any allocation.
|
||||
enum SubMatches<'a> {
|
||||
Empty,
|
||||
Small([jsont::SubMatch<'a>; 1]),
|
||||
Big(Vec<jsont::SubMatch<'a>>),
|
||||
}
|
||||
|
||||
impl<'a> SubMatches<'a> {
|
||||
/// Create a new set of match ranges from a set of matches and the
|
||||
/// corresponding bytes that those matches apply to.
|
||||
fn new(bytes: &'a[u8], matches: &[Match]) -> SubMatches<'a> {
|
||||
if matches.len() == 1 {
|
||||
let mat = matches[0];
|
||||
SubMatches::Small([jsont::SubMatch {
|
||||
m: &bytes[mat],
|
||||
start: mat.start(),
|
||||
end: mat.end(),
|
||||
}])
|
||||
} else {
|
||||
let mut match_ranges = vec![];
|
||||
for &mat in matches {
|
||||
match_ranges.push(jsont::SubMatch {
|
||||
m: &bytes[mat],
|
||||
start: mat.start(),
|
||||
end: mat.end(),
|
||||
});
|
||||
}
|
||||
SubMatches::Big(match_ranges)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an empty set of match ranges.
|
||||
fn empty() -> SubMatches<'static> {
|
||||
SubMatches::Empty
|
||||
}
|
||||
|
||||
/// Return this set of match ranges as a slice.
|
||||
fn as_slice(&self) -> &[jsont::SubMatch] {
|
||||
match *self {
|
||||
SubMatches::Empty => &[],
|
||||
SubMatches::Small(ref x) => x,
|
||||
SubMatches::Big(ref x) => x,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use grep_regex::{RegexMatcher, RegexMatcherBuilder};
|
||||
use grep_matcher::LineTerminator;
|
||||
use grep_searcher::SearcherBuilder;
|
||||
|
||||
use super::{JSON, JSONBuilder};
|
||||
|
||||
const SHERLOCK: &'static [u8] = b"\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.
|
||||
";
|
||||
|
||||
fn printer_contents(
|
||||
printer: &mut JSON<Vec<u8>>,
|
||||
) -> String {
|
||||
String::from_utf8(printer.get_mut().to_owned()).unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn binary_detection() {
|
||||
use grep_searcher::BinaryDetection;
|
||||
|
||||
const BINARY: &'static [u8] = b"\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
can extract a clew \x00 from a wisp of straw or a flake of cigar ash;
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.\
|
||||
";
|
||||
|
||||
let matcher = RegexMatcher::new(
|
||||
r"Watson"
|
||||
).unwrap();
|
||||
let mut printer = JSONBuilder::new()
|
||||
.build(vec![]);
|
||||
SearcherBuilder::new()
|
||||
.binary_detection(BinaryDetection::quit(b'\x00'))
|
||||
.heap_limit(Some(80))
|
||||
.build()
|
||||
.search_reader(&matcher, BINARY, printer.sink(&matcher))
|
||||
.unwrap();
|
||||
let got = printer_contents(&mut printer);
|
||||
|
||||
assert_eq!(got.lines().count(), 3);
|
||||
let last = got.lines().last().unwrap();
|
||||
assert!(last.contains(r#""binary_offset":212,"#));
|
||||
}
|
||||
|
||||
/// With `max_matches(Some(1))`, searching SHERLOCK for "Watson" must produce
/// exactly three output lines.
#[test]
fn max_matches() {
    let matcher = RegexMatcher::new(r"Watson").unwrap();
    let mut printer = JSONBuilder::new()
        .max_matches(Some(1))
        .build(vec![]);
    let mut searcher = SearcherBuilder::new().build();
    searcher
        .search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
        .unwrap();

    let output = printer_contents(&mut printer);
    assert_eq!(output.lines().count(), 3);
}
|
||||
|
||||
/// A pattern that never matches must leave the printer's buffer empty.
#[test]
fn no_match() {
    let matcher = RegexMatcher::new(r"DOES NOT MATCH").unwrap();
    let mut printer = JSONBuilder::new().build(vec![]);
    let mut searcher = SearcherBuilder::new().build();
    searcher
        .search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
        .unwrap();

    let output = printer_contents(&mut printer);
    assert!(output.is_empty());
}
|
||||
|
||||
/// With `always_begin_end(true)`, a search without matches must still emit
/// two lines, mentioning both "begin" and "end".
#[test]
fn always_begin_end_no_match() {
    let matcher = RegexMatcher::new(r"DOES NOT MATCH").unwrap();
    let mut printer = JSONBuilder::new()
        .always_begin_end(true)
        .build(vec![]);
    let mut searcher = SearcherBuilder::new().build();
    searcher
        .search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
        .unwrap();

    let output = printer_contents(&mut printer);
    assert_eq!(output.lines().count(), 2);
    let has_begin = output.contains("begin");
    let has_end = output.contains("end");
    assert!(has_begin && has_end);
}
|
||||
|
||||
/// The second output line must contain the escaped text `test\r\n`, both
/// with the default configuration and with CRLF mode enabled on the matcher
/// and the searcher.
#[test]
fn missing_crlf() {
    let haystack: &[u8] = b"test\r\n";

    // Default matcher and searcher.
    let matcher = RegexMatcherBuilder::new().build("test").unwrap();
    let mut printer = JSONBuilder::new().build(vec![]);
    let mut searcher = SearcherBuilder::new().build();
    searcher
        .search_reader(&matcher, haystack, printer.sink(&matcher))
        .unwrap();
    let output = printer_contents(&mut printer);
    assert_eq!(output.lines().count(), 3);
    let second = output.lines().nth(1).unwrap();
    assert!(
        second.contains(r"test\r\n"),
        r"missing 'test\r\n' in '{}'",
        second,
    );

    // CRLF-aware matcher and searcher.
    let matcher = RegexMatcherBuilder::new()
        .crlf(true)
        .build("test")
        .unwrap();
    let mut printer = JSONBuilder::new().build(vec![]);
    let mut searcher = SearcherBuilder::new()
        .line_terminator(LineTerminator::crlf())
        .build();
    searcher
        .search_reader(&matcher, haystack, printer.sink(&matcher))
        .unwrap();
    let output = printer_contents(&mut printer);
    assert_eq!(output.lines().count(), 3);
    let second = output.lines().nth(1).unwrap();
    assert!(
        second.contains(r"test\r\n"),
        r"missing 'test\r\n' in '{}'",
        second,
    );
}
|
||||
}
|
||||
@@ -1,147 +0,0 @@
|
||||
// This module defines the types we use for JSON serialization. We specifically
|
||||
// omit deserialization, partially because there isn't a clear use case for
|
||||
// it at this time, but also because deserialization will complicate things.
|
||||
// Namely, the types below are designed in a way that permits JSON
|
||||
// serialization with little or no allocation. Allocation is often quite
|
||||
// convenient for deserialization however, so these types would become a bit
|
||||
// more complex.
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::path::Path;
|
||||
use std::str;
|
||||
|
||||
use base64;
|
||||
use serde::{Serialize, Serializer};
|
||||
|
||||
use stats::Stats;
|
||||
|
||||
/// A single message in the JSON output.
///
/// Per the serde attributes below, each message is serialized as an object
/// with a `type` key (the variant name in snake_case) and a `data` key (the
/// variant's payload).
#[derive(Serialize)]
|
||||
#[serde(tag = "type", content = "data")]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum Message<'a> {
|
||||
Begin(Begin<'a>),
|
||||
End(End<'a>),
|
||||
Match(Match<'a>),
|
||||
Context(Context<'a>),
|
||||
}
|
||||
|
||||
/// Payload of a `begin` message.
#[derive(Serialize)]
|
||||
pub struct Begin<'a> {
|
||||
// Serialized through `ser_path`, i.e. as `Data` (text or base64 bytes).
#[serde(serialize_with = "ser_path")]
|
||||
pub path: Option<&'a Path>,
|
||||
}
|
||||
|
||||
/// Payload of an `end` message: the path, an optional binary offset and the
/// accumulated `Stats`.
#[derive(Serialize)]
|
||||
pub struct End<'a> {
|
||||
#[serde(serialize_with = "ser_path")]
|
||||
pub path: Option<&'a Path>,
|
||||
pub binary_offset: Option<u64>,
|
||||
pub stats: Stats,
|
||||
}
|
||||
|
||||
/// Payload of a `match` message.
#[derive(Serialize)]
|
||||
pub struct Match<'a> {
|
||||
#[serde(serialize_with = "ser_path")]
|
||||
pub path: Option<&'a Path>,
|
||||
// Serialized through `ser_bytes`: text when valid UTF-8, base64 otherwise.
#[serde(serialize_with = "ser_bytes")]
|
||||
pub lines: &'a [u8],
|
||||
pub line_number: Option<u64>,
|
||||
pub absolute_offset: u64,
|
||||
pub submatches: &'a [SubMatch<'a>],
|
||||
}
|
||||
|
||||
/// Payload of a `context` message; it carries the same fields as `Match`.
#[derive(Serialize)]
|
||||
pub struct Context<'a> {
|
||||
#[serde(serialize_with = "ser_path")]
|
||||
pub path: Option<&'a Path>,
|
||||
#[serde(serialize_with = "ser_bytes")]
|
||||
pub lines: &'a [u8],
|
||||
pub line_number: Option<u64>,
|
||||
pub absolute_offset: u64,
|
||||
pub submatches: &'a [SubMatch<'a>],
|
||||
}
|
||||
|
||||
/// One matched span inside a `Match` or `Context` message.
///
/// NOTE(review): `start`/`end` are presumably byte offsets into the enclosing
/// `lines` value -- confirm against the code that builds these.
#[derive(Serialize)]
|
||||
pub struct SubMatch<'a> {
|
||||
// Emitted under the key `match`, since `match` cannot be a field name.
#[serde(rename = "match")]
|
||||
#[serde(serialize_with = "ser_bytes")]
|
||||
pub m: &'a [u8],
|
||||
pub start: usize,
|
||||
pub end: usize,
|
||||
}
|
||||
|
||||
/// Data represents things that look like strings, but may actually not be
|
||||
/// valid UTF-8. To handle this, `Data` is serialized as an object with one
|
||||
/// of two keys: `text` (for valid UTF-8) or `bytes` (for invalid UTF-8).
|
||||
///
|
||||
/// The happy path is valid UTF-8, which streams right through as-is, since
|
||||
/// it is natively supported by JSON. When invalid UTF-8 is found, then it is
|
||||
/// represented as arbitrary bytes and base64 encoded.
|
||||
#[derive(Clone, Debug, Hash, PartialEq, Eq, Serialize)]
|
||||
#[serde(untagged)]
|
||||
enum Data<'a> {
|
||||
Text { text: Cow<'a, str> },
|
||||
Bytes {
|
||||
#[serde(serialize_with = "to_base64")]
|
||||
bytes: &'a [u8],
|
||||
},
|
||||
}
|
||||
|
||||
impl<'a> Data<'a> {
|
||||
fn from_bytes(bytes: &[u8]) -> Data {
|
||||
match str::from_utf8(bytes) {
|
||||
Ok(text) => Data::Text { text: Cow::Borrowed(text) },
|
||||
Err(_) => Data::Bytes { bytes },
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn from_path(path: &Path) -> Data {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
match path.to_str() {
|
||||
Some(text) => Data::Text { text: Cow::Borrowed(text) },
|
||||
None => Data::Bytes { bytes: path.as_os_str().as_bytes() },
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn from_path(path: &Path) -> Data {
|
||||
// Using lossy conversion means some paths won't round trip precisely,
|
||||
// but it's not clear what we should actually do. Serde rejects
|
||||
// non-UTF-8 paths, and OsStr's are serialized as a sequence of UTF-16
|
||||
// code units on Windows. Neither seem appropriate for this use case,
|
||||
// so we do the easy thing for now.
|
||||
Data::Text { text: path.to_string_lossy() }
|
||||
}
|
||||
}
|
||||
|
||||
fn to_base64<T, S>(
|
||||
bytes: T,
|
||||
ser: S,
|
||||
) -> Result<S::Ok, S::Error>
|
||||
where T: AsRef<[u8]>,
|
||||
S: Serializer
|
||||
{
|
||||
ser.serialize_str(&base64::encode(&bytes))
|
||||
}
|
||||
|
||||
fn ser_bytes<T, S>(
|
||||
bytes: T,
|
||||
ser: S,
|
||||
) -> Result<S::Ok, S::Error>
|
||||
where T: AsRef<[u8]>,
|
||||
S: Serializer
|
||||
{
|
||||
Data::from_bytes(bytes.as_ref()).serialize(ser)
|
||||
}
|
||||
|
||||
fn ser_path<P, S>(
|
||||
path: &Option<P>,
|
||||
ser: S,
|
||||
) -> Result<S::Ok, S::Error>
|
||||
where P: AsRef<Path>,
|
||||
S: Serializer
|
||||
{
|
||||
path.as_ref().map(|p| Data::from_path(p.as_ref())).serialize(ser)
|
||||
}
|
||||
@@ -1,107 +0,0 @@
|
||||
/*!
|
||||
This crate provides featureful and fast printers that interoperate with the
|
||||
[`grep-searcher`](https://docs.rs/grep-searcher)
|
||||
crate.
|
||||
|
||||
# Brief overview
|
||||
|
||||
The [`Standard`](struct.Standard.html) printer shows results in a human
|
||||
readable format, and is modeled after the formats used by standard grep-like
|
||||
tools. Features include, but are not limited to, cross platform terminal
|
||||
coloring, search & replace, multi-line result handling and reporting summary
|
||||
statistics.
|
||||
|
||||
The [`JSON`](struct.JSON.html) printer shows results in a machine readable
|
||||
format. To facilitate a stream of search results, the format uses
|
||||
[JSON Lines](http://jsonlines.org/)
|
||||
by emitting a series of messages as search results are found.
|
||||
|
||||
The [`Summary`](struct.Summary.html) printer shows *aggregate* results for a
|
||||
single search in a human readable format, and is modeled after similar formats
|
||||
found in standard grep-like tools. This printer is useful for showing the total
|
||||
number of matches and/or printing file paths that either contain or don't
|
||||
contain matches.
|
||||
|
||||
# Example
|
||||
|
||||
This example shows how to create a "standard" printer and execute a search.
|
||||
|
||||
```
|
||||
extern crate grep_regex;
|
||||
extern crate grep_printer;
|
||||
extern crate grep_searcher;
|
||||
|
||||
use std::error::Error;
|
||||
|
||||
use grep_regex::RegexMatcher;
|
||||
use grep_printer::Standard;
|
||||
use grep_searcher::Searcher;
|
||||
|
||||
const SHERLOCK: &'static [u8] = b"\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.
|
||||
";
|
||||
|
||||
# fn main() { example().unwrap(); }
|
||||
fn example() -> Result<(), Box<Error>> {
|
||||
let matcher = RegexMatcher::new(r"Sherlock")?;
|
||||
let mut printer = Standard::new_no_color(vec![]);
|
||||
Searcher::new().search_slice(&matcher, SHERLOCK, printer.sink(&matcher))?;
|
||||
|
||||
// into_inner gives us back the underlying writer we provided to
|
||||
// new_no_color, which is wrapped in a termcolor::NoColor. Thus, a second
|
||||
// into_inner gives us back the actual buffer.
|
||||
let output = String::from_utf8(printer.into_inner().into_inner())?;
|
||||
let expected = "\
|
||||
1:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
3:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
";
|
||||
assert_eq!(output, expected);
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
#[cfg(feature = "serde1")]
|
||||
extern crate base64;
|
||||
extern crate bstr;
|
||||
extern crate grep_matcher;
|
||||
#[cfg(test)]
|
||||
extern crate grep_regex;
|
||||
extern crate grep_searcher;
|
||||
#[cfg(feature = "serde1")]
|
||||
extern crate serde;
|
||||
#[cfg(feature = "serde1")]
|
||||
#[macro_use]
|
||||
extern crate serde_derive;
|
||||
#[cfg(feature = "serde1")]
|
||||
extern crate serde_json;
|
||||
extern crate termcolor;
|
||||
|
||||
pub use color::{ColorError, ColorSpecs, UserColorSpec, default_color_specs};
|
||||
#[cfg(feature = "serde1")]
|
||||
pub use json::{JSON, JSONBuilder, JSONSink};
|
||||
pub use standard::{Standard, StandardBuilder, StandardSink};
|
||||
pub use stats::Stats;
|
||||
pub use summary::{Summary, SummaryBuilder, SummaryKind, SummarySink};
|
||||
pub use util::PrinterPath;
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
|
||||
mod color;
|
||||
mod counter;
|
||||
#[cfg(feature = "serde1")]
|
||||
mod json;
|
||||
#[cfg(feature = "serde1")]
|
||||
mod jsont;
|
||||
mod standard;
|
||||
mod stats;
|
||||
mod summary;
|
||||
mod util;
|
||||
@@ -1,24 +0,0 @@
|
||||
/// Like assert_eq, but prints both values in full, each between separator
/// lines, which is easier to read for long strings.
#[cfg(test)]
#[macro_export]
macro_rules! assert_eq_printed {
    ($expected:expr, $got:expr) => {
        let want = &*$expected;
        let have = &*$got;
        if want != have {
            panic!("
printed outputs differ!

expected:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
{}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

got:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
{}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
", want, have);
        }
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,147 +0,0 @@
|
||||
use std::ops::{Add, AddAssign};
|
||||
use std::time::Duration;
|
||||
|
||||
use util::NiceDuration;
|
||||
|
||||
/// Summary statistics produced at the end of a search.
|
||||
///
|
||||
/// When statistics are reported by a printer, they correspond to all searches
|
||||
/// executed with that printer.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
#[cfg_attr(feature = "serde1", derive(Serialize))]
|
||||
pub struct Stats {
|
||||
// Total amount of time elapsed.
elapsed: NiceDuration,
|
||||
// Total number of searches executed.
searches: u64,
|
||||
// Total number of searches that found at least one match.
searches_with_match: u64,
|
||||
// Total number of bytes searched.
bytes_searched: u64,
|
||||
// Total number of bytes printed.
bytes_printed: u64,
|
||||
// Total number of lines that participated in a match.
matched_lines: u64,
|
||||
// Total number of matches; there may be several per line.
matches: u64,
|
||||
}
|
||||
|
||||
impl Add for Stats {
    type Output = Stats;

    /// Sums two sets of statistics by delegating to the `Add<&Stats>` impl.
    fn add(self, rhs: Stats) -> Stats {
        <Stats as Add<&Stats>>::add(self, &rhs)
    }
}
|
||||
|
||||
impl<'a> Add<&'a Stats> for Stats {
|
||||
type Output = Stats;
|
||||
|
||||
fn add(self, rhs: &'a Stats) -> Stats {
|
||||
Stats {
|
||||
elapsed: NiceDuration(self.elapsed.0 + rhs.elapsed.0),
|
||||
searches: self.searches + rhs.searches,
|
||||
searches_with_match:
|
||||
self.searches_with_match + rhs.searches_with_match,
|
||||
bytes_searched: self.bytes_searched + rhs.bytes_searched,
|
||||
bytes_printed: self.bytes_printed + rhs.bytes_printed,
|
||||
matched_lines: self.matched_lines + rhs.matched_lines,
|
||||
matches: self.matches + rhs.matches,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AddAssign for Stats {
    /// Adds `rhs` into `self` by delegating to the `AddAssign<&Stats>` impl.
    fn add_assign(&mut self, rhs: Stats) {
        AddAssign::<&Stats>::add_assign(self, &rhs);
    }
}
|
||||
|
||||
impl<'a> AddAssign<&'a Stats> for Stats {
|
||||
fn add_assign(&mut self, rhs: &'a Stats) {
|
||||
self.elapsed.0 += rhs.elapsed.0;
|
||||
self.searches += rhs.searches;
|
||||
self.searches_with_match += rhs.searches_with_match;
|
||||
self.bytes_searched += rhs.bytes_searched;
|
||||
self.bytes_printed += rhs.bytes_printed;
|
||||
self.matched_lines += rhs.matched_lines;
|
||||
self.matches += rhs.matches;
|
||||
}
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
/// Return a new value for tracking aggregate statistics across searches.
|
||||
///
|
||||
/// All statistics are set to `0`.
|
||||
pub fn new() -> Stats {
|
||||
Stats::default()
|
||||
}
|
||||
|
||||
/// Return the total amount of time elapsed.
|
||||
pub fn elapsed(&self) -> Duration {
|
||||
self.elapsed.0
|
||||
}
|
||||
|
||||
/// Return the total number of searches executed.
|
||||
pub fn searches(&self) -> u64 {
|
||||
self.searches
|
||||
}
|
||||
|
||||
/// Return the total number of searches that found at least one match.
|
||||
pub fn searches_with_match(&self) -> u64 {
|
||||
self.searches_with_match
|
||||
}
|
||||
|
||||
/// Return the total number of bytes searched.
|
||||
pub fn bytes_searched(&self) -> u64 {
|
||||
self.bytes_searched
|
||||
}
|
||||
|
||||
/// Return the total number of bytes printed.
|
||||
pub fn bytes_printed(&self) -> u64 {
|
||||
self.bytes_printed
|
||||
}
|
||||
|
||||
/// Return the total number of lines that participated in a match.
|
||||
///
|
||||
/// When matches may contain multiple lines then this includes every line
|
||||
/// that is part of every match.
|
||||
pub fn matched_lines(&self) -> u64 {
|
||||
self.matched_lines
|
||||
}
|
||||
|
||||
/// Return the total number of matches.
|
||||
///
|
||||
/// There may be multiple matches per line.
|
||||
pub fn matches(&self) -> u64 {
|
||||
self.matches
|
||||
}
|
||||
|
||||
/// Add to the elapsed time.
|
||||
pub fn add_elapsed(&mut self, duration: Duration) {
|
||||
self.elapsed.0 += duration;
|
||||
}
|
||||
|
||||
/// Add to the number of searches executed.
|
||||
pub fn add_searches(&mut self, n: u64) {
|
||||
self.searches += n;
|
||||
}
|
||||
|
||||
/// Add to the number of searches that found at least one match.
|
||||
pub fn add_searches_with_match(&mut self, n: u64) {
|
||||
self.searches_with_match += n;
|
||||
}
|
||||
|
||||
/// Add to the total number of bytes searched.
|
||||
pub fn add_bytes_searched(&mut self, n: u64) {
|
||||
self.bytes_searched += n;
|
||||
}
|
||||
|
||||
/// Add to the total number of bytes printed.
|
||||
pub fn add_bytes_printed(&mut self, n: u64) {
|
||||
self.bytes_printed += n;
|
||||
}
|
||||
|
||||
/// Add to the total number of lines that participated in a match.
|
||||
pub fn add_matched_lines(&mut self, n: u64) {
|
||||
self.matched_lines += n;
|
||||
}
|
||||
|
||||
/// Add to the total number of matches.
|
||||
pub fn add_matches(&mut self, n: u64) {
|
||||
self.matches += n;
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,368 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::time;
|
||||
|
||||
use bstr::{BStr, BString};
|
||||
use grep_matcher::{Captures, LineTerminator, Match, Matcher};
|
||||
use grep_searcher::{
|
||||
LineIter,
|
||||
SinkError, SinkContext, SinkContextKind, SinkMatch,
|
||||
};
|
||||
#[cfg(feature = "serde1")]
|
||||
use serde::{Serialize, Serializer};
|
||||
|
||||
/// A type for handling replacements while amortizing allocation.
|
||||
pub struct Replacer<M: Matcher> {
|
||||
space: Option<Space<M>>,
|
||||
}
|
||||
|
||||
struct Space<M: Matcher> {
|
||||
/// The place to store capture locations.
|
||||
caps: M::Captures,
|
||||
/// The place to write a replacement to.
|
||||
dst: Vec<u8>,
|
||||
/// The place to store match offsets in terms of `dst`.
|
||||
matches: Vec<Match>,
|
||||
}
|
||||
|
||||
impl<M: Matcher> fmt::Debug for Replacer<M> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let (dst, matches) = self.replacement().unwrap_or((&[], &[]));
|
||||
f.debug_struct("Replacer")
|
||||
.field("dst", &dst)
|
||||
.field("matches", &matches)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<M: Matcher> Replacer<M> {
|
||||
/// Create a new replacer for use with a particular matcher.
|
||||
///
|
||||
/// This constructor does not allocate. Instead, space for dealing with
|
||||
/// replacements is allocated lazily only when needed.
|
||||
pub fn new() -> Replacer<M> {
|
||||
Replacer { space: None }
|
||||
}
|
||||
|
||||
/// Executes a replacement on the given subject string by replacing all
|
||||
/// matches with the given replacement. To access the result of the
|
||||
/// replacement, use the `replacement` method.
|
||||
///
|
||||
/// This can fail if the underlying matcher reports an error.
|
||||
pub fn replace_all<'a>(
|
||||
&'a mut self,
|
||||
matcher: &M,
|
||||
subject: &[u8],
|
||||
replacement: &[u8],
|
||||
) -> io::Result<()> {
|
||||
{
|
||||
let &mut Space {
|
||||
ref mut dst,
|
||||
ref mut caps,
|
||||
ref mut matches,
|
||||
} = self.allocate(matcher)?;
|
||||
dst.clear();
|
||||
matches.clear();
|
||||
|
||||
matcher.replace_with_captures(
|
||||
subject,
|
||||
caps,
|
||||
dst,
|
||||
|caps, dst| {
|
||||
let start = dst.len();
|
||||
caps.interpolate(
|
||||
|name| matcher.capture_index(name),
|
||||
subject,
|
||||
replacement,
|
||||
dst,
|
||||
);
|
||||
let end = dst.len();
|
||||
matches.push(Match::new(start, end));
|
||||
true
|
||||
},
|
||||
).map_err(io::Error::error_message)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return the result of the prior replacement and the match offsets for
|
||||
/// all replacement occurrences within the returned replacement buffer.
|
||||
///
|
||||
/// If no replacement has occurred then `None` is returned.
|
||||
pub fn replacement<'a>(&'a self) -> Option<(&'a [u8], &'a [Match])> {
|
||||
match self.space {
|
||||
None => None,
|
||||
Some(ref space) => {
|
||||
if space.matches.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some((&space.dst, &space.matches))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear space used for performing a replacement.
|
||||
///
|
||||
/// Subsequent calls to `replacement` after calling `clear` (but before
|
||||
/// executing another replacement) will always return `None`.
|
||||
pub fn clear(&mut self) {
|
||||
if let Some(ref mut space) = self.space {
|
||||
space.dst.clear();
|
||||
space.matches.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocate space for replacements when used with the given matcher and
|
||||
/// return a mutable reference to that space.
|
||||
///
|
||||
/// This can fail if allocating space for capture locations from the given
|
||||
/// matcher fails.
|
||||
fn allocate(&mut self, matcher: &M) -> io::Result<&mut Space<M>> {
|
||||
if self.space.is_none() {
|
||||
let caps = matcher
|
||||
.new_captures()
|
||||
.map_err(io::Error::error_message)?;
|
||||
self.space = Some(Space {
|
||||
caps: caps,
|
||||
dst: vec![],
|
||||
matches: vec![],
|
||||
});
|
||||
}
|
||||
Ok(self.space.as_mut().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
/// A simple layer of abstraction over either a match or a contextual line
|
||||
/// reported by the searcher.
|
||||
///
|
||||
/// In particular, this provides an API that unions the `SinkMatch` and
|
||||
/// `SinkContext` types while also exposing a list of all individual match
|
||||
/// locations.
|
||||
///
|
||||
/// While this serves as a convenient mechanism to abstract over `SinkMatch`
|
||||
/// and `SinkContext`, this also provides a way to abstract over replacements.
|
||||
/// Namely, after a replacement, a `Sunk` value can be constructed using the
|
||||
/// results of the replacement instead of the bytes reported directly by the
|
||||
/// searcher.
|
||||
#[derive(Debug)]
|
||||
pub struct Sunk<'a> {
|
||||
bytes: &'a [u8],
|
||||
absolute_byte_offset: u64,
|
||||
line_number: Option<u64>,
|
||||
context_kind: Option<&'a SinkContextKind>,
|
||||
matches: &'a [Match],
|
||||
original_matches: &'a [Match],
|
||||
}
|
||||
|
||||
impl<'a> Sunk<'a> {
|
||||
#[inline]
|
||||
pub fn empty() -> Sunk<'static> {
|
||||
Sunk {
|
||||
bytes: &[],
|
||||
absolute_byte_offset: 0,
|
||||
line_number: None,
|
||||
context_kind: None,
|
||||
matches: &[],
|
||||
original_matches: &[],
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn from_sink_match(
|
||||
sunk: &'a SinkMatch<'a>,
|
||||
original_matches: &'a [Match],
|
||||
replacement: Option<(&'a [u8], &'a [Match])>,
|
||||
) -> Sunk<'a> {
|
||||
let (bytes, matches) = replacement.unwrap_or_else(|| {
|
||||
(sunk.bytes(), original_matches)
|
||||
});
|
||||
Sunk {
|
||||
bytes: bytes,
|
||||
absolute_byte_offset: sunk.absolute_byte_offset(),
|
||||
line_number: sunk.line_number(),
|
||||
context_kind: None,
|
||||
matches: matches,
|
||||
original_matches: original_matches,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn from_sink_context(
|
||||
sunk: &'a SinkContext<'a>,
|
||||
original_matches: &'a [Match],
|
||||
replacement: Option<(&'a [u8], &'a [Match])>,
|
||||
) -> Sunk<'a> {
|
||||
let (bytes, matches) = replacement.unwrap_or_else(|| {
|
||||
(sunk.bytes(), original_matches)
|
||||
});
|
||||
Sunk {
|
||||
bytes: bytes,
|
||||
absolute_byte_offset: sunk.absolute_byte_offset(),
|
||||
line_number: sunk.line_number(),
|
||||
context_kind: Some(sunk.kind()),
|
||||
matches: matches,
|
||||
original_matches: original_matches,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn context_kind(&self) -> Option<&'a SinkContextKind> {
|
||||
self.context_kind
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn bytes(&self) -> &'a [u8] {
|
||||
self.bytes
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn matches(&self) -> &'a [Match] {
|
||||
self.matches
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn original_matches(&self) -> &'a [Match] {
|
||||
self.original_matches
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn lines(&self, line_term: u8) -> LineIter<'a> {
|
||||
LineIter::new(line_term, self.bytes())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn absolute_byte_offset(&self) -> u64 {
|
||||
self.absolute_byte_offset
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn line_number(&self) -> Option<u64> {
|
||||
self.line_number
|
||||
}
|
||||
}
|
||||
|
||||
/// A simple encapsulation of a file path used by a printer.
|
||||
///
|
||||
/// This represents any transforms that we might want to perform on the path,
|
||||
/// such as converting it to valid UTF-8 and/or replacing its separator with
|
||||
/// something else. This allows us to amortize work if we are printing the
|
||||
/// file path for every match.
|
||||
///
|
||||
/// In the common case, no transformation is needed, which lets us avoid the
|
||||
/// allocation. Typically, only Windows requires a transform, since we can't
|
||||
/// access the raw bytes of a path directly and first need to lossily convert
|
||||
/// to UTF-8. Windows is also typically where the path separator replacement
|
||||
/// is used, e.g., in cygwin environments to use `/` instead of `\`.
|
||||
///
|
||||
/// Users of this type are expected to construct it from a normal `Path`
|
||||
/// found in the standard library. It can then be written to any `io::Write`
|
||||
/// implementation using the `as_bytes` method. This achieves platform
|
||||
/// portability with a small cost: on Windows, paths that are not valid UTF-16
|
||||
/// will not roundtrip correctly.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct PrinterPath<'a>(Cow<'a, BStr>);
|
||||
|
||||
impl<'a> PrinterPath<'a> {
|
||||
/// Create a new path suitable for printing.
|
||||
pub fn new(path: &'a Path) -> PrinterPath<'a> {
|
||||
PrinterPath(BString::from_path_lossy(path))
|
||||
}
|
||||
|
||||
/// Create a new printer path from the given path which can be efficiently
|
||||
/// written to a writer without allocation.
|
||||
///
|
||||
/// If the given separator is present, then any separators in `path` are
|
||||
/// replaced with it.
|
||||
pub fn with_separator(path: &'a Path, sep: Option<u8>) -> PrinterPath<'a> {
|
||||
let mut ppath = PrinterPath::new(path);
|
||||
if let Some(sep) = sep {
|
||||
ppath.replace_separator(sep);
|
||||
}
|
||||
ppath
|
||||
}
|
||||
|
||||
/// Replace the path separator in this path with the given separator
|
||||
/// and do it in place. On Windows, both `/` and `\` are treated as
|
||||
/// path separators that are both replaced by `new_sep`. In all other
|
||||
/// environments, only `/` is treated as a path separator.
|
||||
fn replace_separator(&mut self, new_sep: u8) {
|
||||
let transformed_path: BString = self.0.bytes().map(|b| {
|
||||
if b == b'/' || (cfg!(windows) && b == b'\\') {
|
||||
new_sep
|
||||
} else {
|
||||
b
|
||||
}
|
||||
}).collect();
|
||||
self.0 = Cow::Owned(transformed_path);
|
||||
}
|
||||
|
||||
/// Return the raw bytes for this path.
|
||||
pub fn as_bytes(&self) -> &[u8] {
|
||||
self.0.as_bytes()
|
||||
}
|
||||
}
|
||||
|
||||
/// A type that provides "nicer" Display and Serialize impls for
/// std::time::Duration. The serialization format should actually be compatible
/// with the Deserialize impl for std::time::Duration, since this type only
/// adds new fields.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct NiceDuration(pub time::Duration);

impl fmt::Display for NiceDuration {
    /// Writes the duration as a number of seconds with six digits after the
    /// decimal point, followed by `s` (e.g. `1.500000s`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{:0.6}s", self.fractional_seconds())
    }
}

impl NiceDuration {
    /// Returns this duration as a (possibly fractional) number of seconds.
    ///
    /// The integer part is the number of whole seconds; the fractional part
    /// is the sub-second remainder, computed from the nanosecond component.
    fn fractional_seconds(&self) -> f64 {
        let fractional = (self.0.subsec_nanos() as f64) / 1_000_000_000.0;
        self.0.as_secs() as f64 + fractional
    }
}

#[cfg(feature = "serde1")]
impl Serialize for NiceDuration {
    /// Serializes as a struct named `Duration` with the fields `secs`,
    /// `nanos` and `human`, where `human` is the `Display` rendering.
    fn serialize<S: Serializer>(&self, ser: S) -> Result<S::Ok, S::Error> {
        use serde::ser::SerializeStruct;

        // Three fields are written below, so the declared length must be 3.
        // Serializers that rely on the declared field count would otherwise
        // see a mismatch.
        let mut state = ser.serialize_struct("Duration", 3)?;
        state.serialize_field("secs", &self.0.as_secs())?;
        state.serialize_field("nanos", &self.0.subsec_nanos())?;
        state.serialize_field("human", &self.to_string())?;
        state.end()
    }
}
|
||||
|
||||
/// Trim prefix ASCII spaces from the given slice and return the corresponding
|
||||
/// range.
|
||||
///
|
||||
/// This stops trimming a prefix as soon as it sees non-whitespace or a line
|
||||
/// terminator.
|
||||
pub fn trim_ascii_prefix(
|
||||
line_term: LineTerminator,
|
||||
slice: &[u8],
|
||||
range: Match,
|
||||
) -> Match {
|
||||
fn is_space(b: u8) -> bool {
|
||||
match b {
|
||||
b'\t' | b'\n' | b'\x0B' | b'\x0C' | b'\r' | b' ' => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
let count = slice[range]
|
||||
.iter()
|
||||
.take_while(|&&b| -> bool {
|
||||
is_space(b) && !line_term.as_bytes().contains(&b)
|
||||
})
|
||||
.count();
|
||||
range.with_start(range.start() + count)
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
[package]
|
||||
name = "grep-regex"
|
||||
version = "0.1.3" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Use Rust's regex library with the 'grep' crate.
|
||||
"""
|
||||
documentation = "https://docs.rs/grep-regex"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "search", "pattern", "line"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[dependencies]
|
||||
aho-corasick = "0.7.3"
|
||||
grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
|
||||
log = "0.4.5"
|
||||
regex = "1.1"
|
||||
regex-syntax = "0.6.5"
|
||||
thread_local = "0.3.6"
|
||||
utf8-ranges = "1.0.1"
|
||||
@@ -1,21 +0,0 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Andrew Gallant
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -1,35 +0,0 @@
|
||||
grep-regex
|
||||
----------
|
||||
The `grep-regex` crate provides an implementation of the `Matcher` trait from
|
||||
the `grep-matcher` crate. This implementation permits Rust's regex engine to
|
||||
be used in the `grep` crate for fast line oriented searching.
|
||||
|
||||
[Linux build status](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[Windows build status](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[grep-regex on crates.io](https://crates.io/crates/grep-regex)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/grep-regex](https://docs.rs/grep-regex)
|
||||
|
||||
**NOTE:** You probably don't want to use this crate directly. Instead, you
|
||||
should prefer the facade defined in the
|
||||
[`grep`](https://docs.rs/grep)
|
||||
crate.
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
grep-regex = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_regex;
|
||||
```
|
||||
@@ -1,24 +0,0 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
||||
@@ -1,263 +0,0 @@
|
||||
use regex_syntax::ast::{self, Ast};
|
||||
use regex_syntax::ast::parse::Parser;
|
||||
|
||||
/// The results of analyzing AST of a regular expression (e.g., for supporting
|
||||
/// smart case).
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AstAnalysis {
|
||||
/// True if and only if a literal uppercase character occurs in the regex.
|
||||
any_uppercase: bool,
|
||||
/// True if and only if the regex contains any literal at all.
|
||||
any_literal: bool,
|
||||
/// True if and only if the regex consists entirely of a literal and no
|
||||
/// other special regex characters.
|
||||
all_verbatim_literal: bool,
|
||||
}
|
||||
|
||||
impl AstAnalysis {
|
||||
/// Returns a `AstAnalysis` value by doing analysis on the AST of `pattern`.
|
||||
///
|
||||
/// If `pattern` is not a valid regular expression, then `None` is
|
||||
/// returned.
|
||||
#[allow(dead_code)]
|
||||
pub fn from_pattern(pattern: &str) -> Option<AstAnalysis> {
|
||||
Parser::new()
|
||||
.parse(pattern)
|
||||
.map(|ast| AstAnalysis::from_ast(&ast))
|
||||
.ok()
|
||||
}
|
||||
|
||||
/// Perform an AST analysis given the AST.
|
||||
pub fn from_ast(ast: &Ast) -> AstAnalysis {
|
||||
let mut analysis = AstAnalysis::new();
|
||||
analysis.from_ast_impl(ast);
|
||||
analysis
|
||||
}
|
||||
|
||||
/// Returns true if and only if a literal uppercase character occurs in
|
||||
/// the pattern.
|
||||
///
|
||||
/// For example, a pattern like `\pL` contains no uppercase literals,
|
||||
/// even though `L` is uppercase and the `\pL` class contains uppercase
|
||||
/// characters.
|
||||
pub fn any_uppercase(&self) -> bool {
|
||||
self.any_uppercase
|
||||
}
|
||||
|
||||
/// Returns true if and only if the regex contains any literal at all.
|
||||
///
|
||||
/// For example, a pattern like `\pL` reports `false`, but a pattern like
|
||||
/// `\pLfoo` reports `true`.
|
||||
pub fn any_literal(&self) -> bool {
|
||||
self.any_literal
|
||||
}
|
||||
|
||||
/// Returns true if and only if the entire pattern is a verbatim literal
|
||||
/// with no special meta characters.
|
||||
///
|
||||
/// When this is true, then the pattern satisfies the following law:
|
||||
/// `escape(pattern) == pattern`. Notable examples where this returns
|
||||
/// `false` include patterns like `a\u0061` even though `\u0061` is just
|
||||
/// a literal `a`.
|
||||
///
|
||||
/// The purpose of this flag is to determine whether the patterns can be
|
||||
/// given to non-regex substring search algorithms as-is.
|
||||
#[allow(dead_code)]
|
||||
pub fn all_verbatim_literal(&self) -> bool {
|
||||
self.all_verbatim_literal
|
||||
}
|
||||
|
||||
/// Creates a new `AstAnalysis` value with an initial configuration.
|
||||
fn new() -> AstAnalysis {
|
||||
AstAnalysis {
|
||||
any_uppercase: false,
|
||||
any_literal: false,
|
||||
all_verbatim_literal: true,
|
||||
}
|
||||
}
|
||||
|
||||
fn from_ast_impl(&mut self, ast: &Ast) {
|
||||
if self.done() {
|
||||
return;
|
||||
}
|
||||
match *ast {
|
||||
Ast::Empty(_) => {}
|
||||
Ast::Flags(_)
|
||||
| Ast::Dot(_)
|
||||
| Ast::Assertion(_)
|
||||
| Ast::Class(ast::Class::Unicode(_))
|
||||
| Ast::Class(ast::Class::Perl(_)) => {
|
||||
self.all_verbatim_literal = false;
|
||||
}
|
||||
Ast::Literal(ref x) => {
|
||||
self.from_ast_literal(x);
|
||||
}
|
||||
Ast::Class(ast::Class::Bracketed(ref x)) => {
|
||||
self.all_verbatim_literal = false;
|
||||
self.from_ast_class_set(&x.kind);
|
||||
}
|
||||
Ast::Repetition(ref x) => {
|
||||
self.all_verbatim_literal = false;
|
||||
self.from_ast_impl(&x.ast);
|
||||
}
|
||||
Ast::Group(ref x) => {
|
||||
self.all_verbatim_literal = false;
|
||||
self.from_ast_impl(&x.ast);
|
||||
}
|
||||
Ast::Alternation(ref alt) => {
|
||||
self.all_verbatim_literal = false;
|
||||
for x in &alt.asts {
|
||||
self.from_ast_impl(x);
|
||||
}
|
||||
}
|
||||
Ast::Concat(ref alt) => {
|
||||
for x in &alt.asts {
|
||||
self.from_ast_impl(x);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn from_ast_class_set(&mut self, ast: &ast::ClassSet) {
|
||||
if self.done() {
|
||||
return;
|
||||
}
|
||||
match *ast {
|
||||
ast::ClassSet::Item(ref item) => {
|
||||
self.from_ast_class_set_item(item);
|
||||
}
|
||||
ast::ClassSet::BinaryOp(ref x) => {
|
||||
self.from_ast_class_set(&x.lhs);
|
||||
self.from_ast_class_set(&x.rhs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn from_ast_class_set_item(&mut self, ast: &ast::ClassSetItem) {
|
||||
if self.done() {
|
||||
return;
|
||||
}
|
||||
match *ast {
|
||||
ast::ClassSetItem::Empty(_)
|
||||
| ast::ClassSetItem::Ascii(_)
|
||||
| ast::ClassSetItem::Unicode(_)
|
||||
| ast::ClassSetItem::Perl(_) => {}
|
||||
ast::ClassSetItem::Literal(ref x) => {
|
||||
self.from_ast_literal(x);
|
||||
}
|
||||
ast::ClassSetItem::Range(ref x) => {
|
||||
self.from_ast_literal(&x.start);
|
||||
self.from_ast_literal(&x.end);
|
||||
}
|
||||
ast::ClassSetItem::Bracketed(ref x) => {
|
||||
self.from_ast_class_set(&x.kind);
|
||||
}
|
||||
ast::ClassSetItem::Union(ref union) => {
|
||||
for x in &union.items {
|
||||
self.from_ast_class_set_item(x);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn from_ast_literal(&mut self, ast: &ast::Literal) {
|
||||
if ast.kind != ast::LiteralKind::Verbatim {
|
||||
self.all_verbatim_literal = false;
|
||||
}
|
||||
self.any_literal = true;
|
||||
self.any_uppercase = self.any_uppercase || ast.c.is_uppercase();
|
||||
}
|
||||
|
||||
/// Returns true if and only if the attributes can never change no matter
|
||||
/// what other AST it might see.
|
||||
fn done(&self) -> bool {
|
||||
self.any_uppercase && self.any_literal && !self.all_verbatim_literal
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Analyzes `pattern` and checks all three flags against the expected
    /// values.
    fn check(pattern: &str, uppercase: bool, literal: bool, verbatim: bool) {
        let x = AstAnalysis::from_pattern(pattern).unwrap();
        assert_eq!(
            x.any_uppercase, uppercase,
            "any_uppercase for {:?}", pattern
        );
        assert_eq!(
            x.any_literal, literal,
            "any_literal for {:?}", pattern
        );
        assert_eq!(
            x.all_verbatim_literal, verbatim,
            "all_verbatim_literal for {:?}", pattern
        );
    }

    #[test]
    fn various() {
        // Columns: pattern, any_uppercase, any_literal, all_verbatim_literal.
        check("", false, false, true);
        check("foo", false, true, true);
        check("Foo", true, true, true);
        check("foO", true, true, true);
        check(r"foo\\", false, true, false);
        check(r"foo\w", false, true, false);
        check(r"foo\S", false, true, false);
        check(r"foo\p{Ll}", false, true, false);
        check(r"foo[a-z]", false, true, false);
        check(r"foo[A-Z]", true, true, false);
        check(r"foo[\S\t]", false, true, false);
        check(r"foo\\S", true, true, false);
        check(r"\p{Ll}", false, false, false);
        check(r"aBc\w", true, true, false);
        check(r"a\u0061", false, true, false);
    }
}
|
||||
@@ -1,294 +0,0 @@
|
||||
use grep_matcher::{ByteSet, LineTerminator};
|
||||
use regex::bytes::{Regex, RegexBuilder};
|
||||
use regex_syntax::ast::{self, Ast};
|
||||
use regex_syntax::hir::{self, Hir};
|
||||
|
||||
use ast::AstAnalysis;
|
||||
use crlf::crlfify;
|
||||
use error::Error;
|
||||
use literal::LiteralSets;
|
||||
use multi::alternation_literals;
|
||||
use non_matching::non_matching_bytes;
|
||||
use strip::strip_from_match;
|
||||
|
||||
/// Config represents the configuration of a regex matcher in this crate.
|
||||
/// The configuration is itself a rough combination of the knobs found in
|
||||
/// the `regex` crate itself, along with additional `grep-matcher` specific
|
||||
/// options.
|
||||
///
|
||||
/// The configuration can be used to build a "configured" HIR expression. A
|
||||
/// configured HIR expression is an HIR expression that is aware of the
|
||||
/// configuration which generated it, and provides transformation on that HIR
|
||||
/// such that the configuration is preserved.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Config {
|
||||
pub case_insensitive: bool,
|
||||
pub case_smart: bool,
|
||||
pub multi_line: bool,
|
||||
pub dot_matches_new_line: bool,
|
||||
pub swap_greed: bool,
|
||||
pub ignore_whitespace: bool,
|
||||
pub unicode: bool,
|
||||
pub octal: bool,
|
||||
pub size_limit: usize,
|
||||
pub dfa_size_limit: usize,
|
||||
pub nest_limit: u32,
|
||||
pub line_terminator: Option<LineTerminator>,
|
||||
pub crlf: bool,
|
||||
pub word: bool,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
fn default() -> Config {
|
||||
Config {
|
||||
case_insensitive: false,
|
||||
case_smart: false,
|
||||
multi_line: false,
|
||||
dot_matches_new_line: false,
|
||||
swap_greed: false,
|
||||
ignore_whitespace: false,
|
||||
unicode: true,
|
||||
octal: false,
|
||||
// These size limits are much bigger than what's in the regex
|
||||
// crate.
|
||||
size_limit: 100 * (1<<20),
|
||||
dfa_size_limit: 1000 * (1<<20),
|
||||
nest_limit: 250,
|
||||
line_terminator: None,
|
||||
crlf: false,
|
||||
word: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Parse the given pattern and returned its HIR expression along with
|
||||
/// the current configuration.
|
||||
///
|
||||
/// If there was a problem parsing the given expression then an error
|
||||
/// is returned.
|
||||
pub fn hir(&self, pattern: &str) -> Result<ConfiguredHIR, Error> {
|
||||
let ast = self.ast(pattern)?;
|
||||
let analysis = self.analysis(&ast)?;
|
||||
let expr = hir::translate::TranslatorBuilder::new()
|
||||
.allow_invalid_utf8(true)
|
||||
.case_insensitive(self.is_case_insensitive(&analysis))
|
||||
.multi_line(self.multi_line)
|
||||
.dot_matches_new_line(self.dot_matches_new_line)
|
||||
.swap_greed(self.swap_greed)
|
||||
.unicode(self.unicode)
|
||||
.build()
|
||||
.translate(pattern, &ast)
|
||||
.map_err(Error::regex)?;
|
||||
let expr = match self.line_terminator {
|
||||
None => expr,
|
||||
Some(line_term) => strip_from_match(expr, line_term)?,
|
||||
};
|
||||
Ok(ConfiguredHIR {
|
||||
original: pattern.to_string(),
|
||||
config: self.clone(),
|
||||
analysis: analysis,
|
||||
// If CRLF mode is enabled, replace `$` with `(?:\r?$)`.
|
||||
expr: if self.crlf { crlfify(expr) } else { expr },
|
||||
})
|
||||
}
|
||||
|
||||
/// Accounting for the `smart_case` config knob, return true if and only if
|
||||
/// this pattern should be matched case insensitively.
|
||||
fn is_case_insensitive(
|
||||
&self,
|
||||
analysis: &AstAnalysis,
|
||||
) -> bool {
|
||||
if self.case_insensitive {
|
||||
return true;
|
||||
}
|
||||
if !self.case_smart {
|
||||
return false;
|
||||
}
|
||||
analysis.any_literal() && !analysis.any_uppercase()
|
||||
}
|
||||
|
||||
/// Returns true if and only if this config is simple enough such that
|
||||
/// if the pattern is a simple alternation of literals, then it can be
|
||||
/// constructed via a plain Aho-Corasick automaton.
|
||||
///
|
||||
/// Note that it is OK to return true even when settings like `multi_line`
|
||||
/// are enabled, since if multi-line can impact the match semantics of a
|
||||
/// regex, then it is by definition not a simple alternation of literals.
|
||||
pub fn can_plain_aho_corasick(&self) -> bool {
|
||||
!self.word
|
||||
&& !self.case_insensitive
|
||||
&& !self.case_smart
|
||||
}
|
||||
|
||||
/// Perform analysis on the AST of this pattern.
|
||||
///
|
||||
/// This returns an error if the given pattern failed to parse.
|
||||
fn analysis(&self, ast: &Ast) -> Result<AstAnalysis, Error> {
|
||||
Ok(AstAnalysis::from_ast(ast))
|
||||
}
|
||||
|
||||
/// Parse the given pattern into its abstract syntax.
|
||||
///
|
||||
/// This returns an error if the given pattern failed to parse.
|
||||
fn ast(&self, pattern: &str) -> Result<Ast, Error> {
|
||||
ast::parse::ParserBuilder::new()
|
||||
.nest_limit(self.nest_limit)
|
||||
.octal(self.octal)
|
||||
.ignore_whitespace(self.ignore_whitespace)
|
||||
.build()
|
||||
.parse(pattern)
|
||||
.map_err(Error::regex)
|
||||
}
|
||||
}
|
||||
|
||||
/// A "configured" HIR expression, which is aware of the configuration which
|
||||
/// produced this HIR.
|
||||
///
|
||||
/// Since the configuration is tracked, values with this type can be
|
||||
/// transformed into other HIR expressions (or regular expressions) in a way
|
||||
/// that preserves the configuration. For example, the `fast_line_regex`
|
||||
/// method will apply literal extraction to the inner HIR and use that to build
|
||||
/// a new regex that matches the extracted literals in a way that is
|
||||
/// consistent with the configuration that produced this HIR. For example, the
|
||||
/// size limits set on the configured HIR will be propagated out to any
|
||||
/// subsequently constructed HIR or regular expression.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ConfiguredHIR {
|
||||
original: String,
|
||||
config: Config,
|
||||
analysis: AstAnalysis,
|
||||
expr: Hir,
|
||||
}
|
||||
|
||||
impl ConfiguredHIR {
|
||||
/// Return the configuration for this HIR expression.
|
||||
pub fn config(&self) -> &Config {
|
||||
&self.config
|
||||
}
|
||||
|
||||
/// Compute the set of non-matching bytes for this HIR expression.
|
||||
pub fn non_matching_bytes(&self) -> ByteSet {
|
||||
non_matching_bytes(&self.expr)
|
||||
}
|
||||
|
||||
/// Returns true if and only if this regex needs to have its match offsets
|
||||
/// tweaked because of CRLF support. Specifically, this occurs when the
|
||||
/// CRLF hack is enabled and the regex is line anchored at the end. In
|
||||
/// this case, matches that end with a `\r` have the `\r` stripped.
|
||||
pub fn needs_crlf_stripped(&self) -> bool {
|
||||
self.config.crlf && self.expr.is_line_anchored_end()
|
||||
}
|
||||
|
||||
/// Builds a regular expression from this HIR expression.
|
||||
pub fn regex(&self) -> Result<Regex, Error> {
|
||||
self.pattern_to_regex(&self.expr.to_string())
|
||||
}
|
||||
|
||||
/// If this HIR corresponds to an alternation of literals with no
|
||||
/// capturing groups, then this returns those literals.
|
||||
pub fn alternation_literals(&self) -> Option<Vec<Vec<u8>>> {
|
||||
if !self.config.can_plain_aho_corasick() {
|
||||
return None;
|
||||
}
|
||||
alternation_literals(&self.expr)
|
||||
}
|
||||
|
||||
/// Applies the given function to the concrete syntax of this HIR and then
|
||||
/// generates a new HIR based on the result of the function in a way that
|
||||
/// preserves the configuration.
|
||||
///
|
||||
/// For example, this can be used to wrap a user provided regular
|
||||
/// expression with additional semantics. e.g., See the `WordMatcher`.
|
||||
pub fn with_pattern<F: FnMut(&str) -> String>(
|
||||
&self,
|
||||
mut f: F,
|
||||
) -> Result<ConfiguredHIR, Error>
|
||||
{
|
||||
self.pattern_to_hir(&f(&self.expr.to_string()))
|
||||
}
|
||||
|
||||
/// If the current configuration has a line terminator set and if useful
|
||||
/// literals could be extracted, then a regular expression matching those
|
||||
/// literals is returned. If no line terminator is set, then `None` is
|
||||
/// returned.
|
||||
///
|
||||
/// If compiling the resulting regular expression failed, then an error
|
||||
/// is returned.
|
||||
///
|
||||
/// This method only returns something when a line terminator is set
|
||||
/// because matches from this regex are generally candidates that must be
|
||||
/// confirmed before reporting a match. When performing a line oriented
|
||||
/// search, confirmation is easy: just extend the candidate match to its
|
||||
/// respective line boundaries and then re-search that line for a full
|
||||
/// match. This only works when the line terminator is set because the line
|
||||
/// terminator setting guarantees that the regex itself can never match
|
||||
/// through the line terminator byte.
|
||||
pub fn fast_line_regex(&self) -> Result<Option<Regex>, Error> {
|
||||
if self.config.line_terminator.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
match LiteralSets::new(&self.expr).one_regex(self.config.word) {
|
||||
None => Ok(None),
|
||||
Some(pattern) => self.pattern_to_regex(&pattern).map(Some),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a regex from the given pattern using this HIR's configuration.
|
||||
fn pattern_to_regex(&self, pattern: &str) -> Result<Regex, Error> {
|
||||
// The settings we explicitly set here are intentionally a subset
|
||||
// of the settings we have. The key point here is that our HIR
|
||||
// expression is computed with the settings in mind, such that setting
|
||||
// them here could actually lead to unintended behavior. For example,
|
||||
// consider the pattern `(?U)a+`. This will get folded into the HIR
|
||||
// as a non-greedy repetition operator which will in turn get printed
|
||||
// to the concrete syntax as `a+?`, which is correct. But if we
|
||||
// set the `swap_greed` option again, then we'll wind up with `(?U)a+?`
|
||||
// which is equal to `a+` which is not the same as what we were given.
|
||||
//
|
||||
// We also don't need to apply `case_insensitive` since this gets
|
||||
// folded into the HIR and would just cause us to do redundant work.
|
||||
//
|
||||
// Finally, we don't need to set `ignore_whitespace` since the concrete
|
||||
// syntax emitted by the HIR printer never needs it.
|
||||
//
|
||||
// We set the rest of the options. Some of them are important, such as
|
||||
// the size limit, and some of them are necessary to preserve the
|
||||
// intention of the original pattern. For example, the Unicode flag
|
||||
// will impact how the WordMatcher functions, namely, whether its
|
||||
// word boundaries are Unicode aware or not.
|
||||
RegexBuilder::new(&pattern)
|
||||
.nest_limit(self.config.nest_limit)
|
||||
.octal(self.config.octal)
|
||||
.multi_line(self.config.multi_line)
|
||||
.dot_matches_new_line(self.config.dot_matches_new_line)
|
||||
.unicode(self.config.unicode)
|
||||
.size_limit(self.config.size_limit)
|
||||
.dfa_size_limit(self.config.dfa_size_limit)
|
||||
.build()
|
||||
.map_err(Error::regex)
|
||||
}
|
||||
|
||||
/// Create an HIR expression from the given pattern using this HIR's
|
||||
/// configuration.
|
||||
fn pattern_to_hir(&self, pattern: &str) -> Result<ConfiguredHIR, Error> {
|
||||
// See `pattern_to_regex` comment for explanation of why we only set
|
||||
// a subset of knobs here. e.g., `swap_greed` is explicitly left out.
|
||||
let expr = ::regex_syntax::ParserBuilder::new()
|
||||
.nest_limit(self.config.nest_limit)
|
||||
.octal(self.config.octal)
|
||||
.allow_invalid_utf8(true)
|
||||
.multi_line(self.config.multi_line)
|
||||
.dot_matches_new_line(self.config.dot_matches_new_line)
|
||||
.unicode(self.config.unicode)
|
||||
.build()
|
||||
.parse(pattern)
|
||||
.map_err(Error::regex)?;
|
||||
Ok(ConfiguredHIR {
|
||||
original: self.original.clone(),
|
||||
config: self.config.clone(),
|
||||
analysis: self.analysis.clone(),
|
||||
expr: expr,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,190 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use grep_matcher::{Match, Matcher, NoError};
|
||||
use regex::bytes::Regex;
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
|
||||
use config::ConfiguredHIR;
|
||||
use error::Error;
|
||||
use matcher::RegexCaptures;
|
||||
|
||||
/// A matcher for implementing "word match" semantics.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CRLFMatcher {
|
||||
/// The regex.
|
||||
regex: Regex,
|
||||
/// A map from capture group name to capture group index.
|
||||
names: HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl CRLFMatcher {
|
||||
/// Create a new matcher from the given pattern that strips `\r` from the
|
||||
/// end of every match.
|
||||
///
|
||||
/// This panics if the given expression doesn't need its CRLF stripped.
|
||||
pub fn new(expr: &ConfiguredHIR) -> Result<CRLFMatcher, Error> {
|
||||
assert!(expr.needs_crlf_stripped());
|
||||
|
||||
let regex = expr.regex()?;
|
||||
let mut names = HashMap::new();
|
||||
for (i, optional_name) in regex.capture_names().enumerate() {
|
||||
if let Some(name) = optional_name {
|
||||
names.insert(name.to_string(), i.checked_sub(1).unwrap());
|
||||
}
|
||||
}
|
||||
Ok(CRLFMatcher { regex, names })
|
||||
}
|
||||
|
||||
/// Return the underlying regex used by this matcher.
|
||||
pub fn regex(&self) -> &Regex {
|
||||
&self.regex
|
||||
}
|
||||
}
|
||||
|
||||
impl Matcher for CRLFMatcher {
|
||||
type Captures = RegexCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>, NoError> {
|
||||
let m = match self.regex.find_at(haystack, at) {
|
||||
None => return Ok(None),
|
||||
Some(m) => Match::new(m.start(), m.end()),
|
||||
};
|
||||
Ok(Some(adjust_match(haystack, m)))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures, NoError> {
|
||||
Ok(RegexCaptures::new(self.regex.capture_locations()))
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
self.regex.captures_len().checked_sub(1).unwrap()
|
||||
}
|
||||
|
||||
fn capture_index(&self, name: &str) -> Option<usize> {
|
||||
self.names.get(name).map(|i| *i)
|
||||
}
|
||||
|
||||
fn captures_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool, NoError> {
|
||||
caps.strip_crlf(false);
|
||||
let r = self.regex.captures_read_at(
|
||||
caps.locations_mut(), haystack, at,
|
||||
);
|
||||
if !r.is_some() {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
// If the end of our match includes a `\r`, then strip it from all
|
||||
// capture groups ending at the same location.
|
||||
let end = caps.locations().get(0).unwrap().1;
|
||||
if end > 0 && haystack.get(end - 1) == Some(&b'\r') {
|
||||
caps.strip_crlf(true);
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
// We specifically do not implement other methods like find_iter or
|
||||
// captures_iter. Namely, the iter methods are guaranteed to be correct
|
||||
// by virtue of implementing find_at and captures_at above.
|
||||
}
|
||||
|
||||
/// If the given match ends with a `\r`, then return a new match that ends
|
||||
/// immediately before the `\r`.
|
||||
pub fn adjust_match(haystack: &[u8], m: Match) -> Match {
|
||||
if m.end() > 0 && haystack.get(m.end() - 1) == Some(&b'\r') {
|
||||
m.with_end(m.end() - 1)
|
||||
} else {
|
||||
m
|
||||
}
|
||||
}
|
||||
|
||||
/// Substitutes all occurrences of multi-line enabled `$` with `(?:\r?$)`.
|
||||
///
|
||||
/// This does not preserve the exact semantics of the given expression,
|
||||
/// however, it does have the useful property that anything that matched the
|
||||
/// given expression will also match the returned expression. The difference is
|
||||
/// that the returned expression can match possibly other things as well.
|
||||
///
|
||||
/// The principle reason why we do this is because the underlying regex engine
|
||||
/// doesn't support CRLF aware `$` look-around. It's planned to fix it at that
|
||||
/// level, but we perform this kludge in the mean time.
|
||||
///
|
||||
/// Note that while the match preserving semantics are nice and neat, the
|
||||
/// match position semantics are quite a bit messier. Namely, `$` only ever
|
||||
/// matches the position between characters where as `\r??` can match a
|
||||
/// character and change the offset. This is regretable, but works out pretty
|
||||
/// nicely in most cases, especially when a match is limited to a single line.
|
||||
pub fn crlfify(expr: Hir) -> Hir {
|
||||
match expr.into_kind() {
|
||||
HirKind::Anchor(hir::Anchor::EndLine) => {
|
||||
let concat = Hir::concat(vec![
|
||||
Hir::repetition(hir::Repetition {
|
||||
kind: hir::RepetitionKind::ZeroOrOne,
|
||||
greedy: false,
|
||||
hir: Box::new(Hir::literal(hir::Literal::Unicode('\r'))),
|
||||
}),
|
||||
Hir::anchor(hir::Anchor::EndLine),
|
||||
]);
|
||||
Hir::group(hir::Group {
|
||||
kind: hir::GroupKind::NonCapturing,
|
||||
hir: Box::new(concat),
|
||||
})
|
||||
}
|
||||
HirKind::Empty => Hir::empty(),
|
||||
HirKind::Literal(x) => Hir::literal(x),
|
||||
HirKind::Class(x) => Hir::class(x),
|
||||
HirKind::Anchor(x) => Hir::anchor(x),
|
||||
HirKind::WordBoundary(x) => Hir::word_boundary(x),
|
||||
HirKind::Repetition(mut x) => {
|
||||
x.hir = Box::new(crlfify(*x.hir));
|
||||
Hir::repetition(x)
|
||||
}
|
||||
HirKind::Group(mut x) => {
|
||||
x.hir = Box::new(crlfify(*x.hir));
|
||||
Hir::group(x)
|
||||
}
|
||||
HirKind::Concat(xs) => {
|
||||
Hir::concat(xs.into_iter().map(crlfify).collect())
|
||||
}
|
||||
HirKind::Alternation(xs) => {
|
||||
Hir::alternation(xs.into_iter().map(crlfify).collect())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use regex_syntax::Parser;
    use super::crlfify;

    /// Parses `pattern`, runs it through `crlfify` and prints the result
    /// back to concrete syntax.
    fn roundtrip(pattern: &str) -> String {
        let parsed = Parser::new().parse(pattern).unwrap();
        crlfify(parsed).to_string()
    }

    #[test]
    fn various() {
        let cases = [
            (r"(?m)$", "(?:\r??(?m:$))"),
            (r"(?m)$$", "(?:\r??(?m:$))(?:\r??(?m:$))"),
            (
                r"(?m)(?:foo$|bar$)",
                "(?:foo(?:\r??(?m:$))|bar(?:\r??(?m:$)))",
            ),
            (r"(?m)$a", "(?:\r??(?m:$))a"),
            // Not a multiline `$`, so no crlfifying occurs.
            (r"$", "\\z"),
            // It's a literal, derp.
            (r"\$", "\\$"),
        ];
        for &(pattern, expected) in cases.iter() {
            assert_eq!(roundtrip(pattern), expected);
        }
    }
}
|
||||
@@ -1,88 +0,0 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
|
||||
use util;
|
||||
|
||||
/// An error that can occur in this crate.
|
||||
///
|
||||
/// Generally, this error corresponds to problems building a regular
|
||||
/// expression, whether it's in parsing, compilation or a problem with
|
||||
/// guaranteeing a configured optimization.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Error {
|
||||
kind: ErrorKind,
|
||||
}
|
||||
|
||||
impl Error {
|
||||
pub(crate) fn new(kind: ErrorKind) -> Error {
|
||||
Error { kind }
|
||||
}
|
||||
|
||||
pub(crate) fn regex<E: error::Error>(err: E) -> Error {
|
||||
Error { kind: ErrorKind::Regex(err.to_string()) }
|
||||
}
|
||||
|
||||
/// Return the kind of this error.
|
||||
pub fn kind(&self) -> &ErrorKind {
|
||||
&self.kind
|
||||
}
|
||||
}
|
||||
|
||||
/// The kind of an error that can occur.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum ErrorKind {
|
||||
/// An error that occurred as a result of parsing a regular expression.
|
||||
/// This can be a syntax error or an error that results from attempting to
|
||||
/// compile a regular expression that is too big.
|
||||
///
|
||||
/// The string here is the underlying error converted to a string.
|
||||
Regex(String),
|
||||
/// An error that occurs when building a regex that isn't permitted to
|
||||
/// match a line terminator. In general, building the regex will do its
|
||||
/// best to make matching a line terminator impossible (e.g., by removing
|
||||
/// `\n` from the `\s` character class), but if the regex contains a
|
||||
/// `\n` literal, then there is no reasonable choice that can be made and
|
||||
/// therefore an error is reported.
|
||||
///
|
||||
/// The string is the literal sequence found in the regex that is not
|
||||
/// allowed.
|
||||
NotAllowed(String),
|
||||
/// This error occurs when a non-ASCII line terminator was provided.
|
||||
///
|
||||
/// The invalid byte is included in this error.
|
||||
InvalidLineTerminator(u8),
|
||||
/// Hints that destructuring should not be exhaustive.
|
||||
///
|
||||
/// This enum may grow additional variants, so this makes sure clients
|
||||
/// don't count on exhaustive matching. (Otherwise, adding a new variant
|
||||
/// could break existing code.)
|
||||
#[doc(hidden)]
|
||||
__Nonexhaustive,
|
||||
}
|
||||
|
||||
impl error::Error for Error {
|
||||
fn description(&self) -> &str {
|
||||
match self.kind {
|
||||
ErrorKind::Regex(_) => "regex error",
|
||||
ErrorKind::NotAllowed(_) => "literal not allowed",
|
||||
ErrorKind::InvalidLineTerminator(_) => "invalid line terminator",
|
||||
ErrorKind::__Nonexhaustive => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.kind {
|
||||
ErrorKind::Regex(ref s) => write!(f, "{}", s),
|
||||
ErrorKind::NotAllowed(ref lit) => {
|
||||
write!(f, "the literal '{:?}' is not allowed in a regex", lit)
|
||||
}
|
||||
ErrorKind::InvalidLineTerminator(byte) => {
|
||||
let x = util::show_bytes(&[byte]);
|
||||
write!(f, "line terminators must be ASCII, but '{}' is not", x)
|
||||
}
|
||||
ErrorKind::__Nonexhaustive => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
/*!
|
||||
An implementation of `grep-matcher`'s `Matcher` trait for Rust's regex engine.
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate aho_corasick;
|
||||
extern crate grep_matcher;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate regex;
|
||||
extern crate regex_syntax;
|
||||
extern crate thread_local;
|
||||
extern crate utf8_ranges;
|
||||
|
||||
pub use error::{Error, ErrorKind};
|
||||
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||
|
||||
mod ast;
|
||||
mod config;
|
||||
mod crlf;
|
||||
mod error;
|
||||
mod literal;
|
||||
mod matcher;
|
||||
mod multi;
|
||||
mod non_matching;
|
||||
mod strip;
|
||||
mod util;
|
||||
mod word;
|
||||
@@ -1,331 +0,0 @@
|
||||
/*
|
||||
This module is responsible for extracting *inner* literals out of the AST of a
|
||||
regular expression. Normally this is the job of the regex engine itself, but
|
||||
the regex engine doesn't look for inner literals. Since we're doing line based
|
||||
searching, we can use them, so we need to do it ourselves.
|
||||
*/
|
||||
|
||||
use std::cmp;
|
||||
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
use regex_syntax::hir::literal::{Literal, Literals};
|
||||
|
||||
use util;
|
||||
|
||||
/// Represents prefix, suffix and inner "required" literals for a regular
|
||||
/// expression.
|
||||
///
|
||||
/// Prefixes and suffixes are detected using regex-syntax. The inner required
|
||||
/// literals are detected using something custom (but based on the code in
|
||||
/// regex-syntax).
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct LiteralSets {
|
||||
/// A set of prefix literals.
|
||||
prefixes: Literals,
|
||||
/// A set of suffix literals.
|
||||
suffixes: Literals,
|
||||
/// A set of literals such that at least one of them must appear in every
|
||||
/// match. A literal in this set may be neither a prefix nor a suffix.
|
||||
required: Literals,
|
||||
}
|
||||
|
||||
impl LiteralSets {
|
||||
/// Create a set of literals from the given HIR expression.
|
||||
pub fn new(expr: &Hir) -> LiteralSets {
|
||||
let mut required = Literals::empty();
|
||||
union_required(expr, &mut required);
|
||||
LiteralSets {
|
||||
prefixes: Literals::prefixes(expr),
|
||||
suffixes: Literals::suffixes(expr),
|
||||
required: required,
|
||||
}
|
||||
}
|
||||
|
||||
/// If it is deemed advantageuous to do so (via various suspicious
|
||||
/// heuristics), this will return a single regular expression pattern that
|
||||
/// matches a subset of the language matched by the regular expression that
|
||||
/// generated these literal sets. The idea here is that the pattern
|
||||
/// returned by this method is much cheaper to search for. i.e., It is
|
||||
/// usually a single literal or an alternation of literals.
|
||||
pub fn one_regex(&self, word: bool) -> Option<String> {
|
||||
// TODO: The logic in this function is basically inscrutable. It grew
|
||||
// organically in the old grep 0.1 crate. Ideally, it would be
|
||||
// re-worked. In fact, the entire inner literal extraction should be
|
||||
// re-worked. Actually, most of regex-syntax's literal extraction
|
||||
// should also be re-worked. Alas... only so much time in the day.
|
||||
|
||||
if !word {
|
||||
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
||||
debug!("literal prefixes detected: {:?}", self.prefixes);
|
||||
// When this is true, the regex engine will do a literal scan,
|
||||
// so we don't need to return anything. But we only do this
|
||||
// if we aren't doing a word regex, since a word regex adds
|
||||
// a `(?:\W|^)` to the beginning of the regex, thereby
|
||||
// defeating the regex engine's literal detection.
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
// Out of inner required literals, prefixes and suffixes, which one
|
||||
// is the longest? We pick the longest to do fast literal scan under
|
||||
// the assumption that a longer literal will have a lower false
|
||||
// positive rate.
|
||||
let pre_lcp = self.prefixes.longest_common_prefix();
|
||||
let pre_lcs = self.prefixes.longest_common_suffix();
|
||||
let suf_lcp = self.suffixes.longest_common_prefix();
|
||||
let suf_lcs = self.suffixes.longest_common_suffix();
|
||||
|
||||
let req_lits = self.required.literals();
|
||||
let req = match req_lits.iter().max_by_key(|lit| lit.len()) {
|
||||
None => &[],
|
||||
Some(req) => &***req,
|
||||
};
|
||||
|
||||
let mut lit = pre_lcp;
|
||||
if pre_lcs.len() > lit.len() {
|
||||
lit = pre_lcs;
|
||||
}
|
||||
if suf_lcp.len() > lit.len() {
|
||||
lit = suf_lcp;
|
||||
}
|
||||
if suf_lcs.len() > lit.len() {
|
||||
lit = suf_lcs;
|
||||
}
|
||||
if req_lits.len() == 1 && req.len() > lit.len() {
|
||||
lit = req;
|
||||
}
|
||||
|
||||
// Special case: if we detected an alternation of inner required
|
||||
// literals and its longest literal is bigger than the longest
|
||||
// prefix/suffix, then choose the alternation. In practice, this
|
||||
// helps with case insensitive matching, which can generate lots of
|
||||
// inner required literals.
|
||||
let any_empty = req_lits.iter().any(|lit| lit.is_empty());
|
||||
if req.len() > lit.len() && req_lits.len() > 1 && !any_empty {
|
||||
debug!("required literals found: {:?}", req_lits);
|
||||
let alts: Vec<String> = req_lits
|
||||
.into_iter()
|
||||
.map(|x| util::bytes_to_regex(x))
|
||||
.collect();
|
||||
// We're matching raw bytes, so disable Unicode mode.
|
||||
Some(format!("(?-u:{})", alts.join("|")))
|
||||
} else if lit.is_empty() {
|
||||
None
|
||||
} else {
|
||||
debug!("required literal found: {:?}", util::show_bytes(lit));
|
||||
Some(format!("(?-u:{})", util::bytes_to_regex(&lit)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn union_required(expr: &Hir, lits: &mut Literals) {
|
||||
match *expr.kind() {
|
||||
HirKind::Literal(hir::Literal::Unicode(c)) => {
|
||||
let mut buf = [0u8; 4];
|
||||
lits.cross_add(c.encode_utf8(&mut buf).as_bytes());
|
||||
}
|
||||
HirKind::Literal(hir::Literal::Byte(b)) => {
|
||||
lits.cross_add(&[b]);
|
||||
}
|
||||
HirKind::Class(hir::Class::Unicode(ref cls)) => {
|
||||
if count_unicode_class(cls) >= 5 || !lits.add_char_class(cls) {
|
||||
lits.cut();
|
||||
}
|
||||
}
|
||||
HirKind::Class(hir::Class::Bytes(ref cls)) => {
|
||||
if count_byte_class(cls) >= 5 || !lits.add_byte_class(cls) {
|
||||
lits.cut();
|
||||
}
|
||||
}
|
||||
HirKind::Group(hir::Group { ref hir, .. }) => {
|
||||
union_required(&**hir, lits);
|
||||
}
|
||||
HirKind::Repetition(ref x) => {
|
||||
match x.kind {
|
||||
hir::RepetitionKind::ZeroOrOne => lits.cut(),
|
||||
hir::RepetitionKind::ZeroOrMore => lits.cut(),
|
||||
hir::RepetitionKind::OneOrMore => {
|
||||
union_required(&x.hir, lits);
|
||||
lits.cut();
|
||||
}
|
||||
hir::RepetitionKind::Range(ref rng) => {
|
||||
let (min, max) = match *rng {
|
||||
hir::RepetitionRange::Exactly(m) => (m, Some(m)),
|
||||
hir::RepetitionRange::AtLeast(m) => (m, None),
|
||||
hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
|
||||
};
|
||||
repeat_range_literals(
|
||||
&x.hir, min, max, x.greedy, lits, union_required);
|
||||
}
|
||||
}
|
||||
}
|
||||
HirKind::Concat(ref es) if es.is_empty() => {}
|
||||
HirKind::Concat(ref es) if es.len() == 1 => {
|
||||
union_required(&es[0], lits)
|
||||
}
|
||||
HirKind::Concat(ref es) => {
|
||||
for e in es {
|
||||
let mut lits2 = lits.to_empty();
|
||||
union_required(e, &mut lits2);
|
||||
if lits2.is_empty() {
|
||||
lits.cut();
|
||||
continue;
|
||||
}
|
||||
if lits2.contains_empty() || !is_simple(&e) {
|
||||
lits.cut();
|
||||
}
|
||||
if !lits.cross_product(&lits2) || !lits2.any_complete() {
|
||||
// If this expression couldn't yield any literal that
|
||||
// could be extended, then we need to quit. Since we're
|
||||
// short-circuiting, we also need to freeze every member.
|
||||
lits.cut();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
HirKind::Alternation(ref es) => {
|
||||
alternate_literals(es, lits, union_required);
|
||||
}
|
||||
_ => lits.cut(),
|
||||
}
|
||||
}
|
||||
|
||||
fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>(
|
||||
e: &Hir,
|
||||
min: u32,
|
||||
max: Option<u32>,
|
||||
_greedy: bool,
|
||||
lits: &mut Literals,
|
||||
mut f: F,
|
||||
) {
|
||||
if min == 0 {
|
||||
// This is a bit conservative. If `max` is set, then we could
|
||||
// treat this as a finite set of alternations. For now, we
|
||||
// just treat it as `e*`.
|
||||
lits.cut();
|
||||
} else {
|
||||
let n = cmp::min(lits.limit_size(), min as usize);
|
||||
// We only extract literals from a single repetition, even though
|
||||
// we could do more. e.g., `a{3}` will have `a` extracted instead of
|
||||
// `aaa`. The reason is that inner literal extraction can't be unioned
|
||||
// across repetitions. e.g., extracting `foofoofoo` from `(\w+foo){3}`
|
||||
// is wrong.
|
||||
f(e, lits);
|
||||
if n < min as usize {
|
||||
lits.cut();
|
||||
}
|
||||
if max.map_or(true, |max| min < max) {
|
||||
lits.cut();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn alternate_literals<F: FnMut(&Hir, &mut Literals)>(
|
||||
es: &[Hir],
|
||||
lits: &mut Literals,
|
||||
mut f: F,
|
||||
) {
|
||||
let mut lits2 = lits.to_empty();
|
||||
for e in es {
|
||||
let mut lits3 = lits.to_empty();
|
||||
lits3.set_limit_size(lits.limit_size() / 5);
|
||||
f(e, &mut lits3);
|
||||
if lits3.is_empty() || !lits2.union(lits3) {
|
||||
// If we couldn't find suffixes for *any* of the
|
||||
// alternates, then the entire alternation has to be thrown
|
||||
// away and any existing members must be frozen. Similarly,
|
||||
// if the union couldn't complete, stop and freeze.
|
||||
lits.cut();
|
||||
return;
|
||||
}
|
||||
}
|
||||
// All we do at the moment is look for prefixes and suffixes. If both
|
||||
// are empty, then we report nothing. We should be able to do better than
|
||||
// this, but we'll need something more expressive than just a "set of
|
||||
// literals."
|
||||
let lcp = lits2.longest_common_prefix();
|
||||
let lcs = lits2.longest_common_suffix();
|
||||
if !lcp.is_empty() {
|
||||
lits.cross_add(lcp);
|
||||
}
|
||||
lits.cut();
|
||||
if !lcs.is_empty() {
|
||||
lits.add(Literal::empty());
|
||||
lits.add(Literal::new(lcs.to_vec()));
|
||||
}
|
||||
}
|
||||
|
||||
fn is_simple(expr: &Hir) -> bool {
|
||||
match *expr.kind() {
|
||||
HirKind::Empty
|
||||
| HirKind::Literal(_)
|
||||
| HirKind::Class(_)
|
||||
| HirKind::Repetition(_)
|
||||
| HirKind::Concat(_)
|
||||
| HirKind::Alternation(_) => true,
|
||||
HirKind::Anchor(_)
|
||||
| HirKind::WordBoundary(_)
|
||||
| HirKind::Group(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the number of characters in the given class.
|
||||
fn count_unicode_class(cls: &hir::ClassUnicode) -> u32 {
|
||||
cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum()
|
||||
}
|
||||
|
||||
/// Return the number of bytes in the given class.
|
||||
fn count_byte_class(cls: &hir::ClassBytes) -> u32 {
|
||||
cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::LiteralSets;
    use regex_syntax::Parser;

    /// Parse `pattern` and extract its literal sets.
    fn sets(pattern: &str) -> LiteralSets {
        let hir = Parser::new().parse(pattern).unwrap();
        LiteralSets::new(&hir)
    }

    /// Run `one_regex` in non-word mode for `pattern`.
    fn one_regex(pattern: &str) -> Option<String> {
        let literal_sets = sets(pattern);
        literal_sets.one_regex(false)
    }

    // Put a pattern into the same format as the one returned by `one_regex`.
    fn pat(pattern: &str) -> Option<String> {
        let wrapped = format!("(?-u:{})", pattern);
        Some(wrapped)
    }

    #[test]
    fn various() {
        // Obviously no literals.
        assert!(one_regex(r"\w").is_none());
        assert!(one_regex(r"\pL").is_none());

        // Tantalizingly close.
        assert!(one_regex(r"\w|foo").is_none());

        // There's a literal, but it's better if the regex engine handles it
        // internally.
        assert!(one_regex(r"abc").is_none());

        // Core use cases.
        assert_eq!(one_regex(r"\wabc\w"), pat("abc"));
        assert_eq!(one_regex(r"abc\w"), pat("abc"));

        // TODO: Make these pass. We're missing some potentially big wins
        // without these.
        // assert_eq!(one_regex(r"\w(foo|bar|baz)"), pat("foo|bar|baz"));
        // assert_eq!(one_regex(r"\w(foo|bar|baz)\w"), pat("foo|bar|baz"));
    }

    #[test]
    fn regression_1064() {
        // Regression from:
        // https://github.com/BurntSushi/ripgrep/issues/1064
        // assert_eq!(one_regex(r"a.*c"), pat("a"));
        assert_eq!(one_regex(r"a(.*c)"), pat("a"));
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,127 +0,0 @@
|
||||
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
|
||||
use grep_matcher::{Matcher, Match, NoError};
|
||||
use regex_syntax::hir::Hir;
|
||||
|
||||
use error::Error;
|
||||
use matcher::RegexCaptures;
|
||||
|
||||
/// A matcher for an alternation of literals.
|
||||
///
|
||||
/// Ideally, this optimization would be pushed down into the regex engine, but
|
||||
/// making this work correctly there would require quite a bit of refactoring.
|
||||
/// Moreover, doing it one layer above lets us do thing like, "if we
|
||||
/// specifically only want to search for literals, then don't bother with
|
||||
/// regex parsing at all."
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct MultiLiteralMatcher {
|
||||
/// The Aho-Corasick automaton.
|
||||
ac: AhoCorasick,
|
||||
}
|
||||
|
||||
impl MultiLiteralMatcher {
|
||||
/// Create a new multi-literal matcher from the given literals.
|
||||
pub fn new<B: AsRef<[u8]>>(
|
||||
literals: &[B],
|
||||
) -> Result<MultiLiteralMatcher, Error> {
|
||||
let ac = AhoCorasickBuilder::new()
|
||||
.match_kind(MatchKind::LeftmostFirst)
|
||||
.auto_configure(literals)
|
||||
.build_with_size::<usize, _, _>(literals)
|
||||
.map_err(Error::regex)?;
|
||||
Ok(MultiLiteralMatcher { ac })
|
||||
}
|
||||
}
|
||||
|
||||
impl Matcher for MultiLiteralMatcher {
|
||||
type Captures = RegexCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>, NoError> {
|
||||
match self.ac.find(&haystack[at..]) {
|
||||
None => Ok(None),
|
||||
Some(m) => Ok(Some(Match::new(at + m.start(), at + m.end()))),
|
||||
}
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures, NoError> {
|
||||
Ok(RegexCaptures::simple())
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
1
|
||||
}
|
||||
|
||||
fn capture_index(&self, _: &str) -> Option<usize> {
|
||||
None
|
||||
}
|
||||
|
||||
fn captures_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool, NoError> {
|
||||
caps.set_simple(None);
|
||||
let mat = self.find_at(haystack, at)?;
|
||||
caps.set_simple(mat);
|
||||
Ok(mat.is_some())
|
||||
}
|
||||
|
||||
// We specifically do not implement other methods like find_iter. Namely,
|
||||
// the iter methods are guaranteed to be correct by virtue of implementing
|
||||
// find_at above.
|
||||
}
|
||||
|
||||
/// Alternation literals checks if the given HIR is a simple alternation of
|
||||
/// literals, and if so, returns them. Otherwise, this returns None.
|
||||
pub fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
|
||||
use regex_syntax::hir::{HirKind, Literal};
|
||||
|
||||
// This is pretty hacky, but basically, if `is_alternation_literal` is
|
||||
// true, then we can make several assumptions about the structure of our
|
||||
// HIR. This is what justifies the `unreachable!` statements below.
|
||||
|
||||
if !expr.is_alternation_literal() {
|
||||
return None;
|
||||
}
|
||||
let alts = match *expr.kind() {
|
||||
HirKind::Alternation(ref alts) => alts,
|
||||
_ => return None, // one literal isn't worth it
|
||||
};
|
||||
|
||||
let extendlit = |lit: &Literal, dst: &mut Vec<u8>| {
|
||||
match *lit {
|
||||
Literal::Unicode(c) => {
|
||||
let mut buf = [0; 4];
|
||||
dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
|
||||
}
|
||||
Literal::Byte(b) => {
|
||||
dst.push(b);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let mut lits = vec![];
|
||||
for alt in alts {
|
||||
let mut lit = vec![];
|
||||
match *alt.kind() {
|
||||
HirKind::Empty => {}
|
||||
HirKind::Literal(ref x) => extendlit(x, &mut lit),
|
||||
HirKind::Concat(ref exprs) => {
|
||||
for e in exprs {
|
||||
match *e.kind() {
|
||||
HirKind::Literal(ref x) => extendlit(x, &mut lit),
|
||||
_ => unreachable!("expected literal, got {:?}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => unreachable!("expected literal or concat, got {:?}", alt),
|
||||
}
|
||||
lits.push(lit);
|
||||
}
|
||||
Some(lits)
|
||||
}
|
||||
@@ -1,128 +0,0 @@
|
||||
use grep_matcher::ByteSet;
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
use utf8_ranges::Utf8Sequences;
|
||||
|
||||
/// Return a confirmed set of non-matching bytes from the given expression.
|
||||
pub fn non_matching_bytes(expr: &Hir) -> ByteSet {
|
||||
let mut set = ByteSet::full();
|
||||
remove_matching_bytes(expr, &mut set);
|
||||
set
|
||||
}
|
||||
|
||||
/// Remove any bytes from the given set that can occur in a matched produced by
|
||||
/// the given expression.
|
||||
fn remove_matching_bytes(
|
||||
expr: &Hir,
|
||||
set: &mut ByteSet,
|
||||
) {
|
||||
match *expr.kind() {
|
||||
HirKind::Empty
|
||||
| HirKind::Anchor(_)
|
||||
| HirKind::WordBoundary(_) => {}
|
||||
HirKind::Literal(hir::Literal::Unicode(c)) => {
|
||||
for &b in c.encode_utf8(&mut [0; 4]).as_bytes() {
|
||||
set.remove(b);
|
||||
}
|
||||
}
|
||||
HirKind::Literal(hir::Literal::Byte(b)) => {
|
||||
set.remove(b);
|
||||
}
|
||||
HirKind::Class(hir::Class::Unicode(ref cls)) => {
|
||||
for range in cls.iter() {
|
||||
// This is presumably faster than encoding every codepoint
|
||||
// to UTF-8 and then removing those bytes from the set.
|
||||
for seq in Utf8Sequences::new(range.start(), range.end()) {
|
||||
for byte_range in seq.as_slice() {
|
||||
set.remove_all(byte_range.start, byte_range.end);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
HirKind::Class(hir::Class::Bytes(ref cls)) => {
|
||||
for range in cls.iter() {
|
||||
set.remove_all(range.start(), range.end());
|
||||
}
|
||||
}
|
||||
HirKind::Repetition(ref x) => {
|
||||
remove_matching_bytes(&x.hir, set);
|
||||
}
|
||||
HirKind::Group(ref x) => {
|
||||
remove_matching_bytes(&x.hir, set);
|
||||
}
|
||||
HirKind::Concat(ref xs) => {
|
||||
for x in xs {
|
||||
remove_matching_bytes(x, set);
|
||||
}
|
||||
}
|
||||
HirKind::Alternation(ref xs) => {
|
||||
for x in xs {
|
||||
remove_matching_bytes(x, set);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use grep_matcher::ByteSet;
    use regex_syntax::ParserBuilder;

    use super::non_matching_bytes;

    /// Parse `pattern` (invalid UTF-8 permitted) and compute its set of
    /// non-matching bytes.
    fn extract(pattern: &str) -> ByteSet {
        let mut builder = ParserBuilder::new();
        builder.allow_invalid_utf8(true);
        let expr = builder.build().parse(pattern).unwrap();
        non_matching_bytes(&expr)
    }

    /// List the members of `set` in ascending order.
    fn sparse(set: &ByteSet) -> Vec<u8> {
        (0..256)
            .map(|b| b as u8)
            .filter(|&b| set.contains(b))
            .collect()
    }

    /// List every byte value, in ascending order, that is not in `except`.
    fn sparse_except(except: &[u8]) -> Vec<u8> {
        let mut excluded = vec![false; 256];
        for &b in except {
            excluded[b as usize] = true;
        }
        (0..256)
            .map(|b| b as u8)
            .filter(|&b| !excluded[b as usize])
            .collect()
    }

    #[test]
    fn dot() {
        let with_newline = vec![
            b'\n',
            192, 193, 245, 246, 247, 248, 249,
            250, 251, 252, 253, 254, 255,
        ];
        assert_eq!(sparse(&extract(".")), with_newline);

        let without_newline = vec![
            192, 193, 245, 246, 247, 248, 249,
            250, 251, 252, 253, 254, 255,
        ];
        assert_eq!(sparse(&extract("(?s).")), without_newline);

        assert_eq!(sparse(&extract("(?-u).")), vec![b'\n']);
        assert_eq!(sparse(&extract("(?s-u).")), vec![]);
    }

    #[test]
    fn literal() {
        assert_eq!(sparse(&extract("a")), sparse_except(&[b'a']));
        assert_eq!(sparse(&extract("☃")), sparse_except(&[0xE2, 0x98, 0x83]));
        assert_eq!(sparse(&extract(r"\xFF")), sparse_except(&[0xC3, 0xBF]));
        assert_eq!(sparse(&extract(r"(?-u)\xFF")), sparse_except(&[0xFF]));
    }
}
|
||||
@@ -1,154 +0,0 @@
|
||||
use grep_matcher::LineTerminator;
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
|
||||
use error::{Error, ErrorKind};
|
||||
|
||||
/// Return an HIR that is guaranteed to never match the given line terminator,
|
||||
/// if possible.
|
||||
///
|
||||
/// If the transformation isn't possible, then an error is returned.
|
||||
///
|
||||
/// In general, if a literal line terminator occurs anywhere in the HIR, then
|
||||
/// this will return an error. However, if the line terminator occurs within
|
||||
/// a character class with at least one other character (that isn't also a line
|
||||
/// terminator), then the line terminator is simply stripped from that class.
|
||||
///
|
||||
/// If the given line terminator is not ASCII, then this function returns an
|
||||
/// error.
|
||||
pub fn strip_from_match(
|
||||
expr: Hir,
|
||||
line_term: LineTerminator,
|
||||
) -> Result<Hir, Error> {
|
||||
if line_term.is_crlf() {
|
||||
let expr1 = strip_from_match_ascii(expr, b'\r')?;
|
||||
strip_from_match_ascii(expr1, b'\n')
|
||||
} else {
|
||||
let b = line_term.as_byte();
|
||||
if b > 0x7F {
|
||||
return Err(Error::new(ErrorKind::InvalidLineTerminator(b)));
|
||||
}
|
||||
strip_from_match_ascii(expr, b)
|
||||
}
|
||||
}
|
||||
|
||||
/// The implementation of strip_from_match. The given byte must be ASCII. This
|
||||
/// function panics otherwise.
|
||||
fn strip_from_match_ascii(
|
||||
expr: Hir,
|
||||
byte: u8,
|
||||
) -> Result<Hir, Error> {
|
||||
assert!(byte <= 0x7F);
|
||||
let chr = byte as char;
|
||||
assert_eq!(chr.len_utf8(), 1);
|
||||
|
||||
let invalid = || Err(Error::new(ErrorKind::NotAllowed(chr.to_string())));
|
||||
|
||||
Ok(match expr.into_kind() {
|
||||
HirKind::Empty => Hir::empty(),
|
||||
HirKind::Literal(hir::Literal::Unicode(c)) => {
|
||||
if c == chr {
|
||||
return invalid();
|
||||
}
|
||||
Hir::literal(hir::Literal::Unicode(c))
|
||||
}
|
||||
HirKind::Literal(hir::Literal::Byte(b)) => {
|
||||
if b as char == chr {
|
||||
return invalid();
|
||||
}
|
||||
Hir::literal(hir::Literal::Byte(b))
|
||||
}
|
||||
HirKind::Class(hir::Class::Unicode(mut cls)) => {
|
||||
let remove = hir::ClassUnicode::new(Some(
|
||||
hir::ClassUnicodeRange::new(chr, chr),
|
||||
));
|
||||
cls.difference(&remove);
|
||||
if cls.ranges().is_empty() {
|
||||
return invalid();
|
||||
}
|
||||
Hir::class(hir::Class::Unicode(cls))
|
||||
}
|
||||
HirKind::Class(hir::Class::Bytes(mut cls)) => {
|
||||
let remove = hir::ClassBytes::new(Some(
|
||||
hir::ClassBytesRange::new(byte, byte),
|
||||
));
|
||||
cls.difference(&remove);
|
||||
if cls.ranges().is_empty() {
|
||||
return invalid();
|
||||
}
|
||||
Hir::class(hir::Class::Bytes(cls))
|
||||
}
|
||||
HirKind::Anchor(x) => Hir::anchor(x),
|
||||
HirKind::WordBoundary(x) => Hir::word_boundary(x),
|
||||
HirKind::Repetition(mut x) => {
|
||||
x.hir = Box::new(strip_from_match_ascii(*x.hir, byte)?);
|
||||
Hir::repetition(x)
|
||||
}
|
||||
HirKind::Group(mut x) => {
|
||||
x.hir = Box::new(strip_from_match_ascii(*x.hir, byte)?);
|
||||
Hir::group(x)
|
||||
}
|
||||
HirKind::Concat(xs) => {
|
||||
let xs = xs.into_iter()
|
||||
.map(|e| strip_from_match_ascii(e, byte))
|
||||
.collect::<Result<Vec<Hir>, Error>>()?;
|
||||
Hir::concat(xs)
|
||||
}
|
||||
HirKind::Alternation(xs) => {
|
||||
let xs = xs.into_iter()
|
||||
.map(|e| strip_from_match_ascii(e, byte))
|
||||
.collect::<Result<Vec<Hir>, Error>>()?;
|
||||
Hir::alternation(xs)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use regex_syntax::Parser;

    use super::{strip_from_match, LineTerminator};
    use error::Error;

    /// Parse `pattern`, strip `line_term` from it and print the result back
    /// as a pattern.
    fn roundtrip_line_term(
        pattern: &str,
        line_term: LineTerminator,
    ) -> Result<String, Error> {
        let parsed = Parser::new().parse(pattern).unwrap();
        strip_from_match(parsed, line_term).map(|hir| hir.to_string())
    }

    /// Like `roundtrip_line_term` with a single byte terminator, keeping the
    /// error for inspection.
    fn roundtrip_err(pattern: &str, byte: u8) -> Result<String, Error> {
        roundtrip_line_term(pattern, LineTerminator::byte(byte))
    }

    /// Like `roundtrip_err`, but panics if stripping fails.
    fn roundtrip(pattern: &str, byte: u8) -> String {
        roundtrip_err(pattern, byte).unwrap()
    }

    /// Strip a CRLF terminator, panicking if stripping fails.
    fn roundtrip_crlf(pattern: &str) -> String {
        roundtrip_line_term(pattern, LineTerminator::crlf()).unwrap()
    }

    #[test]
    fn various() {
        assert_eq!(roundtrip(r"[a\n]", b'\n'), "[a]");
        assert_eq!(roundtrip(r"[a\n]", b'a'), "[\n]");
        assert_eq!(roundtrip_crlf(r"[a\n]"), "[a]");
        assert_eq!(roundtrip_crlf(r"[a\r]"), "[a]");
        assert_eq!(roundtrip_crlf(r"[a\r\n]"), "[a]");

        assert_eq!(roundtrip(r"(?-u)\s", b'a'), r"(?-u:[\x09-\x0D\x20])");
        assert_eq!(roundtrip(r"(?-u)\s", b'\n'), r"(?-u:[\x09\x0B-\x0D\x20])");

        // A literal line terminator, however it is spelled, is rejected.
        assert!(roundtrip_err(r"\n", b'\n').is_err());
        assert!(roundtrip_err(r"abc\n", b'\n').is_err());
        assert!(roundtrip_err(r"\nabc", b'\n').is_err());
        assert!(roundtrip_err(r"abc\nxyz", b'\n').is_err());
        assert!(roundtrip_err(r"\x0A", b'\n').is_err());
        assert!(roundtrip_err(r"\u000A", b'\n').is_err());
        assert!(roundtrip_err(r"\U0000000A", b'\n').is_err());
        assert!(roundtrip_err(r"\u{A}", b'\n').is_err());
        assert!(roundtrip_err("\n", b'\n').is_err());
    }
}
|
||||
@@ -1,29 +0,0 @@
|
||||
/// Converts an arbitrary sequence of bytes to a literal suitable for building
|
||||
/// a regular expression.
|
||||
pub fn bytes_to_regex(bs: &[u8]) -> String {
|
||||
use std::fmt::Write;
|
||||
use regex_syntax::is_meta_character;
|
||||
|
||||
let mut s = String::with_capacity(bs.len());
|
||||
for &b in bs {
|
||||
if b <= 0x7F && !is_meta_character(b as char) {
|
||||
write!(s, r"{}", b as char).unwrap();
|
||||
} else {
|
||||
write!(s, r"\x{:02x}", b).unwrap();
|
||||
}
|
||||
}
|
||||
s
|
||||
}
|
||||
|
||||
/// Converts arbitrary bytes to a nice string.
///
/// Each byte is escaped with `std::ascii::escape_default`: printable ASCII
/// is kept as-is, common control characters and quotes get backslash escapes
/// (e.g. `\n`, `\t`, `\\`, `\'`, `\"`), and all other bytes become `\xNN`.
pub fn show_bytes(bs: &[u8]) -> String {
    use std::ascii::escape_default;

    // Every input byte produces at least one output character.
    let mut nice = String::with_capacity(bs.len());
    for &b in bs {
        // The escape iterator yields ASCII bytes only, so each one converts
        // directly to a `char`. This avoids a temporary buffer per byte and
        // a UTF-8 validation step.
        nice.extend(escape_default(b).map(char::from));
    }
    nice
}
|
||||
@@ -1,203 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::cell::RefCell;
|
||||
use std::sync::Arc;
|
||||
|
||||
use grep_matcher::{Match, Matcher, NoError};
|
||||
use regex::bytes::{CaptureLocations, Regex};
|
||||
use thread_local::CachedThreadLocal;
|
||||
|
||||
use config::ConfiguredHIR;
|
||||
use error::Error;
|
||||
use matcher::RegexCaptures;
|
||||
|
||||
/// A matcher for implementing "word match" semantics.
|
||||
#[derive(Debug)]
|
||||
pub struct WordMatcher {
|
||||
/// The regex which is roughly `(?:^|\W)(<original pattern>)(?:$|\W)`.
|
||||
regex: Regex,
|
||||
/// A map from capture group name to capture group index.
|
||||
names: HashMap<String, usize>,
|
||||
/// A reusable buffer for finding the match location of the inner group.
|
||||
locs: Arc<CachedThreadLocal<RefCell<CaptureLocations>>>,
|
||||
}
|
||||
|
||||
impl Clone for WordMatcher {
|
||||
fn clone(&self) -> WordMatcher {
|
||||
// We implement Clone manually so that we get a fresh CachedThreadLocal
|
||||
// such that it can set its own thread owner. This permits each thread
|
||||
// usings `locs` to hit the fast path.
|
||||
WordMatcher {
|
||||
regex: self.regex.clone(),
|
||||
names: self.names.clone(),
|
||||
locs: Arc::new(CachedThreadLocal::new()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl WordMatcher {
|
||||
/// Create a new matcher from the given pattern that only produces matches
|
||||
/// that are considered "words."
|
||||
///
|
||||
/// The given options are used to construct the regular expression
|
||||
/// internally.
|
||||
pub fn new(expr: &ConfiguredHIR) -> Result<WordMatcher, Error> {
|
||||
let word_expr = expr.with_pattern(|pat| {
|
||||
format!(r"(?:(?m:^)|\W)({})(?:(?m:$)|\W)", pat)
|
||||
})?;
|
||||
let regex = word_expr.regex()?;
|
||||
let locs = Arc::new(CachedThreadLocal::new());
|
||||
|
||||
let mut names = HashMap::new();
|
||||
for (i, optional_name) in regex.capture_names().enumerate() {
|
||||
if let Some(name) = optional_name {
|
||||
names.insert(name.to_string(), i.checked_sub(1).unwrap());
|
||||
}
|
||||
}
|
||||
Ok(WordMatcher { regex, names, locs })
|
||||
}
|
||||
|
||||
/// Return the underlying regex used by this matcher.
|
||||
pub fn regex(&self) -> &Regex {
|
||||
&self.regex
|
||||
}
|
||||
}
|
||||
|
||||
impl Matcher for WordMatcher {
|
||||
type Captures = RegexCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>, NoError> {
|
||||
// To make this easy to get right, we extract captures here instead of
|
||||
// calling `find_at`. The actual match is at capture group `1` instead
|
||||
// of `0`. We *could* use `find_at` here and then trim the match after
|
||||
// the fact, but that's a bit harder to get right, and it's not clear
|
||||
// if it's worth it.
|
||||
|
||||
let cell = self.locs.get_or(|| {
|
||||
Box::new(RefCell::new(self.regex.capture_locations()))
|
||||
});
|
||||
let mut caps = cell.borrow_mut();
|
||||
self.regex.captures_read_at(&mut caps, haystack, at);
|
||||
Ok(caps.get(1).map(|m| Match::new(m.0, m.1)))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures, NoError> {
|
||||
Ok(RegexCaptures::with_offset(self.regex.capture_locations(), 1))
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
self.regex.captures_len().checked_sub(1).unwrap()
|
||||
}
|
||||
|
||||
fn capture_index(&self, name: &str) -> Option<usize> {
|
||||
self.names.get(name).map(|i| *i)
|
||||
}
|
||||
|
||||
fn captures_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool, NoError> {
|
||||
let r = self.regex.captures_read_at(
|
||||
caps.locations_mut(), haystack, at,
|
||||
);
|
||||
Ok(r.is_some())
|
||||
}
|
||||
|
||||
// We specifically do not implement other methods like find_iter or
|
||||
// captures_iter. Namely, the iter methods are guaranteed to be correct
|
||||
// by virtue of implementing find_at and captures_at above.
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::WordMatcher;
    use config::Config;
    use grep_matcher::{Captures, Match, Matcher};

    /// Build a word matcher for `pattern` using the default configuration.
    fn matcher(pattern: &str) -> WordMatcher {
        let chir = Config::default().hir(pattern).unwrap();
        WordMatcher::new(&chir).unwrap()
    }

    /// Report the span of the first match via the `find` API.
    fn find(pattern: &str, haystack: &str) -> Option<(usize, usize)> {
        let found = matcher(pattern).find(haystack.as_bytes()).unwrap();
        found.map(|m| (m.start(), m.end()))
    }

    /// Report the span of the first match via the captures API (group 0).
    fn find_by_caps(pattern: &str, haystack: &str) -> Option<(usize, usize)> {
        let m = matcher(pattern);
        let mut caps = m.new_captures().unwrap();
        if m.captures(haystack.as_bytes(), &mut caps).unwrap() {
            caps.get(0).map(|m| (m.start(), m.end()))
        } else {
            None
        }
    }

    // Test that the standard `find` API reports offsets correctly.
    #[test]
    fn various_find() {
        assert_eq!(Some((0, 3)), find(r"foo", "foo"));
        assert_eq!(Some((0, 3)), find(r"foo", "foo("));
        assert_eq!(Some((1, 4)), find(r"foo", "!foo("));
        assert_eq!(None, find(r"foo", "!afoo("));

        assert_eq!(Some((0, 3)), find(r"foo", "foo☃"));
        assert_eq!(None, find(r"foo", "fooб"));
        // assert_eq!(Some((0, 3)), find(r"foo", "fooб"));

        // See: https://github.com/BurntSushi/ripgrep/issues/389
        assert_eq!(Some((0, 2)), find(r"-2", "-2"));
    }

    // Test that the captures API also reports offsets correctly, just as
    // find does. This exercises a different path in the code since captures
    // are handled differently.
    #[test]
    fn various_captures() {
        assert_eq!(Some((0, 3)), find_by_caps(r"foo", "foo"));
        assert_eq!(Some((0, 3)), find_by_caps(r"foo", "foo("));
        assert_eq!(Some((1, 4)), find_by_caps(r"foo", "!foo("));
        assert_eq!(None, find_by_caps(r"foo", "!afoo("));

        assert_eq!(Some((0, 3)), find_by_caps(r"foo", "foo☃"));
        assert_eq!(None, find_by_caps(r"foo", "fooб"));
        // assert_eq!(Some((0, 3)), find_by_caps(r"foo", "fooб"));

        // See: https://github.com/BurntSushi/ripgrep/issues/389
        assert_eq!(Some((0, 2)), find_by_caps(r"-2", "-2"));
    }

    // Test that the capture reporting methods work as advertised.
    #[test]
    fn capture_indexing() {
        let m = matcher(r"(a)(?P<foo>b)(c)");
        assert_eq!(4, m.capture_count());
        assert_eq!(Some(2), m.capture_index("foo"));

        let mut caps = m.new_captures().unwrap();
        assert_eq!(4, caps.len());

        // Match at the very start of the haystack.
        assert!(m.captures(b"abc", &mut caps).unwrap());
        assert_eq!(caps.get(0), Some(Match::new(0, 3)));
        assert_eq!(caps.get(1), Some(Match::new(0, 1)));
        assert_eq!(caps.get(2), Some(Match::new(1, 2)));
        assert_eq!(caps.get(3), Some(Match::new(2, 3)));
        assert_eq!(caps.get(4), None);

        // Match surrounded by non-word bytes.
        assert!(m.captures(b"#abc#", &mut caps).unwrap());
        assert_eq!(caps.get(0), Some(Match::new(1, 4)));
        assert_eq!(caps.get(1), Some(Match::new(1, 2)));
        assert_eq!(caps.get(2), Some(Match::new(2, 3)));
        assert_eq!(caps.get(3), Some(Match::new(3, 4)));
        assert_eq!(caps.get(4), None);
    }
}
|
||||
@@ -1,33 +0,0 @@
|
||||
[package]
|
||||
name = "grep-searcher"
|
||||
version = "0.1.4" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Fast line oriented regex searching as a library.
|
||||
"""
|
||||
documentation = "https://docs.rs/grep-searcher"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[dependencies]
|
||||
bstr = { version = "0.1.2", default-features = false, features = ["std"] }
|
||||
bytecount = "0.5"
|
||||
encoding_rs = "0.8.14"
|
||||
encoding_rs_io = "0.1.6"
|
||||
grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
|
||||
log = "0.4.5"
|
||||
memmap = "0.7"
|
||||
|
||||
[dev-dependencies]
|
||||
grep-regex = { version = "0.1.3", path = "../grep-regex" }
|
||||
regex = "1.1"
|
||||
|
||||
[features]
|
||||
default = ["bytecount/runtime-dispatch-simd"]
|
||||
simd-accel = ["encoding_rs/simd-accel"]
|
||||
|
||||
# This feature is DEPRECATED. Runtime dispatch is used for SIMD now.
|
||||
avx-accel = []
|
||||
@@ -1,21 +0,0 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Andrew Gallant
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -1,37 +0,0 @@
|
||||
grep-searcher
|
||||
-------------
|
||||
A high level library for executing fast line oriented searches. This handles
|
||||
things like reporting contextual lines, counting lines, inverting a search,
|
||||
detecting binary data, automatic UTF-16 transcoding and deciding whether or not
|
||||
to use memory maps.
|
||||
|
||||
[Linux build status](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[Windows build status](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[grep-searcher on crates.io](https://crates.io/crates/grep-searcher)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/grep-searcher](https://docs.rs/grep-searcher)
|
||||
|
||||
**NOTE:** You probably don't want to use this crate directly. Instead, you
|
||||
should prefer the facade defined in the
|
||||
[`grep`](https://docs.rs/grep)
|
||||
crate.
|
||||
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
grep-searcher = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_searcher;
|
||||
```
|
||||
@@ -1,24 +0,0 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
||||
@@ -1,33 +0,0 @@
|
||||
extern crate grep_regex;
|
||||
extern crate grep_searcher;
|
||||
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::io;
|
||||
use std::process;
|
||||
|
||||
use grep_regex::RegexMatcher;
|
||||
use grep_searcher::Searcher;
|
||||
use grep_searcher::sinks::UTF8;
|
||||
|
||||
fn main() {
|
||||
if let Err(err) = example() {
|
||||
eprintln!("{}", err);
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
fn example() -> Result<(), Box<Error>> {
|
||||
let pattern = match env::args().nth(1) {
|
||||
Some(pattern) => pattern,
|
||||
None => return Err(From::from(format!(
|
||||
"Usage: search-stdin <pattern>"
|
||||
))),
|
||||
};
|
||||
let matcher = RegexMatcher::new(&pattern)?;
|
||||
Searcher::new().search_reader(&matcher, io::stdin(), UTF8(|lnum, line| {
|
||||
print!("{}:{}", lnum, line);
|
||||
Ok(true)
|
||||
}))?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,132 +0,0 @@
|
||||
/*!
|
||||
This crate provides an implementation of line oriented search, with optional
|
||||
support for multi-line search.
|
||||
|
||||
# Brief overview
|
||||
|
||||
The principal type in this crate is a
|
||||
[`Searcher`](struct.Searcher.html),
|
||||
which can be configured and built by a
|
||||
[`SearcherBuilder`](struct.SearcherBuilder.html).
|
||||
A `Searcher` is responsible for reading bytes from a source (e.g., a file),
|
||||
executing a search of those bytes using a `Matcher` (e.g., a regex) and then
|
||||
reporting the results of that search to a
|
||||
[`Sink`](trait.Sink.html)
|
||||
(e.g., stdout). The `Searcher` itself is principally responsible for managing
|
||||
the consumption of bytes from a source and applying a `Matcher` over those
|
||||
bytes in an efficient way. The `Searcher` is also responsible for inverting
|
||||
a search, counting lines, reporting contextual lines, detecting binary data
|
||||
and even deciding whether or not to use memory maps.
|
||||
|
||||
A `Matcher` (which is defined in the
|
||||
[`grep-matcher`](https://crates.io/crates/grep-matcher)
|
||||
crate) is a trait for describing the lowest levels of pattern search in a
|
||||
generic way. The interface itself is very similar to the interface of a regular
|
||||
expression. For example, the
|
||||
[`grep-regex`](https://crates.io/crates/grep-regex)
|
||||
crate provides an implementation of the `Matcher` trait using Rust's
|
||||
[`regex`](https://crates.io/crates/regex)
|
||||
crate.
|
||||
|
||||
Finally, a `Sink` describes how callers receive search results produced by a
|
||||
`Searcher`. This includes routines that are called at the beginning and end of
|
||||
a search, in addition to routines that are called when matching or contextual
|
||||
lines are found by the `Searcher`. Implementations of `Sink` can be trivially
|
||||
simple, or extraordinarily complex, such as the
|
||||
`Standard` printer found in the
|
||||
[`grep-printer`](https://crates.io/crates/grep-printer)
|
||||
crate, which effectively implements grep-like output.
|
||||
This crate also provides convenience `Sink` implementations in the
|
||||
[`sinks`](sinks/index.html)
|
||||
sub-module for easy searching with closures.
|
||||
|
||||
# Example
|
||||
|
||||
This example shows how to execute the searcher and read the search results
|
||||
using the
|
||||
[`UTF8`](sinks/struct.UTF8.html)
|
||||
implementation of `Sink`.
|
||||
|
||||
```
|
||||
extern crate grep_matcher;
|
||||
extern crate grep_regex;
|
||||
extern crate grep_searcher;
|
||||
|
||||
use std::error::Error;
|
||||
|
||||
use grep_matcher::Matcher;
|
||||
use grep_regex::RegexMatcher;
|
||||
use grep_searcher::Searcher;
|
||||
use grep_searcher::sinks::UTF8;
|
||||
|
||||
const SHERLOCK: &'static [u8] = b"\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.
|
||||
";
|
||||
|
||||
# fn main() { example().unwrap() }
|
||||
fn example() -> Result<(), Box<Error>> {
|
||||
let matcher = RegexMatcher::new(r"Doctor \w+")?;
|
||||
let mut matches: Vec<(u64, String)> = vec![];
|
||||
Searcher::new().search_slice(&matcher, SHERLOCK, UTF8(|lnum, line| {
|
||||
// We are guaranteed to find a match, so the unwrap is OK.
|
||||
let mymatch = matcher.find(line.as_bytes())?.unwrap();
|
||||
matches.push((lnum, line[mymatch].to_string()));
|
||||
Ok(true)
|
||||
}))?;
|
||||
|
||||
assert_eq!(matches.len(), 2);
|
||||
assert_eq!(
|
||||
matches[0],
|
||||
(1, "Doctor Watsons".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
matches[1],
|
||||
(5, "Doctor Watson".to_string())
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
See also `examples/search-stdin.rs` from the root of this crate's directory
|
||||
to see a similar example that accepts a pattern on the command line and
|
||||
searches stdin.
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate bstr;
|
||||
extern crate bytecount;
|
||||
extern crate encoding_rs;
|
||||
extern crate encoding_rs_io;
|
||||
extern crate grep_matcher;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memmap;
|
||||
#[cfg(test)]
|
||||
extern crate regex;
|
||||
|
||||
pub use lines::{LineIter, LineStep};
|
||||
pub use searcher::{
|
||||
BinaryDetection, ConfigError, Encoding, MmapChoice,
|
||||
Searcher, SearcherBuilder,
|
||||
};
|
||||
pub use sink::{
|
||||
Sink, SinkError,
|
||||
SinkContext, SinkContextKind, SinkFinish, SinkMatch,
|
||||
};
|
||||
pub use sink::sinks;
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
|
||||
mod line_buffer;
|
||||
mod lines;
|
||||
mod searcher;
|
||||
mod sink;
|
||||
#[cfg(test)]
|
||||
mod testutil;
|
||||
@@ -1,947 +0,0 @@
|
||||
use std::cmp;
|
||||
use std::io;
|
||||
|
||||
use bstr::{BStr, BString};
|
||||
|
||||
/// The default buffer capacity that we use for the line buffer.
|
||||
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
|
||||
|
||||
/// How a searcher copes with long lines and big contexts.
///
/// When data is searched incrementally through a fixed size buffer, this
/// controls how much *additional* memory, beyond the size of the buffer, may
/// be allocated to accommodate lines (which may include the lines in a
/// context window, when enabled) that do not fit in the buffer.
///
/// The default is to eagerly allocate without a limit.
#[derive(Clone, Copy, Debug)]
pub enum BufferAllocation {
    /// Keep expanding the buffer until at least the next line fits into
    /// memory, or until all available memory is exhausted.
    ///
    /// This is the default.
    Eager,
    /// Cap the amount of additional memory at the given size. If a line is
    /// found that requires more memory than is allowed here, then reading
    /// stops and an error is returned.
    Error(usize),
}

impl Default for BufferAllocation {
    /// The default strategy is `BufferAllocation::Eager`.
    fn default() -> Self {
        BufferAllocation::Eager
    }
}
|
||||
|
||||
/// Build the I/O error reported when a configured allocation limit has been
/// reached.
///
/// `limit` is included verbatim in the error message. The returned error has
/// kind `io::ErrorKind::Other`.
pub fn alloc_error(limit: usize) -> io::Error {
    io::Error::new(
        io::ErrorKind::Other,
        format!("configured allocation limit ({}) exceeded", limit),
    )
}
|
||||
|
||||
/// The behavior of binary detection in the line buffer.
///
/// Binary detection is the process of _heuristically_ identifying whether a
/// given chunk of data is binary or not, and then taking an action based on
/// the result of that heuristic. The motivation is that binary data is often
/// undesirable to search with textual patterns. There are many cases in which
/// that isn't true, which is why binary detection is disabled by default.
#[derive(Clone, Copy, Debug)]
pub enum BinaryDetection {
    /// No binary detection is performed. Data reported by the line buffer may
    /// contain arbitrary bytes.
    None,
    /// The given byte is searched for in all contents read by the line
    /// buffer. If it occurs, then the data is considered binary and the line
    /// buffer acts as if it reached EOF. The line buffer guarantees that this
    /// byte will never be observable by callers.
    Quit(u8),
    /// The given byte is searched for in all contents read by the line
    /// buffer. If it occurs, then it is replaced by the line terminator. The
    /// line buffer guarantees that this byte will never be observable by
    /// callers.
    Convert(u8),
}

impl Default for BinaryDetection {
    /// Binary detection is off by default.
    fn default() -> Self {
        BinaryDetection::None
    }
}

impl BinaryDetection {
    /// Returns true if and only if the detection heuristic demands that
    /// the line buffer stop reading data once binary data is observed.
    fn is_quit(&self) -> bool {
        if let BinaryDetection::Quit(_) = *self {
            true
        } else {
            false
        }
    }
}
|
||||
|
||||
/// The configuration of a buffer. This contains options that are fixed once
|
||||
/// a buffer has been constructed.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
struct Config {
|
||||
/// The number of bytes to attempt to read at a time.
|
||||
capacity: usize,
|
||||
/// The line terminator.
|
||||
lineterm: u8,
|
||||
/// The behavior for handling long lines.
|
||||
buffer_alloc: BufferAllocation,
|
||||
/// When set, the presence of the given byte indicates binary content.
|
||||
binary: BinaryDetection,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
fn default() -> Config {
|
||||
Config {
|
||||
capacity: DEFAULT_BUFFER_CAPACITY,
|
||||
lineterm: b'\n',
|
||||
buffer_alloc: BufferAllocation::default(),
|
||||
binary: BinaryDetection::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A builder for constructing line buffers.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct LineBufferBuilder {
|
||||
config: Config,
|
||||
}
|
||||
|
||||
impl LineBufferBuilder {
|
||||
/// Create a new builder for a buffer.
|
||||
pub fn new() -> LineBufferBuilder {
|
||||
LineBufferBuilder { config: Config::default() }
|
||||
}
|
||||
|
||||
/// Create a new line buffer from this builder's configuration.
|
||||
pub fn build(&self) -> LineBuffer {
|
||||
LineBuffer {
|
||||
config: self.config,
|
||||
buf: BString::from(vec![0; self.config.capacity]),
|
||||
pos: 0,
|
||||
last_lineterm: 0,
|
||||
end: 0,
|
||||
absolute_byte_offset: 0,
|
||||
binary_byte_offset: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the default capacity to use for a buffer.
|
||||
///
|
||||
/// In general, the capacity of a buffer corresponds to the amount of data
|
||||
/// to hold in memory, and the size of the reads to make to the underlying
|
||||
/// reader.
|
||||
///
|
||||
/// This is set to a reasonable default and probably shouldn't be changed
|
||||
/// unless there's a specific reason to do so.
|
||||
pub fn capacity(&mut self, capacity: usize) -> &mut LineBufferBuilder {
|
||||
self.config.capacity = capacity;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the line terminator for the buffer.
|
||||
///
|
||||
/// Every buffer has a line terminator, and this line terminator is used
|
||||
/// to determine how to roll the buffer forward. For example, when a read
|
||||
/// to the buffer's underlying reader occurs, the end of the data that is
|
||||
/// read is likely to correspond to an incomplete line. As a line buffer,
|
||||
/// callers should not access this data since it is incomplete. The line
|
||||
/// terminator is how the line buffer determines the part of the read that
|
||||
/// is incomplete.
|
||||
///
|
||||
/// By default, this is set to `b'\n'`.
|
||||
pub fn line_terminator(&mut self, lineterm: u8) -> &mut LineBufferBuilder {
|
||||
self.config.lineterm = lineterm;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the maximum amount of additional memory to allocate for long lines.
|
||||
///
|
||||
/// In order to enable line oriented search, a fundamental requirement is
|
||||
/// that, at a minimum, each line must be able to fit into memory. This
|
||||
/// setting controls how big that line is allowed to be. By default, this
|
||||
/// is set to `BufferAllocation::Eager`, which means a line buffer will
|
||||
/// attempt to allocate as much memory as possible to fit a line, and will
|
||||
/// only be limited by available memory.
|
||||
///
|
||||
/// Note that this setting only applies to the amount of *additional*
|
||||
/// memory to allocate, beyond the capacity of the buffer. That means that
|
||||
/// a value of `0` is sensible, and in particular, will guarantee that a
|
||||
/// line buffer will never allocate additional memory beyond its initial
|
||||
/// capacity.
|
||||
pub fn buffer_alloc(
|
||||
&mut self,
|
||||
behavior: BufferAllocation,
|
||||
) -> &mut LineBufferBuilder {
|
||||
self.config.buffer_alloc = behavior;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to enable binary detection or not. Depending on the setting,
|
||||
/// this can either cause the line buffer to report EOF early or it can
|
||||
/// cause the line buffer to clean the data.
|
||||
///
|
||||
/// By default, this is disabled. In general, binary detection should be
|
||||
/// viewed as an imperfect heuristic.
|
||||
pub fn binary_detection(
|
||||
&mut self,
|
||||
detection: BinaryDetection,
|
||||
) -> &mut LineBufferBuilder {
|
||||
self.config.binary = detection;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A line buffer reader efficiently reads a line oriented buffer from an
|
||||
/// arbitrary reader.
|
||||
#[derive(Debug)]
|
||||
pub struct LineBufferReader<'b, R> {
|
||||
rdr: R,
|
||||
line_buffer: &'b mut LineBuffer,
|
||||
}
|
||||
|
||||
impl<'b, R: io::Read> LineBufferReader<'b, R> {
|
||||
/// Create a new buffered reader that reads from `rdr` and uses the given
|
||||
/// `line_buffer` as an intermediate buffer.
|
||||
///
|
||||
/// This does not change the binary detection behavior of the given line
|
||||
/// buffer.
|
||||
pub fn new(
|
||||
rdr: R,
|
||||
line_buffer: &'b mut LineBuffer,
|
||||
) -> LineBufferReader<'b, R> {
|
||||
line_buffer.clear();
|
||||
LineBufferReader { rdr, line_buffer }
|
||||
}
|
||||
|
||||
/// The absolute byte offset which corresponds to the starting offsets
|
||||
/// of the data returned by `buffer` relative to the beginning of the
|
||||
/// underlying reader's contents. As such, this offset does not generally
|
||||
/// correspond to an offset in memory. It is typically used for reporting
|
||||
/// purposes. It can also be used for counting the number of bytes that
|
||||
/// have been searched.
|
||||
pub fn absolute_byte_offset(&self) -> u64 {
|
||||
self.line_buffer.absolute_byte_offset()
|
||||
}
|
||||
|
||||
/// If binary data was detected, then this returns the absolute byte offset
|
||||
/// at which binary data was initially found.
|
||||
pub fn binary_byte_offset(&self) -> Option<u64> {
|
||||
self.line_buffer.binary_byte_offset()
|
||||
}
|
||||
|
||||
/// Fill the contents of this buffer by discarding the part of the buffer
|
||||
/// that has been consumed. The free space created by discarding the
|
||||
/// consumed part of the buffer is then filled with new data from the
|
||||
/// reader.
|
||||
///
|
||||
/// If EOF is reached, then `false` is returned. Otherwise, `true` is
|
||||
/// returned. (Note that if this line buffer's binary detection is set to
|
||||
/// `Quit`, then the presence of binary data will cause this buffer to
|
||||
/// behave as if it had seen EOF at the first occurrence of binary data.)
|
||||
///
|
||||
/// This forwards any errors returned by the underlying reader, and will
|
||||
/// also return an error if the buffer must be expanded past its allocation
|
||||
/// limit, as governed by the buffer allocation strategy.
|
||||
pub fn fill(&mut self) -> Result<bool, io::Error> {
|
||||
self.line_buffer.fill(&mut self.rdr)
|
||||
}
|
||||
|
||||
/// Return the contents of this buffer.
|
||||
pub fn buffer(&self) -> &[u8] {
|
||||
self.line_buffer.buffer().as_bytes()
|
||||
}
|
||||
|
||||
/// Return the underlying buffer as a byte string. Used for tests only.
|
||||
#[cfg(test)]
|
||||
fn bstr(&self) -> &BStr {
|
||||
self.line_buffer.buffer()
|
||||
}
|
||||
|
||||
/// Consume the number of bytes provided. This must be less than or equal
|
||||
/// to the number of bytes returned by `buffer`.
|
||||
pub fn consume(&mut self, amt: usize) {
|
||||
self.line_buffer.consume(amt);
|
||||
}
|
||||
|
||||
/// Consumes the remainder of the buffer. Subsequent calls to `buffer` are
|
||||
/// guaranteed to return an empty slice until the buffer is refilled.
|
||||
///
|
||||
/// This is a convenience function for `consume(buffer.len())`.
|
||||
#[cfg(test)]
|
||||
fn consume_all(&mut self) {
|
||||
self.line_buffer.consume_all();
|
||||
}
|
||||
}
|
||||
|
||||
/// A line buffer manages a (typically fixed) buffer for holding lines.
|
||||
///
|
||||
/// Callers should create line buffers sparingly and reuse them when possible.
|
||||
/// Line buffers cannot be used directly, but instead must be used via the
|
||||
/// LineBufferReader.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct LineBuffer {
|
||||
/// The configuration of this buffer.
|
||||
config: Config,
|
||||
/// The primary buffer with which to hold data.
|
||||
buf: BString,
|
||||
/// The current position of this buffer. This is always a valid sliceable
|
||||
/// index into `buf`, and its maximum value is the length of `buf`.
|
||||
pos: usize,
|
||||
/// The end position of searchable content in this buffer. This is either
|
||||
/// set to just after the final line terminator in the buffer, or to just
|
||||
/// after the end of the last byte emitted by the reader when the reader
|
||||
/// has been exhausted.
|
||||
last_lineterm: usize,
|
||||
/// The end position of the buffer. This is always greater than or equal to
|
||||
/// last_lineterm. The bytes between last_lineterm and end, if any, always
|
||||
/// correspond to a partial line.
|
||||
end: usize,
|
||||
/// The absolute byte offset corresponding to `pos`. This is most typically
|
||||
/// not a valid index into addressable memory, but rather, an offset that
|
||||
/// is relative to all data that passes through a line buffer (since
|
||||
/// construction or since the last time `clear` was called).
|
||||
///
|
||||
/// When the line buffer reaches EOF, this is set to the position just
|
||||
/// after the last byte read from the underlying reader. That is, it
|
||||
/// becomes the total count of bytes that have been read.
|
||||
absolute_byte_offset: u64,
|
||||
/// If binary data was found, this records the absolute byte offset at
|
||||
/// which it was first detected.
|
||||
binary_byte_offset: Option<u64>,
|
||||
}
|
||||
|
||||
impl LineBuffer {
|
||||
/// Set the binary detection method used on this line buffer.
|
||||
///
|
||||
/// This permits dynamically changing the binary detection strategy on
|
||||
/// an existing line buffer without needing to create a new one.
|
||||
pub fn set_binary_detection(&mut self, binary: BinaryDetection) {
|
||||
self.config.binary = binary;
|
||||
}
|
||||
|
||||
/// Reset this buffer, such that it can be used with a new reader.
|
||||
fn clear(&mut self) {
|
||||
self.pos = 0;
|
||||
self.last_lineterm = 0;
|
||||
self.end = 0;
|
||||
self.absolute_byte_offset = 0;
|
||||
self.binary_byte_offset = None;
|
||||
}
|
||||
|
||||
/// The absolute byte offset which corresponds to the starting offsets
|
||||
/// of the data returned by `buffer` relative to the beginning of the
|
||||
/// reader's contents. As such, this offset does not generally correspond
|
||||
/// to an offset in memory. It is typically used for reporting purposes,
|
||||
/// particularly in error messages.
|
||||
///
|
||||
/// This is reset to `0` when `clear` is called.
|
||||
fn absolute_byte_offset(&self) -> u64 {
|
||||
self.absolute_byte_offset
|
||||
}
|
||||
|
||||
/// If binary data was detected, then this returns the absolute byte offset
|
||||
/// at which binary data was initially found.
|
||||
fn binary_byte_offset(&self) -> Option<u64> {
|
||||
self.binary_byte_offset
|
||||
}
|
||||
|
||||
/// Return the contents of this buffer.
|
||||
fn buffer(&self) -> &BStr {
|
||||
&self.buf[self.pos..self.last_lineterm]
|
||||
}
|
||||
|
||||
/// Return the contents of the free space beyond the end of the buffer as
|
||||
/// a mutable slice.
|
||||
fn free_buffer(&mut self) -> &mut BStr {
|
||||
&mut self.buf[self.end..]
|
||||
}
|
||||
|
||||
/// Consume the number of bytes provided. This must be less than or equal
|
||||
/// to the number of bytes returned by `buffer`.
|
||||
fn consume(&mut self, amt: usize) {
|
||||
assert!(amt <= self.buffer().len());
|
||||
self.pos += amt;
|
||||
self.absolute_byte_offset += amt as u64;
|
||||
}
|
||||
|
||||
/// Consumes the remainder of the buffer. Subsequent calls to `buffer` are
|
||||
/// guaranteed to return an empty slice until the buffer is refilled.
|
||||
///
|
||||
/// This is a convenience function for `consume(buffer.len())`.
|
||||
#[cfg(test)]
|
||||
fn consume_all(&mut self) {
|
||||
let amt = self.buffer().len();
|
||||
self.consume(amt);
|
||||
}
|
||||
|
||||
/// Fill the contents of this buffer by discarding the part of the buffer
|
||||
/// that has been consumed. The free space created by discarding the
|
||||
/// consumed part of the buffer is then filled with new data from the given
|
||||
/// reader.
|
||||
///
|
||||
/// Callers should provide the same reader to this line buffer in
|
||||
/// subsequent calls to fill. A different reader can only be used
|
||||
/// immediately following a call to `clear`.
|
||||
///
|
||||
/// If EOF is reached, then `false` is returned. Otherwise, `true` is
|
||||
/// returned. (Note that if this line buffer's binary detection is set to
|
||||
/// `Quit`, then the presence of binary data will cause this buffer to
|
||||
/// behave as if it had seen EOF.)
|
||||
///
|
||||
/// This forwards any errors returned by `rdr`, and will also return an
|
||||
/// error if the buffer must be expanded past its allocation limit, as
|
||||
/// governed by the buffer allocation strategy.
|
||||
fn fill<R: io::Read>(&mut self, mut rdr: R) -> Result<bool, io::Error> {
|
||||
// If the binary detection heuristic tells us to quit once binary data
|
||||
// has been observed, then we no longer read new data and reach EOF
|
||||
// once the current buffer has been consumed.
|
||||
if self.config.binary.is_quit() && self.binary_byte_offset.is_some() {
|
||||
return Ok(!self.buffer().is_empty());
|
||||
}
|
||||
|
||||
self.roll();
|
||||
assert_eq!(self.pos, 0);
|
||||
loop {
|
||||
self.ensure_capacity()?;
|
||||
let readlen = rdr.read(self.free_buffer().as_bytes_mut())?;
|
||||
if readlen == 0 {
|
||||
// We're only done reading for good once the caller has
|
||||
// consumed everything.
|
||||
self.last_lineterm = self.end;
|
||||
return Ok(!self.buffer().is_empty());
|
||||
}
|
||||
|
||||
// Get a mutable view into the bytes we've just read. These are
|
||||
// the bytes that we do binary detection on, and also the bytes we
|
||||
// search to find the last line terminator. We need a mutable slice
|
||||
// in the case of binary conversion.
|
||||
let oldend = self.end;
|
||||
self.end += readlen;
|
||||
let newbytes = &mut self.buf[oldend..self.end];
|
||||
|
||||
// Binary detection.
|
||||
match self.config.binary {
|
||||
BinaryDetection::None => {} // nothing to do
|
||||
BinaryDetection::Quit(byte) => {
|
||||
if let Some(i) = newbytes.find_byte(byte) {
|
||||
self.end = oldend + i;
|
||||
self.last_lineterm = self.end;
|
||||
self.binary_byte_offset =
|
||||
Some(self.absolute_byte_offset + self.end as u64);
|
||||
// If the first byte in our buffer is a binary byte,
|
||||
// then our buffer is empty and we should report as
|
||||
// such to the caller.
|
||||
return Ok(self.pos < self.end);
|
||||
}
|
||||
}
|
||||
BinaryDetection::Convert(byte) => {
|
||||
if let Some(i) = replace_bytes(
|
||||
newbytes,
|
||||
byte,
|
||||
self.config.lineterm,
|
||||
) {
|
||||
// Record only the first binary offset.
|
||||
if self.binary_byte_offset.is_none() {
|
||||
self.binary_byte_offset =
|
||||
Some(self.absolute_byte_offset
|
||||
+ (oldend + i) as u64);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update our `last_lineterm` positions if we read one.
|
||||
if let Some(i) = newbytes.rfind_byte(self.config.lineterm) {
|
||||
self.last_lineterm = oldend + i + 1;
|
||||
return Ok(true);
|
||||
}
|
||||
// At this point, if we couldn't find a line terminator, then we
|
||||
// don't have a complete line. Therefore, we try to read more!
|
||||
}
|
||||
}
|
||||
|
||||
/// Roll the unconsumed parts of the buffer to the front.
|
||||
///
|
||||
/// This operation is idempotent.
|
||||
///
|
||||
/// After rolling, `last_lineterm` and `end` point to the same location,
|
||||
/// and `pos` is always set to `0`.
|
||||
fn roll(&mut self) {
|
||||
if self.pos == self.end {
|
||||
self.pos = 0;
|
||||
self.last_lineterm = 0;
|
||||
self.end = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
let roll_len = self.end - self.pos;
|
||||
self.buf.copy_within(self.pos.., 0);
|
||||
self.pos = 0;
|
||||
self.last_lineterm = roll_len;
|
||||
self.end = roll_len;
|
||||
}
|
||||
|
||||
/// Ensures that the internal buffer has a non-zero amount of free space
|
||||
/// in which to read more data. If there is no free space, then more is
|
||||
/// allocated. If the allocation must exceed the configured limit, then
|
||||
/// this returns an error.
|
||||
fn ensure_capacity(&mut self) -> Result<(), io::Error> {
|
||||
if !self.free_buffer().is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
// `len` is used for computing the next allocation size. The capacity
|
||||
// is permitted to start at `0`, so we make sure it's at least `1`.
|
||||
let len = cmp::max(1, self.buf.len());
|
||||
let additional = match self.config.buffer_alloc {
|
||||
BufferAllocation::Eager => len * 2,
|
||||
BufferAllocation::Error(limit) => {
|
||||
let used = self.buf.len() - self.config.capacity;
|
||||
let n = cmp::min(len * 2, limit - used);
|
||||
if n == 0 {
|
||||
return Err(alloc_error(self.config.capacity + limit));
|
||||
}
|
||||
n
|
||||
}
|
||||
};
|
||||
assert!(additional > 0);
|
||||
let newlen = self.buf.len() + additional;
|
||||
self.buf.resize(newlen, 0);
|
||||
assert!(!self.free_buffer().is_empty());
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Replaces `src` with `replacement` in bytes, and return the offset of the
|
||||
/// first replacement, if one exists.
|
||||
fn replace_bytes(bytes: &mut BStr, src: u8, replacement: u8) -> Option<usize> {
|
||||
if src == replacement {
|
||||
return None;
|
||||
}
|
||||
let mut first_pos = None;
|
||||
let mut pos = 0;
|
||||
while let Some(i) = bytes[pos..].find_byte(src).map(|i| pos + i) {
|
||||
if first_pos.is_none() {
|
||||
first_pos = Some(i);
|
||||
}
|
||||
bytes[i] = replacement;
|
||||
pos = i + 1;
|
||||
while bytes.get(pos) == Some(&src) {
|
||||
bytes[pos] = replacement;
|
||||
pos += 1;
|
||||
}
|
||||
}
|
||||
first_pos
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::str;
    use bstr::BString;
    use super::*;

    const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";

    /// Asserts that `$rdr` has nothing more to yield, and that its absolute
    /// and binary byte offsets equal `$abs` and `$bin` respectively.
    macro_rules! assert_drained {
        ($rdr:ident, $abs:expr, $bin:expr) => {{
            assert!(!$rdr.fill().unwrap());
            assert_eq!($rdr.absolute_byte_offset(), $abs);
            assert_eq!($rdr.binary_byte_offset(), $bin);
        }};
    }

    /// Shorthand for turning a string slice into an owned `String`.
    fn s(slice: &str) -> String {
        String::from(slice)
    }

    /// Runs `replace_bytes` on a copy of `slice` and returns the rewritten
    /// text together with the reported offset of the first replacement.
    fn replace_str(
        slice: &str,
        src: u8,
        replacement: u8,
    ) -> (String, Option<usize>) {
        let mut scratch = BString::from(slice);
        let first = replace_bytes(&mut scratch, src, replacement);
        (scratch.into_string().unwrap(), first)
    }

    #[test]
    fn replace() {
        // (input, byte to replace, expected output, expected first offset)
        let cases = [
            ("abc", b'b', "azc", 1),
            ("abb", b'b', "azz", 1),
            ("aba", b'a', "zbz", 0),
            ("bbb", b'b', "zzz", 0),
            ("bac", b'b', "zac", 0),
        ];
        for &(input, src, want, at) in cases.iter() {
            assert_eq!(replace_str(input, src, b'z'), (s(want), Some(at)));
        }
    }

    #[test]
    fn buffer_basics1() {
        let input = "homer\nlisa\nmaggie";
        let mut lb = LineBufferBuilder::new().build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.buffer().is_empty());

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "homer\nlisa\n");
        assert_eq!(reader.absolute_byte_offset(), 0);
        reader.consume(5);
        assert_eq!(reader.absolute_byte_offset(), 5);
        reader.consume_all();
        assert_eq!(reader.absolute_byte_offset(), 11);

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "maggie");
        reader.consume_all();

        assert_drained!(reader, input.len() as u64, None);
    }

    #[test]
    fn buffer_basics2() {
        let input = "homer\nlisa\nmaggie\n";
        let mut lb = LineBufferBuilder::new().build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "homer\nlisa\nmaggie\n");
        reader.consume_all();

        assert_drained!(reader, input.len() as u64, None);
    }

    #[test]
    fn buffer_basics3() {
        let input = "\n";
        let mut lb = LineBufferBuilder::new().build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "\n");
        reader.consume_all();

        assert_drained!(reader, input.len() as u64, None);
    }

    #[test]
    fn buffer_basics4() {
        let input = "\n\n";
        let mut lb = LineBufferBuilder::new().build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "\n\n");
        reader.consume_all();

        assert_drained!(reader, input.len() as u64, None);
    }

    #[test]
    fn buffer_empty() {
        let input = "";
        let mut lb = LineBufferBuilder::new().build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert_drained!(reader, input.len() as u64, None);
    }

    #[test]
    fn buffer_zero_capacity() {
        let input = "homer\nlisa\nmaggie";
        let mut lb = LineBufferBuilder::new().capacity(0).build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        while reader.fill().unwrap() {
            reader.consume_all();
        }
        assert_eq!(reader.absolute_byte_offset(), input.len() as u64);
        assert_eq!(reader.binary_byte_offset(), None);
    }

    #[test]
    fn buffer_small_capacity() {
        let input = "homer\nlisa\nmaggie";
        let mut lb = LineBufferBuilder::new().capacity(1).build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        // Stitch together everything the reader hands out.
        let mut got = BString::new();
        while reader.fill().unwrap() {
            got.push(reader.buffer());
            reader.consume_all();
        }
        assert_eq!(input, got);
        assert_eq!(reader.absolute_byte_offset(), input.len() as u64);
        assert_eq!(reader.binary_byte_offset(), None);
    }

    #[test]
    fn buffer_limited_capacity1() {
        let input = "homer\nlisa\nmaggie";
        let mut lb = LineBufferBuilder::new()
            .capacity(1)
            .buffer_alloc(BufferAllocation::Error(5))
            .build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "homer\n");
        reader.consume_all();

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "lisa\n");
        reader.consume_all();

        // "maggie" would fit exactly, but there is no room left to read one
        // more byte. Without that extra read we cannot tell whether we have
        // hit EOF, so the reader has to give up with an error.
        assert!(reader.fill().is_err());

        // The reader remains usable after the error.
        assert_eq!(reader.bstr(), "m");
        reader.consume_all();

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "aggie");
        reader.consume_all();

        assert!(!reader.fill().unwrap());
    }

    #[test]
    fn buffer_limited_capacity2() {
        let input = "homer\nlisa\nmaggie";
        let mut lb = LineBufferBuilder::new()
            .capacity(1)
            .buffer_alloc(BufferAllocation::Error(6))
            .build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "homer\n");
        reader.consume_all();

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "lisa\n");
        reader.consume_all();

        // One extra byte of allowance is exactly enough this time.
        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "maggie");
        reader.consume_all();

        assert!(!reader.fill().unwrap());
    }

    #[test]
    fn buffer_limited_capacity3() {
        let input = "homer\nlisa\nmaggie";
        let mut lb = LineBufferBuilder::new()
            .capacity(1)
            .buffer_alloc(BufferAllocation::Error(0))
            .build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.fill().is_err());
        assert_eq!(reader.bstr(), "");
    }

    #[test]
    fn buffer_binary_none() {
        let input = "homer\nli\x00sa\nmaggie\n";
        let mut lb = LineBufferBuilder::new().build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.buffer().is_empty());

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "homer\nli\x00sa\nmaggie\n");
        reader.consume_all();

        assert_drained!(reader, input.len() as u64, None);
    }

    #[test]
    fn buffer_binary_quit1() {
        let input = "homer\nli\x00sa\nmaggie\n";
        let mut lb = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Quit(b'\x00'))
            .build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.buffer().is_empty());

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "homer\nli");
        reader.consume_all();

        assert_drained!(reader, 8, Some(8));
    }

    #[test]
    fn buffer_binary_quit2() {
        let input = "\x00homer\nlisa\nmaggie\n";
        let mut lb = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Quit(b'\x00'))
            .build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(!reader.fill().unwrap());
        assert_eq!(reader.bstr(), "");
        assert_eq!(reader.absolute_byte_offset(), 0);
        assert_eq!(reader.binary_byte_offset(), Some(0));
    }

    #[test]
    fn buffer_binary_quit3() {
        let input = "homer\nlisa\nmaggie\n\x00";
        let mut lb = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Quit(b'\x00'))
            .build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.buffer().is_empty());

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "homer\nlisa\nmaggie\n");
        reader.consume_all();

        assert_drained!(
            reader,
            input.len() as u64 - 1,
            Some(input.len() as u64 - 1)
        );
    }

    #[test]
    fn buffer_binary_quit4() {
        let input = "homer\nlisa\nmaggie\x00\n";
        let mut lb = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Quit(b'\x00'))
            .build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.buffer().is_empty());

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "homer\nlisa\nmaggie");
        reader.consume_all();

        assert_drained!(
            reader,
            input.len() as u64 - 2,
            Some(input.len() as u64 - 2)
        );
    }

    #[test]
    fn buffer_binary_quit5() {
        let mut lb = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Quit(b'u'))
            .build();
        let mut reader = LineBufferReader::new(SHERLOCK.as_bytes(), &mut lb);

        assert!(reader.buffer().is_empty());

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, s\
");
        reader.consume_all();

        assert_drained!(reader, 76, Some(76));
        assert_eq!(SHERLOCK.as_bytes()[76], b'u');
    }

    #[test]
    fn buffer_binary_convert1() {
        let input = "homer\nli\x00sa\nmaggie\n";
        let mut lb = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Convert(b'\x00'))
            .build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.buffer().is_empty());

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "homer\nli\nsa\nmaggie\n");
        reader.consume_all();

        assert_drained!(reader, input.len() as u64, Some(8));
    }

    #[test]
    fn buffer_binary_convert2() {
        let input = "\x00homer\nlisa\nmaggie\n";
        let mut lb = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Convert(b'\x00'))
            .build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.buffer().is_empty());

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "\nhomer\nlisa\nmaggie\n");
        reader.consume_all();

        assert_drained!(reader, input.len() as u64, Some(0));
    }

    #[test]
    fn buffer_binary_convert3() {
        let input = "homer\nlisa\nmaggie\n\x00";
        let mut lb = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Convert(b'\x00'))
            .build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.buffer().is_empty());

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "homer\nlisa\nmaggie\n\n");
        reader.consume_all();

        assert_drained!(
            reader,
            input.len() as u64,
            Some(input.len() as u64 - 1)
        );
    }

    #[test]
    fn buffer_binary_convert4() {
        let input = "homer\nlisa\nmaggie\x00\n";
        let mut lb = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Convert(b'\x00'))
            .build();
        let mut reader = LineBufferReader::new(input.as_bytes(), &mut lb);

        assert!(reader.buffer().is_empty());

        assert!(reader.fill().unwrap());
        assert_eq!(reader.bstr(), "homer\nlisa\nmaggie\n\n");
        reader.consume_all();

        assert_drained!(
            reader,
            input.len() as u64,
            Some(input.len() as u64 - 2)
        );
    }
}
|
||||
@@ -1,464 +0,0 @@
|
||||
/*!
|
||||
A collection of routines for performing operations on lines.
|
||||
*/
|
||||
|
||||
use bstr::B;
|
||||
use bytecount;
|
||||
use grep_matcher::{LineTerminator, Match};
|
||||
|
||||
/// An iterator over lines in a particular slice of bytes.
|
||||
///
|
||||
/// Line terminators are considered part of the line they terminate. All lines
|
||||
/// yielded by the iterator are guaranteed to be non-empty.
|
||||
///
|
||||
/// `'b` refers to the lifetime of the underlying bytes.
|
||||
#[derive(Debug)]
|
||||
pub struct LineIter<'b> {
|
||||
bytes: &'b [u8],
|
||||
stepper: LineStep,
|
||||
}
|
||||
|
||||
impl<'b> LineIter<'b> {
|
||||
/// Create a new line iterator that yields lines in the given bytes that
|
||||
/// are terminated by `line_term`.
|
||||
pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> {
|
||||
LineIter {
|
||||
bytes: bytes,
|
||||
stepper: LineStep::new(line_term, 0, bytes.len()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'b> Iterator for LineIter<'b> {
|
||||
type Item = &'b [u8];
|
||||
|
||||
fn next(&mut self) -> Option<&'b [u8]> {
|
||||
self.stepper.next_match(self.bytes).map(|m| &self.bytes[m])
|
||||
}
|
||||
}
|
||||
|
||||
/// An explicit iterator over lines in a particular slice of bytes.
|
||||
///
|
||||
/// This iterator avoids borrowing the bytes themselves, and instead requires
|
||||
/// callers to explicitly provide the bytes when moving through the iterator.
|
||||
/// While not idiomatic, this provides a simple way of iterating over lines
|
||||
/// that doesn't require borrowing the slice itself, which can be convenient.
|
||||
///
|
||||
/// Line terminators are considered part of the line they terminate. All lines
|
||||
/// yielded by the iterator are guaranteed to be non-empty.
|
||||
#[derive(Debug)]
|
||||
pub struct LineStep {
|
||||
line_term: u8,
|
||||
pos: usize,
|
||||
end: usize,
|
||||
}
|
||||
|
||||
impl LineStep {
|
||||
/// Create a new line iterator over the given range of bytes using the
|
||||
/// given line terminator.
|
||||
///
|
||||
/// Callers should provide the actual bytes for each call to `next`. The
|
||||
/// same slice must be provided to each call.
|
||||
///
|
||||
/// This panics if `start` is not less than or equal to `end`.
|
||||
pub fn new(line_term: u8, start: usize, end: usize) -> LineStep {
|
||||
LineStep { line_term, pos: start, end: end }
|
||||
}
|
||||
|
||||
/// Return the start and end position of the next line in the given bytes.
|
||||
///
|
||||
/// The caller must past exactly the same slice of bytes for each call to
|
||||
/// `next`.
|
||||
///
|
||||
/// The range returned includes the line terminator. Ranges are always
|
||||
/// non-empty.
|
||||
pub fn next(&mut self, bytes: &[u8]) -> Option<(usize, usize)> {
|
||||
self.next_impl(bytes)
|
||||
}
|
||||
|
||||
/// Like next, but returns a `Match` instead of a tuple.
|
||||
#[inline(always)]
|
||||
pub(crate) fn next_match(&mut self, bytes: &[u8]) -> Option<Match> {
|
||||
self.next_impl(bytes).map(|(s, e)| Match::new(s, e))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
|
||||
bytes = &bytes[..self.end];
|
||||
match B(&bytes[self.pos..]).find_byte(self.line_term) {
|
||||
None => {
|
||||
if self.pos < bytes.len() {
|
||||
let m = (self.pos, bytes.len());
|
||||
assert!(m.0 <= m.1);
|
||||
|
||||
self.pos = m.1;
|
||||
Some(m)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
Some(line_end) => {
|
||||
let m = (self.pos, self.pos + line_end + 1);
|
||||
assert!(m.0 <= m.1);
|
||||
|
||||
self.pos = m.1;
|
||||
Some(m)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Count the number of occurrences of `line_term` in `bytes`.
|
||||
pub fn count(bytes: &[u8], line_term: u8) -> u64 {
|
||||
bytecount::count(bytes, line_term) as u64
|
||||
}
|
||||
|
||||
/// Given a line that possibly ends with a terminator, return that line without
|
||||
/// the terminator.
|
||||
#[inline(always)]
|
||||
pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
|
||||
let line_term = line_term.as_bytes();
|
||||
let start = bytes.len().saturating_sub(line_term.len());
|
||||
if bytes.get(start..) == Some(line_term) {
|
||||
return &bytes[..bytes.len() - line_term.len()];
|
||||
}
|
||||
bytes
|
||||
}
|
||||
|
||||
/// Return the start and end offsets of the lines containing the given range
|
||||
/// of bytes.
|
||||
///
|
||||
/// Line terminators are considered part of the line they terminate.
|
||||
#[inline(always)]
|
||||
pub fn locate(
|
||||
bytes: &[u8],
|
||||
line_term: u8,
|
||||
range: Match,
|
||||
) -> Match {
|
||||
let line_start = B(&bytes[..range.start()])
|
||||
.rfind_byte(line_term)
|
||||
.map_or(0, |i| i + 1);
|
||||
let line_end =
|
||||
if range.end() > line_start && bytes[range.end() - 1] == line_term {
|
||||
range.end()
|
||||
} else {
|
||||
B(&bytes[range.end()..])
|
||||
.find_byte(line_term)
|
||||
.map_or(bytes.len(), |i| range.end() + i + 1)
|
||||
};
|
||||
Match::new(line_start, line_end)
|
||||
}
|
||||
|
||||
/// Returns the minimal starting offset of the line that occurs `count` lines
|
||||
/// before the last line in `bytes`.
|
||||
///
|
||||
/// Lines are terminated by `line_term`. If `count` is zero, then this returns
|
||||
/// the starting offset of the last line in `bytes`.
|
||||
///
|
||||
/// If `bytes` ends with a line terminator, then the terminator itself is
|
||||
/// considered part of the last line.
|
||||
pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
|
||||
preceding_by_pos(bytes, bytes.len(), line_term, count)
|
||||
}
|
||||
|
||||
/// Returns the minimal starting offset of the line that occurs `count` lines
|
||||
/// before the line containing `pos`. Lines are terminated by `line_term`.
|
||||
/// If `count` is zero, then this returns the starting offset of the line
|
||||
/// containing `pos`.
|
||||
///
|
||||
/// If `pos` points just past a line terminator, then it is considered part of
|
||||
/// the line that it terminates. For example, given `bytes = b"abc\nxyz\n"`
|
||||
/// and `pos = 7`, `preceding(bytes, pos, b'\n', 0)` returns `4` (as does `pos
|
||||
/// = 8`) and `preceding(bytes, pos, `b'\n', 1)` returns `0`.
|
||||
fn preceding_by_pos(
|
||||
bytes: &[u8],
|
||||
mut pos: usize,
|
||||
line_term: u8,
|
||||
mut count: usize,
|
||||
) -> usize {
|
||||
if pos == 0 {
|
||||
return 0;
|
||||
} else if bytes[pos - 1] == line_term {
|
||||
pos -= 1;
|
||||
}
|
||||
loop {
|
||||
match B(&bytes[..pos]).rfind_byte(line_term) {
|
||||
None => {
|
||||
return 0;
|
||||
}
|
||||
Some(i) => {
|
||||
if count == 0 {
|
||||
return i + 1;
|
||||
} else if i == 0 {
|
||||
return 0;
|
||||
}
|
||||
count -= 1;
|
||||
pos = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::ops::Range;
|
||||
use std::str;
|
||||
use grep_matcher::Match;
|
||||
use super::*;
|
||||
|
||||
const SHERLOCK: &'static str = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.\
|
||||
";
|
||||
|
||||
fn m(start: usize, end: usize) -> Match {
|
||||
Match::new(start, end)
|
||||
}
|
||||
|
||||
fn lines(text: &str) -> Vec<&str> {
|
||||
let mut results = vec![];
|
||||
let mut it = LineStep::new(b'\n', 0, text.len());
|
||||
while let Some(m) = it.next_match(text.as_bytes()) {
|
||||
results.push(&text[m]);
|
||||
}
|
||||
results
|
||||
}
|
||||
|
||||
fn line_ranges(text: &str) -> Vec<Range<usize>> {
|
||||
let mut results = vec![];
|
||||
let mut it = LineStep::new(b'\n', 0, text.len());
|
||||
while let Some(m) = it.next_match(text.as_bytes()) {
|
||||
results.push(m.start()..m.end());
|
||||
}
|
||||
results
|
||||
}
|
||||
|
||||
fn prev(text: &str, pos: usize, count: usize) -> usize {
|
||||
preceding_by_pos(text.as_bytes(), pos, b'\n', count)
|
||||
}
|
||||
|
||||
fn loc(text: &str, start: usize, end: usize) -> Match {
|
||||
locate(text.as_bytes(), b'\n', Match::new(start, end))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn line_count() {
|
||||
assert_eq!(0, count(b"", b'\n'));
|
||||
assert_eq!(1, count(b"\n", b'\n'));
|
||||
assert_eq!(2, count(b"\n\n", b'\n'));
|
||||
assert_eq!(2, count(b"a\nb\nc", b'\n'));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn line_locate() {
|
||||
let t = SHERLOCK;
|
||||
let lines = line_ranges(t);
|
||||
|
||||
assert_eq!(
|
||||
loc(t, lines[0].start, lines[0].end),
|
||||
m(lines[0].start, lines[0].end));
|
||||
assert_eq!(
|
||||
loc(t, lines[0].start + 1, lines[0].end),
|
||||
m(lines[0].start, lines[0].end));
|
||||
assert_eq!(
|
||||
loc(t, lines[0].end - 1, lines[0].end),
|
||||
m(lines[0].start, lines[0].end));
|
||||
assert_eq!(
|
||||
loc(t, lines[0].end, lines[0].end),
|
||||
m(lines[1].start, lines[1].end));
|
||||
|
||||
assert_eq!(
|
||||
loc(t, lines[5].start, lines[5].end),
|
||||
m(lines[5].start, lines[5].end));
|
||||
assert_eq!(
|
||||
loc(t, lines[5].start + 1, lines[5].end),
|
||||
m(lines[5].start, lines[5].end));
|
||||
assert_eq!(
|
||||
loc(t, lines[5].end - 1, lines[5].end),
|
||||
m(lines[5].start, lines[5].end));
|
||||
assert_eq!(
|
||||
loc(t, lines[5].end, lines[5].end),
|
||||
m(lines[5].start, lines[5].end));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn line_locate_weird() {
|
||||
assert_eq!(loc("", 0, 0), m(0, 0));
|
||||
|
||||
assert_eq!(loc("\n", 0, 1), m(0, 1));
|
||||
assert_eq!(loc("\n", 1, 1), m(1, 1));
|
||||
|
||||
assert_eq!(loc("\n\n", 0, 0), m(0, 1));
|
||||
assert_eq!(loc("\n\n", 0, 1), m(0, 1));
|
||||
assert_eq!(loc("\n\n", 1, 1), m(1, 2));
|
||||
assert_eq!(loc("\n\n", 1, 2), m(1, 2));
|
||||
assert_eq!(loc("\n\n", 2, 2), m(2, 2));
|
||||
|
||||
assert_eq!(loc("a\nb\nc", 0, 1), m(0, 2));
|
||||
assert_eq!(loc("a\nb\nc", 1, 2), m(0, 2));
|
||||
assert_eq!(loc("a\nb\nc", 2, 3), m(2, 4));
|
||||
assert_eq!(loc("a\nb\nc", 3, 4), m(2, 4));
|
||||
assert_eq!(loc("a\nb\nc", 4, 5), m(4, 5));
|
||||
assert_eq!(loc("a\nb\nc", 5, 5), m(4, 5));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn line_iter() {
|
||||
assert_eq!(lines("abc"), vec!["abc"]);
|
||||
|
||||
assert_eq!(lines("abc\n"), vec!["abc\n"]);
|
||||
assert_eq!(lines("abc\nxyz"), vec!["abc\n", "xyz"]);
|
||||
assert_eq!(lines("abc\nxyz\n"), vec!["abc\n", "xyz\n"]);
|
||||
|
||||
assert_eq!(lines("abc\n\n"), vec!["abc\n", "\n"]);
|
||||
assert_eq!(lines("abc\n\n\n"), vec!["abc\n", "\n", "\n"]);
|
||||
assert_eq!(lines("abc\n\nxyz"), vec!["abc\n", "\n", "xyz"]);
|
||||
assert_eq!(lines("abc\n\nxyz\n"), vec!["abc\n", "\n", "xyz\n"]);
|
||||
assert_eq!(lines("abc\nxyz\n\n"), vec!["abc\n", "xyz\n", "\n"]);
|
||||
|
||||
assert_eq!(lines("\n"), vec!["\n"]);
|
||||
assert_eq!(lines(""), Vec::<&str>::new());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn line_iter_empty() {
|
||||
let mut it = LineStep::new(b'\n', 0, 0);
|
||||
assert_eq!(it.next(b"abc"), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn preceding_lines_doc() {
|
||||
// These are the examples mentions in the documentation of `preceding`.
|
||||
let bytes = b"abc\nxyz\n";
|
||||
assert_eq!(4, preceding_by_pos(bytes, 7, b'\n', 0));
|
||||
assert_eq!(4, preceding_by_pos(bytes, 8, b'\n', 0));
|
||||
assert_eq!(0, preceding_by_pos(bytes, 7, b'\n', 1));
|
||||
assert_eq!(0, preceding_by_pos(bytes, 8, b'\n', 1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn preceding_lines_sherlock() {
|
||||
let t = SHERLOCK;
|
||||
let lines = line_ranges(t);
|
||||
|
||||
// The following tests check the count == 0 case, i.e., finding the
|
||||
// beginning of the line containing the given position.
|
||||
assert_eq!(0, prev(t, 0, 0));
|
||||
assert_eq!(0, prev(t, 1, 0));
|
||||
// The line terminator is addressed by `end-1` and terminates the line
|
||||
// it is part of.
|
||||
assert_eq!(0, prev(t, lines[0].end - 1, 0));
|
||||
assert_eq!(lines[0].start, prev(t, lines[0].end, 0));
|
||||
// The end position of line addresses the byte immediately following a
|
||||
// line terminator, which puts it on the following line.
|
||||
assert_eq!(lines[1].start, prev(t, lines[0].end + 1, 0));
|
||||
|
||||
// Now tests for count > 0.
|
||||
assert_eq!(0, prev(t, 0, 1));
|
||||
assert_eq!(0, prev(t, 0, 2));
|
||||
assert_eq!(0, prev(t, 1, 1));
|
||||
assert_eq!(0, prev(t, 1, 2));
|
||||
assert_eq!(0, prev(t, lines[0].end - 1, 1));
|
||||
assert_eq!(0, prev(t, lines[0].end - 1, 2));
|
||||
assert_eq!(0, prev(t, lines[0].end, 1));
|
||||
assert_eq!(0, prev(t, lines[0].end, 2));
|
||||
assert_eq!(lines[3].start, prev(t, lines[4].end - 1, 1));
|
||||
assert_eq!(lines[3].start, prev(t, lines[4].end, 1));
|
||||
assert_eq!(lines[4].start, prev(t, lines[4].end + 1, 1));
|
||||
|
||||
// The last line has no line terminator.
|
||||
assert_eq!(lines[5].start, prev(t, lines[5].end, 0));
|
||||
assert_eq!(lines[5].start, prev(t, lines[5].end - 1, 0));
|
||||
assert_eq!(lines[4].start, prev(t, lines[5].end, 1));
|
||||
assert_eq!(lines[0].start, prev(t, lines[5].end, 5));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn preceding_lines_short() {
|
||||
let t = "a\nb\nc\nd\ne\nf\n";
|
||||
let lines = line_ranges(t);
|
||||
assert_eq!(12, t.len());
|
||||
|
||||
assert_eq!(lines[5].start, prev(t, lines[5].end, 0));
|
||||
assert_eq!(lines[4].start, prev(t, lines[5].end, 1));
|
||||
assert_eq!(lines[3].start, prev(t, lines[5].end, 2));
|
||||
assert_eq!(lines[2].start, prev(t, lines[5].end, 3));
|
||||
assert_eq!(lines[1].start, prev(t, lines[5].end, 4));
|
||||
assert_eq!(lines[0].start, prev(t, lines[5].end, 5));
|
||||
assert_eq!(lines[0].start, prev(t, lines[5].end, 6));
|
||||
|
||||
assert_eq!(lines[5].start, prev(t, lines[5].end - 1, 0));
|
||||
assert_eq!(lines[4].start, prev(t, lines[5].end - 1, 1));
|
||||
assert_eq!(lines[3].start, prev(t, lines[5].end - 1, 2));
|
||||
assert_eq!(lines[2].start, prev(t, lines[5].end - 1, 3));
|
||||
assert_eq!(lines[1].start, prev(t, lines[5].end - 1, 4));
|
||||
assert_eq!(lines[0].start, prev(t, lines[5].end - 1, 5));
|
||||
assert_eq!(lines[0].start, prev(t, lines[5].end - 1, 6));
|
||||
|
||||
assert_eq!(lines[4].start, prev(t, lines[5].start, 0));
|
||||
assert_eq!(lines[3].start, prev(t, lines[5].start, 1));
|
||||
assert_eq!(lines[2].start, prev(t, lines[5].start, 2));
|
||||
assert_eq!(lines[1].start, prev(t, lines[5].start, 3));
|
||||
assert_eq!(lines[0].start, prev(t, lines[5].start, 4));
|
||||
assert_eq!(lines[0].start, prev(t, lines[5].start, 5));
|
||||
|
||||
assert_eq!(lines[3].start, prev(t, lines[4].end - 1, 1));
|
||||
assert_eq!(lines[2].start, prev(t, lines[4].start, 1));
|
||||
|
||||
assert_eq!(lines[2].start, prev(t, lines[3].end - 1, 1));
|
||||
assert_eq!(lines[1].start, prev(t, lines[3].start, 1));
|
||||
|
||||
assert_eq!(lines[1].start, prev(t, lines[2].end - 1, 1));
|
||||
assert_eq!(lines[0].start, prev(t, lines[2].start, 1));
|
||||
|
||||
assert_eq!(lines[0].start, prev(t, lines[1].end - 1, 1));
|
||||
assert_eq!(lines[0].start, prev(t, lines[1].start, 1));
|
||||
|
||||
assert_eq!(lines[0].start, prev(t, lines[0].end - 1, 1));
|
||||
assert_eq!(lines[0].start, prev(t, lines[0].start, 1));
|
||||
}
|
||||
|
||||
// Exercises `prev` (a test helper defined elsewhere in this module) on input
// that begins with three empty lines.
#[test]
fn preceding_lines_empty1() {
    let haystack = "\n\n\nd\ne\nf\n";
    let ranges = line_ranges(haystack);
    assert_eq!(9, haystack.len());

    // Starting from the end of the first two (empty) lines.
    assert_eq!(ranges[0].start, prev(haystack, ranges[0].end, 0));
    assert_eq!(ranges[0].start, prev(haystack, ranges[0].end, 1));
    assert_eq!(ranges[1].start, prev(haystack, ranges[1].end, 0));
    assert_eq!(ranges[0].start, prev(haystack, ranges[1].end, 1));

    // Starting from the end of the final line and walking backwards; asking
    // for more lines than exist yields the start of the first line.
    assert_eq!(ranges[5].start, prev(haystack, ranges[5].end, 0));
    assert_eq!(ranges[4].start, prev(haystack, ranges[5].end, 1));
    assert_eq!(ranges[3].start, prev(haystack, ranges[5].end, 2));
    assert_eq!(ranges[2].start, prev(haystack, ranges[5].end, 3));
    assert_eq!(ranges[1].start, prev(haystack, ranges[5].end, 4));
    assert_eq!(ranges[0].start, prev(haystack, ranges[5].end, 5));
    assert_eq!(ranges[0].start, prev(haystack, ranges[5].end, 6));
}
|
||||
|
||||
// Exercises `prev` (a test helper defined elsewhere in this module) on input
// whose second and third lines are empty.
#[test]
fn preceding_lines_empty2() {
    let haystack = "a\n\n\nd\ne\nf\n";
    let ranges = line_ranges(haystack);
    assert_eq!(10, haystack.len());

    // Starting from the end of the first two lines.
    assert_eq!(ranges[0].start, prev(haystack, ranges[0].end, 0));
    assert_eq!(ranges[0].start, prev(haystack, ranges[0].end, 1));
    assert_eq!(ranges[1].start, prev(haystack, ranges[1].end, 0));
    assert_eq!(ranges[0].start, prev(haystack, ranges[1].end, 1));

    // Starting from the end of the final line and walking backwards; asking
    // for more lines than exist yields the start of the first line.
    assert_eq!(ranges[5].start, prev(haystack, ranges[5].end, 0));
    assert_eq!(ranges[4].start, prev(haystack, ranges[5].end, 1));
    assert_eq!(ranges[3].start, prev(haystack, ranges[5].end, 2));
    assert_eq!(ranges[2].start, prev(haystack, ranges[5].end, 3));
    assert_eq!(ranges[1].start, prev(haystack, ranges[5].end, 4));
    assert_eq!(ranges[0].start, prev(haystack, ranges[5].end, 5));
    assert_eq!(ranges[0].start, prev(haystack, ranges[5].end, 6));
}
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
/// Like assert_eq, but nicer output for long strings.
///
/// Takes the expected value, the actual value and then `format!`-style
/// arguments that produce a label identifying the comparison. Both values are
/// dereferenced (`&*`) before being compared and printed with `{}`.
///
/// The expansion is wrapped in its own block so that the macro can be used
/// both as a statement and in expression position (e.g., as a match arm),
/// and so that its temporaries never leak into the caller's scope.
///
/// # Panics
///
/// Panics with both values printed in full when they differ.
#[cfg(test)]
#[macro_export]
macro_rules! assert_eq_printed {
    ($expected:expr, $got:expr, $($tt:tt)*) => {{
        let expected = &*$expected;
        let got = &*$got;
        let label = format!($($tt)*);
        if expected != got {
            panic!("
printed outputs differ! (label: {})

expected:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
{}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

got:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
{}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
", label, expected, got);
        }
    }}
}
|
||||
@@ -1,599 +0,0 @@
|
||||
use std::cmp;
|
||||
|
||||
use bstr::B;
|
||||
|
||||
use grep_matcher::{LineMatchKind, Matcher};
|
||||
use lines::{self, LineStep};
|
||||
use line_buffer::BinaryDetection;
|
||||
use searcher::{Config, Range, Searcher};
|
||||
use sink::{
|
||||
Sink, SinkError,
|
||||
SinkFinish, SinkContext, SinkContextKind, SinkMatch,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Core<'s, M: 's, S> {
|
||||
config: &'s Config,
|
||||
matcher: M,
|
||||
searcher: &'s Searcher,
|
||||
sink: S,
|
||||
binary: bool,
|
||||
pos: usize,
|
||||
absolute_byte_offset: u64,
|
||||
binary_byte_offset: Option<usize>,
|
||||
line_number: Option<u64>,
|
||||
last_line_counted: usize,
|
||||
last_line_visited: usize,
|
||||
after_context_left: usize,
|
||||
has_sunk: bool,
|
||||
}
|
||||
|
||||
impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
pub fn new(
|
||||
searcher: &'s Searcher,
|
||||
matcher: M,
|
||||
sink: S,
|
||||
binary: bool,
|
||||
) -> Core<'s, M, S> {
|
||||
let line_number =
|
||||
if searcher.config.line_number {
|
||||
Some(1)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let core = Core {
|
||||
config: &searcher.config,
|
||||
matcher: matcher,
|
||||
searcher: searcher,
|
||||
sink: sink,
|
||||
binary: binary,
|
||||
pos: 0,
|
||||
absolute_byte_offset: 0,
|
||||
binary_byte_offset: None,
|
||||
line_number: line_number,
|
||||
last_line_counted: 0,
|
||||
last_line_visited: 0,
|
||||
after_context_left: 0,
|
||||
has_sunk: false,
|
||||
};
|
||||
if !core.searcher.multi_line_with_matcher(&core.matcher) {
|
||||
if core.is_line_by_line_fast() {
|
||||
trace!("searcher core: will use fast line searcher");
|
||||
} else {
|
||||
trace!("searcher core: will use slow line searcher");
|
||||
}
|
||||
}
|
||||
core
|
||||
}
|
||||
|
||||
pub fn pos(&self) -> usize {
|
||||
self.pos
|
||||
}
|
||||
|
||||
pub fn set_pos(&mut self, pos: usize) {
|
||||
self.pos = pos;
|
||||
}
|
||||
|
||||
pub fn binary_byte_offset(&self) -> Option<u64> {
|
||||
self.binary_byte_offset.map(|offset| offset as u64)
|
||||
}
|
||||
|
||||
/// Return a reference to the matcher used by this core.
pub fn matcher(&self) -> &M {
    let matcher = &self.matcher;
    matcher
}
|
||||
|
||||
pub fn matched(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
self.sink_matched(buf, range)
|
||||
}
|
||||
|
||||
pub fn binary_data(
|
||||
&mut self,
|
||||
binary_byte_offset: u64,
|
||||
) -> Result<bool, S::Error> {
|
||||
self.sink.binary_data(&self.searcher, binary_byte_offset)
|
||||
}
|
||||
|
||||
/// Tell the sink that a search is about to begin.
///
/// Returns whatever the sink returns: `Ok(false)` asks for the search to
/// stop.
pub fn begin(&mut self) -> Result<bool, S::Error> {
    let searcher = self.searcher;
    self.sink.begin(searcher)
}
|
||||
|
||||
pub fn finish(
|
||||
&mut self,
|
||||
byte_count: u64,
|
||||
binary_byte_offset: Option<u64>,
|
||||
) -> Result<(), S::Error> {
|
||||
self.sink.finish(
|
||||
&self.searcher,
|
||||
&SinkFinish {
|
||||
byte_count,
|
||||
binary_byte_offset,
|
||||
})
|
||||
}
|
||||
|
||||
/// Search `buf` one line at a time, picking the fast strategy whenever
/// `is_line_by_line_fast` permits it and the slow one otherwise.
///
/// Returns `Ok(false)` when the search should stop.
pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
    if !self.is_line_by_line_fast() {
        return self.match_by_line_slow(buf);
    }
    self.match_by_line_fast(buf)
}
|
||||
|
||||
pub fn roll(&mut self, buf: &[u8]) -> usize {
|
||||
let consumed =
|
||||
if self.config.max_context() == 0 {
|
||||
buf.len()
|
||||
} else {
|
||||
// It might seem like all we need to care about here is just
|
||||
// the "before context," but in order to sink the context
|
||||
// separator (when before_context==0 and after_context>0), we
|
||||
// need to know something about the position of the previous
|
||||
// line visited, even if we're at the beginning of the buffer.
|
||||
let context_start = lines::preceding(
|
||||
buf,
|
||||
self.config.line_term.as_byte(),
|
||||
self.config.max_context(),
|
||||
);
|
||||
let consumed = cmp::max(context_start, self.last_line_visited);
|
||||
consumed
|
||||
};
|
||||
self.count_lines(buf, consumed);
|
||||
self.absolute_byte_offset += consumed as u64;
|
||||
self.last_line_counted = 0;
|
||||
self.last_line_visited = 0;
|
||||
self.set_pos(buf.len() - consumed);
|
||||
consumed
|
||||
}
|
||||
|
||||
pub fn detect_binary(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.binary_byte_offset.is_some() {
|
||||
return Ok(self.config.binary.quit_byte().is_some());
|
||||
}
|
||||
let binary_byte = match self.config.binary.0 {
|
||||
BinaryDetection::Quit(b) => b,
|
||||
BinaryDetection::Convert(b) => b,
|
||||
_ => return Ok(false),
|
||||
};
|
||||
if let Some(i) = B(&buf[*range]).find_byte(binary_byte) {
|
||||
let offset = range.start() + i;
|
||||
self.binary_byte_offset = Some(offset);
|
||||
if !self.binary_data(offset as u64)? {
|
||||
return Ok(true);
|
||||
}
|
||||
Ok(self.config.binary.quit_byte().is_some())
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn before_context_by_line(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
upto: usize,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.config.before_context == 0 {
|
||||
return Ok(true);
|
||||
}
|
||||
let range = Range::new(self.last_line_visited, upto);
|
||||
if range.is_empty() {
|
||||
return Ok(true);
|
||||
}
|
||||
let before_context_start = range.start() + lines::preceding(
|
||||
&buf[range],
|
||||
self.config.line_term.as_byte(),
|
||||
self.config.before_context - 1,
|
||||
);
|
||||
|
||||
let range = Range::new(before_context_start, range.end());
|
||||
let mut stepper = LineStep::new(
|
||||
self.config.line_term.as_byte(),
|
||||
range.start(),
|
||||
range.end(),
|
||||
);
|
||||
while let Some(line) = stepper.next_match(buf) {
|
||||
if !self.sink_break_context(line.start())? {
|
||||
return Ok(false);
|
||||
}
|
||||
if !self.sink_before_context(buf, &line)? {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
pub fn after_context_by_line(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
upto: usize,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.after_context_left == 0 {
|
||||
return Ok(true);
|
||||
}
|
||||
let range = Range::new(self.last_line_visited, upto);
|
||||
let mut stepper = LineStep::new(
|
||||
self.config.line_term.as_byte(),
|
||||
range.start(),
|
||||
range.end(),
|
||||
);
|
||||
while let Some(line) = stepper.next_match(buf) {
|
||||
if !self.sink_after_context(buf, &line)? {
|
||||
return Ok(false);
|
||||
}
|
||||
if self.after_context_left == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
pub fn other_context_by_line(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
upto: usize,
|
||||
) -> Result<bool, S::Error> {
|
||||
let range = Range::new(self.last_line_visited, upto);
|
||||
let mut stepper = LineStep::new(
|
||||
self.config.line_term.as_byte(),
|
||||
range.start(),
|
||||
range.end(),
|
||||
);
|
||||
while let Some(line) = stepper.next_match(buf) {
|
||||
if !self.sink_other_context(buf, &line)? {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn match_by_line_slow(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
|
||||
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
|
||||
|
||||
let range = Range::new(self.pos(), buf.len());
|
||||
let mut stepper = LineStep::new(
|
||||
self.config.line_term.as_byte(),
|
||||
range.start(),
|
||||
range.end(),
|
||||
);
|
||||
while let Some(line) = stepper.next_match(buf) {
|
||||
let matched = {
|
||||
// Stripping the line terminator is necessary to prevent some
|
||||
// classes of regexes from matching the empty position *after*
|
||||
// the end of the line. For example, `(?m)^$` will match at
|
||||
// position (2, 2) in the string `a\n`.
|
||||
let slice = lines::without_terminator(
|
||||
&buf[line],
|
||||
self.config.line_term,
|
||||
);
|
||||
match self.matcher.shortest_match(slice) {
|
||||
Err(err) => return Err(S::Error::error_message(err)),
|
||||
Ok(result) => result.is_some(),
|
||||
}
|
||||
};
|
||||
self.set_pos(line.end());
|
||||
if matched != self.config.invert_match {
|
||||
if !self.before_context_by_line(buf, line.start())? {
|
||||
return Ok(false);
|
||||
}
|
||||
if !self.sink_matched(buf, &line)? {
|
||||
return Ok(false);
|
||||
}
|
||||
} else if self.after_context_left >= 1 {
|
||||
if !self.sink_after_context(buf, &line)? {
|
||||
return Ok(false);
|
||||
}
|
||||
} else if self.config.passthru {
|
||||
if !self.sink_other_context(buf, &line)? {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// Search `buf` from the current position by letting the matcher locate
/// candidate lines across the whole remaining buffer.
///
/// Once no more matches are found, any outstanding after-context is sunk
/// and the position is moved to the end of the buffer. Returns
/// `Ok(false)` when the search should stop.
fn match_by_line_fast(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
    debug_assert!(!self.config.passthru);

    while !buf[self.pos()..].is_empty() {
        if self.config.invert_match {
            if !self.match_by_line_fast_invert(buf)? {
                return Ok(false);
            }
            continue;
        }
        let line = match self.find_by_line_fast(buf)? {
            Some(line) => line,
            None => break,
        };
        if self.config.max_context() > 0 {
            // After-context of the previous match comes first, then the
            // before-context of this one.
            if !self.after_context_by_line(buf, line.start())?
                || !self.before_context_by_line(buf, line.start())?
            {
                return Ok(false);
            }
        }
        self.set_pos(line.end());
        if !self.sink_matched(buf, &line)? {
            return Ok(false);
        }
    }
    if !self.after_context_by_line(buf, buf.len())? {
        return Ok(false);
    }
    self.set_pos(buf.len());
    Ok(true)
}
|
||||
|
||||
#[inline(always)]
|
||||
fn match_by_line_fast_invert(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
) -> Result<bool, S::Error> {
|
||||
assert!(self.config.invert_match);
|
||||
|
||||
let invert_match = match self.find_by_line_fast(buf)? {
|
||||
None => {
|
||||
let range = Range::new(self.pos(), buf.len());
|
||||
self.set_pos(range.end());
|
||||
range
|
||||
}
|
||||
Some(line) => {
|
||||
let range = Range::new(self.pos(), line.start());
|
||||
self.set_pos(line.end());
|
||||
range
|
||||
}
|
||||
};
|
||||
if invert_match.is_empty() {
|
||||
return Ok(true);
|
||||
}
|
||||
if !self.after_context_by_line(buf, invert_match.start())? {
|
||||
return Ok(false);
|
||||
}
|
||||
if !self.before_context_by_line(buf, invert_match.start())? {
|
||||
return Ok(false);
|
||||
}
|
||||
let mut stepper = LineStep::new(
|
||||
self.config.line_term.as_byte(),
|
||||
invert_match.start(),
|
||||
invert_match.end(),
|
||||
);
|
||||
while let Some(line) = stepper.next_match(buf) {
|
||||
if !self.sink_matched(buf, &line)? {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn find_by_line_fast(
|
||||
&self,
|
||||
buf: &[u8],
|
||||
) -> Result<Option<Range>, S::Error> {
|
||||
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
|
||||
debug_assert!(self.is_line_by_line_fast());
|
||||
|
||||
let mut pos = self.pos();
|
||||
while !buf[pos..].is_empty() {
|
||||
match self.matcher.find_candidate_line(&buf[pos..]) {
|
||||
Err(err) => return Err(S::Error::error_message(err)),
|
||||
Ok(None) => return Ok(None),
|
||||
Ok(Some(LineMatchKind::Confirmed(i))) => {
|
||||
let line = lines::locate(
|
||||
buf,
|
||||
self.config.line_term.as_byte(),
|
||||
Range::zero(i).offset(pos),
|
||||
);
|
||||
// If we matched beyond the end of the buffer, then we
|
||||
// don't report this as a match.
|
||||
if line.start() == buf.len() {
|
||||
pos = buf.len();
|
||||
continue;
|
||||
}
|
||||
return Ok(Some(line));
|
||||
}
|
||||
Ok(Some(LineMatchKind::Candidate(i))) => {
|
||||
let line = lines::locate(
|
||||
buf,
|
||||
self.config.line_term.as_byte(),
|
||||
Range::zero(i).offset(pos),
|
||||
);
|
||||
// We need to strip the line terminator here to match the
|
||||
// semantics of line-by-line searching. Namely, regexes
|
||||
// like `(?m)^$` can match at the final position beyond a
|
||||
// line terminator, which is non-sensical in line oriented
|
||||
// matching.
|
||||
let slice = lines::without_terminator(
|
||||
&buf[line],
|
||||
self.config.line_term,
|
||||
);
|
||||
match self.matcher.is_match(slice) {
|
||||
Err(err) => return Err(S::Error::error_message(err)),
|
||||
Ok(true) => return Ok(Some(line)),
|
||||
Ok(false) => {
|
||||
pos = line.end();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn sink_matched(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.binary && self.detect_binary(buf, range)? {
|
||||
return Ok(false);
|
||||
}
|
||||
if !self.sink_break_context(range.start())? {
|
||||
return Ok(false);
|
||||
}
|
||||
self.count_lines(buf, range.start());
|
||||
let offset = self.absolute_byte_offset + range.start() as u64;
|
||||
let linebuf = &buf[*range];
|
||||
let keepgoing = self.sink.matched(
|
||||
&self.searcher,
|
||||
&SinkMatch {
|
||||
line_term: self.config.line_term,
|
||||
bytes: linebuf,
|
||||
absolute_byte_offset: offset,
|
||||
line_number: self.line_number,
|
||||
},
|
||||
)?;
|
||||
if !keepgoing {
|
||||
return Ok(false);
|
||||
}
|
||||
self.last_line_visited = range.end();
|
||||
self.after_context_left = self.config.after_context;
|
||||
self.has_sunk = true;
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn sink_before_context(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.binary && self.detect_binary(buf, range)? {
|
||||
return Ok(false);
|
||||
}
|
||||
self.count_lines(buf, range.start());
|
||||
let offset = self.absolute_byte_offset + range.start() as u64;
|
||||
let keepgoing = self.sink.context(
|
||||
&self.searcher,
|
||||
&SinkContext {
|
||||
line_term: self.config.line_term,
|
||||
bytes: &buf[*range],
|
||||
kind: SinkContextKind::Before,
|
||||
absolute_byte_offset: offset,
|
||||
line_number: self.line_number,
|
||||
},
|
||||
)?;
|
||||
if !keepgoing {
|
||||
return Ok(false);
|
||||
}
|
||||
self.last_line_visited = range.end();
|
||||
self.has_sunk = true;
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn sink_after_context(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
assert!(self.after_context_left >= 1);
|
||||
|
||||
if self.binary && self.detect_binary(buf, range)? {
|
||||
return Ok(false);
|
||||
}
|
||||
self.count_lines(buf, range.start());
|
||||
let offset = self.absolute_byte_offset + range.start() as u64;
|
||||
let keepgoing = self.sink.context(
|
||||
&self.searcher,
|
||||
&SinkContext {
|
||||
line_term: self.config.line_term,
|
||||
bytes: &buf[*range],
|
||||
kind: SinkContextKind::After,
|
||||
absolute_byte_offset: offset,
|
||||
line_number: self.line_number,
|
||||
},
|
||||
)?;
|
||||
if !keepgoing {
|
||||
return Ok(false);
|
||||
}
|
||||
self.last_line_visited = range.end();
|
||||
self.after_context_left -= 1;
|
||||
self.has_sunk = true;
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn sink_other_context(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.binary && self.detect_binary(buf, range)? {
|
||||
return Ok(false);
|
||||
}
|
||||
self.count_lines(buf, range.start());
|
||||
let offset = self.absolute_byte_offset + range.start() as u64;
|
||||
let keepgoing = self.sink.context(
|
||||
&self.searcher,
|
||||
&SinkContext {
|
||||
line_term: self.config.line_term,
|
||||
bytes: &buf[*range],
|
||||
kind: SinkContextKind::Other,
|
||||
absolute_byte_offset: offset,
|
||||
line_number: self.line_number,
|
||||
},
|
||||
)?;
|
||||
if !keepgoing {
|
||||
return Ok(false);
|
||||
}
|
||||
self.last_line_visited = range.end();
|
||||
self.has_sunk = true;
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn sink_break_context(
|
||||
&mut self,
|
||||
start_of_line: usize,
|
||||
) -> Result<bool, S::Error> {
|
||||
let is_gap = self.last_line_visited < start_of_line;
|
||||
let any_context =
|
||||
self.config.before_context > 0
|
||||
|| self.config.after_context > 0;
|
||||
|
||||
if !any_context || !self.has_sunk || !is_gap {
|
||||
Ok(true)
|
||||
} else {
|
||||
self.sink.context_break(&self.searcher)
|
||||
}
|
||||
}
|
||||
|
||||
fn count_lines(&mut self, buf: &[u8], upto: usize) {
|
||||
if let Some(ref mut line_number) = self.line_number {
|
||||
if self.last_line_counted >= upto {
|
||||
return;
|
||||
}
|
||||
let slice = &buf[self.last_line_counted..upto];
|
||||
let count = lines::count(slice, self.config.line_term.as_byte());
|
||||
*line_number += count;
|
||||
self.last_line_counted = upto;
|
||||
}
|
||||
}
|
||||
|
||||
fn is_line_by_line_fast(&self) -> bool {
|
||||
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
|
||||
|
||||
if self.config.passthru {
|
||||
return false;
|
||||
}
|
||||
if let Some(line_term) = self.matcher.line_terminator() {
|
||||
if line_term == self.config.line_term {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if let Some(non_matching) = self.matcher.non_matching_bytes() {
|
||||
// If the line terminator is CRLF, we don't actually need to care
|
||||
// whether the regex can match `\r` or not. Namely, a `\r` is
|
||||
// neither necessary nor sufficient to terminate a line. A `\n` is
|
||||
// always required.
|
||||
if non_matching.contains(self.config.line_term.as_byte()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,106 +0,0 @@
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
|
||||
use memmap::Mmap;
|
||||
|
||||
/// Controls the strategy used for determining when to use memory maps.
|
||||
///
|
||||
/// If a searcher is called in circumstances where it is possible to use memory
|
||||
/// maps, and memory maps are enabled, then it will attempt to do so if it
|
||||
/// believes it will make the search faster.
|
||||
///
|
||||
/// By default, memory maps are disabled.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct MmapChoice(MmapChoiceImpl);
|
||||
|
||||
/// The private representation of a `MmapChoice`.
#[derive(Clone, Debug)]
enum MmapChoiceImpl {
    /// Memory maps may be used.
    Auto,
    /// Memory maps are never used.
    Never,
}
|
||||
|
||||
impl Default for MmapChoice {
|
||||
fn default() -> MmapChoice {
|
||||
MmapChoice(MmapChoiceImpl::Never)
|
||||
}
|
||||
}
|
||||
|
||||
impl MmapChoice {
|
||||
/// Use memory maps when they are believed to be advantageous.
|
||||
///
|
||||
/// The heuristics used to determine whether to use a memory map or not
|
||||
/// may depend on many things, including but not limited to, file size
|
||||
/// and platform.
|
||||
///
|
||||
/// If memory maps are unavailable or cannot be used for a specific input,
|
||||
/// then normal OS read calls are used instead.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// This constructor is not safe because there is no obvious way to
|
||||
/// encapsulate the safety of file backed memory maps on all platforms
|
||||
/// without simultaneously negating some or all of their benefits.
|
||||
///
|
||||
/// The specific contract the caller is required to uphold isn't precise,
|
||||
/// but it basically amounts to something like, "the caller guarantees that
|
||||
/// the underlying file won't be mutated." This, of course, isn't feasible
|
||||
/// in many environments. However, command line tools may still decide to
|
||||
/// take the risk of, say, a `SIGBUS` occurring while attempting to read a
|
||||
/// memory map.
|
||||
pub unsafe fn auto() -> MmapChoice {
|
||||
MmapChoice(MmapChoiceImpl::Auto)
|
||||
}
|
||||
|
||||
/// Never use memory maps, no matter what. This is the default.
|
||||
pub fn never() -> MmapChoice {
|
||||
MmapChoice(MmapChoiceImpl::Never)
|
||||
}
|
||||
|
||||
/// Return a memory map if memory maps are enabled and if creating a
|
||||
/// memory from the given file succeeded and if memory maps are believed
|
||||
/// to be advantageous for performance.
|
||||
///
|
||||
/// If this does attempt to open a memory map and it fails, then `None`
|
||||
/// is returned and the corresponding error (along with the file path, if
|
||||
/// present) is logged at the debug level.
|
||||
pub(crate) fn open(
|
||||
&self,
|
||||
file: &File,
|
||||
path: Option<&Path>,
|
||||
) -> Option<Mmap> {
|
||||
if !self.is_enabled() {
|
||||
return None;
|
||||
}
|
||||
if cfg!(target_os = "macos") {
|
||||
// I guess memory maps on macOS aren't great. Should re-evaluate.
|
||||
return None;
|
||||
}
|
||||
// SAFETY: This is acceptable because the only way `MmapChoiceImpl` can
|
||||
// be `Auto` is if the caller invoked the `auto` constructor, which
|
||||
// is itself not safe. Thus, this is a propagation of the caller's
|
||||
// assertion that using memory maps is safe.
|
||||
match unsafe { Mmap::map(file) } {
|
||||
Ok(mmap) => Some(mmap),
|
||||
Err(err) => {
|
||||
if let Some(path) = path {
|
||||
debug!(
|
||||
"{}: failed to open memory map: {}",
|
||||
path.display(),
|
||||
err
|
||||
);
|
||||
} else {
|
||||
debug!("failed to open memory map: {}", err);
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether this strategy may employ memory maps or not.
|
||||
pub(crate) fn is_enabled(&self) -> bool {
|
||||
match self.0 {
|
||||
MmapChoiceImpl::Auto => true,
|
||||
MmapChoiceImpl::Never => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,646 +0,0 @@
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
|
||||
use grep_matcher::LineTerminator;
|
||||
|
||||
use lines::LineIter;
|
||||
use searcher::{ConfigError, Searcher};
|
||||
|
||||
/// A trait that describes errors that can be reported by searchers and
|
||||
/// implementations of `Sink`.
|
||||
///
|
||||
/// Unless you have a specialized use case, you probably don't need to
|
||||
/// implement this trait explicitly. It's likely that using `io::Error` (which
|
||||
/// implements this trait) for your error type is good enough, largely because
|
||||
/// most errors that occur during search will likely be an `io::Error`.
|
||||
pub trait SinkError: Sized {
|
||||
/// A constructor for converting any value that satisfies the
|
||||
/// `fmt::Display` trait into an error.
|
||||
fn error_message<T: fmt::Display>(message: T) -> Self;
|
||||
|
||||
/// A constructor for converting I/O errors that occur while searching into
|
||||
/// an error of this type.
|
||||
///
|
||||
/// By default, this is implemented via the `error_message` constructor.
|
||||
fn error_io(err: io::Error) -> Self {
|
||||
Self::error_message(err)
|
||||
}
|
||||
|
||||
/// A constructor for converting configuration errors that occur while
|
||||
/// building a searcher into an error of this type.
|
||||
///
|
||||
/// By default, this is implemented via the `error_message` constructor.
|
||||
fn error_config(err: ConfigError) -> Self {
|
||||
Self::error_message(err)
|
||||
}
|
||||
}
|
||||
|
||||
/// An `io::Error` can be used as an error for `Sink` implementations out of
|
||||
/// the box.
|
||||
impl SinkError for io::Error {
|
||||
fn error_message<T: fmt::Display>(message: T) -> io::Error {
|
||||
io::Error::new(io::ErrorKind::Other, message.to_string())
|
||||
}
|
||||
|
||||
fn error_io(err: io::Error) -> io::Error {
|
||||
err
|
||||
}
|
||||
}
|
||||
|
||||
/// A `Box<std::error::Error>` can be used as an error for `Sink`
|
||||
/// implementations out of the box.
|
||||
impl SinkError for Box<::std::error::Error> {
|
||||
fn error_message<T: fmt::Display>(message: T) -> Box<::std::error::Error> {
|
||||
Box::<::std::error::Error>::from(message.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// A trait that defines how results from searchers are handled.
|
||||
///
|
||||
/// In this crate, a searcher follows the "push" model. What that means is that
|
||||
/// the searcher drives execution, and pushes results back to the caller. This
|
||||
/// is in contrast to a "pull" model where the caller drives execution and
|
||||
/// takes results as they need them. These are also known as "internal" and
|
||||
/// "external" iteration strategies, respectively.
|
||||
///
|
||||
/// For a variety of reasons, including the complexity of the searcher
|
||||
/// implementation, this crate chooses the "push" or "internal" model of
|
||||
/// execution. Thus, in order to act on search results, callers must provide
|
||||
/// an implementation of this trait to a searcher, and the searcher is then
|
||||
/// responsible for calling the methods on this trait.
|
||||
///
|
||||
/// This trait defines several behaviors:
|
||||
///
|
||||
/// * What to do when a match is found. Callers must provide this.
|
||||
/// * What to do when an error occurs. Callers must provide this via the
|
||||
/// [`SinkError`](trait.SinkError.html) trait. Generally, callers can just
|
||||
/// use `io::Error` for this, which already implements `SinkError`.
|
||||
/// * What to do when a contextual line is found. By default, these are
|
||||
/// ignored.
|
||||
/// * What to do when a gap between contextual lines has been found. By
|
||||
/// default, this is ignored.
|
||||
/// * What to do when a search has started. By default, this does nothing.
|
||||
/// * What to do when a search has finished successfully. By default, this does
|
||||
/// nothing.
|
||||
///
|
||||
/// Callers must, at minimum, specify the behavior when an error occurs and
|
||||
/// the behavior when a match occurs. The rest is optional. For each behavior,
|
||||
/// callers may report an error (say, if writing the result to another
|
||||
/// location failed) or simply return `false` if they want the search to stop
|
||||
/// (e.g., when implementing a cap on the number of search results to show).
|
||||
///
|
||||
/// When errors are reported (whether in the searcher or in the implementation
|
||||
/// of `Sink`), then searchers quit immediately without calling `finish`.
|
||||
///
|
||||
/// For simpler uses of `Sink`, callers may elect to use one of
|
||||
/// the more convenient but less flexible implementations in the
|
||||
/// [`sinks`](sinks/index.html) module.
|
||||
pub trait Sink {
|
||||
/// The type of an error that should be reported by a searcher.
|
||||
///
|
||||
/// Errors of this type are not only returned by the methods on this
|
||||
/// trait, but the constructors defined in `SinkError` are also used in
|
||||
/// the searcher implementation itself. e.g., When a I/O error occurs when
|
||||
/// reading data from a file.
|
||||
type Error: SinkError;
|
||||
|
||||
/// This method is called whenever a match is found.
|
||||
///
|
||||
/// If multi line is enabled on the searcher, then the match reported here
|
||||
/// may span multiple lines and it may include multiple matches. When multi
|
||||
/// line is disabled, then the match is guaranteed to span exactly one
|
||||
/// non-empty line (where a single line is, at minimum, a line terminator).
|
||||
///
|
||||
/// If this returns `true`, then searching continues. If this returns
|
||||
/// `false`, then searching is stopped immediately and `finish` is called.
|
||||
///
|
||||
/// If this returns an error, then searching is stopped immediately,
|
||||
/// `finish` is not called and the error is bubbled back up to the caller
|
||||
/// of the searcher.
|
||||
fn matched(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
_mat: &SinkMatch,
|
||||
) -> Result<bool, Self::Error>;
|
||||
|
||||
/// This method is called whenever a context line is found, and is optional
|
||||
/// to implement. By default, it does nothing and returns `true`.
|
||||
///
|
||||
/// In all cases, the context given is guaranteed to span exactly one
|
||||
/// non-empty line (where a single line is, at minimum, a line terminator).
|
||||
///
|
||||
/// If this returns `true`, then searching continues. If this returns
|
||||
/// `false`, then searching is stopped immediately and `finish` is called.
|
||||
///
|
||||
/// If this returns an error, then searching is stopped immediately,
|
||||
/// `finish` is not called and the error is bubbled back up to the caller
|
||||
/// of the searcher.
|
||||
#[inline]
|
||||
fn context(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
_context: &SinkContext,
|
||||
) -> Result<bool, Self::Error> {
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// This method is called whenever a break in contextual lines is found,
|
||||
/// and is optional to implement. By default, it does nothing and returns
|
||||
/// `true`.
|
||||
///
|
||||
/// A break can only occur when context reporting is enabled (that is,
|
||||
/// either or both of `before_context` or `after_context` are greater than
|
||||
/// `0`). More precisely, a break occurs between non-contiguous groups of
|
||||
/// lines.
|
||||
///
|
||||
/// If this returns `true`, then searching continues. If this returns
|
||||
/// `false`, then searching is stopped immediately and `finish` is called.
|
||||
///
|
||||
/// If this returns an error, then searching is stopped immediately,
|
||||
/// `finish` is not called and the error is bubbled back up to the caller
|
||||
/// of the searcher.
|
||||
#[inline]
|
||||
fn context_break(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
) -> Result<bool, Self::Error> {
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// This method is called whenever binary detection is enabled and binary
|
||||
/// data is found. If binary data is found, then this is called at least
|
||||
/// once for the first occurrence with the absolute byte offset at which
|
||||
/// the binary data begins.
|
||||
///
|
||||
/// If this returns `true`, then searching continues. If this returns
|
||||
/// `false`, then searching is stopped immediately and `finish` is called.
|
||||
///
|
||||
/// If this returns an error, then searching is stopped immediately,
|
||||
/// `finish` is not called and the error is bubbled back up to the caller
|
||||
/// of the searcher.
|
||||
///
|
||||
/// By default, it does nothing and returns `true`.
|
||||
#[inline]
|
||||
fn binary_data(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
_binary_byte_offset: u64,
|
||||
) -> Result<bool, Self::Error> {
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// This method is called when a search has begun, before any search is
|
||||
/// executed. By default, this does nothing.
|
||||
///
|
||||
/// If this returns `true`, then searching continues. If this returns
|
||||
/// `false`, then searching is stopped immediately and `finish` is called.
|
||||
///
|
||||
/// If this returns an error, then searching is stopped immediately,
|
||||
/// `finish` is not called and the error is bubbled back up to the caller
|
||||
/// of the searcher.
|
||||
#[inline]
|
||||
fn begin(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
) -> Result<bool, Self::Error> {
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// This method is called when a search has completed. By default, this
|
||||
/// does nothing.
|
||||
///
|
||||
/// If this returns an error, the error is bubbled back up to the caller of
|
||||
/// the searcher.
|
||||
#[inline]
|
||||
fn finish(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
_: &SinkFinish,
|
||||
) -> Result<(), Self::Error> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, S: Sink> Sink for &'a mut S {
|
||||
type Error = S::Error;
|
||||
|
||||
#[inline]
|
||||
fn matched(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).matched(searcher, mat)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn context(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
context: &SinkContext,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).context(searcher, context)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn context_break(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).context_break(searcher)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn binary_data(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
binary_byte_offset: u64,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).binary_data(searcher, binary_byte_offset)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn begin(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).begin(searcher)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn finish(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
sink_finish: &SinkFinish,
|
||||
) -> Result<(), S::Error> {
|
||||
(**self).finish(searcher, sink_finish)
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: Sink + ?Sized> Sink for Box<S> {
|
||||
type Error = S::Error;
|
||||
|
||||
#[inline]
|
||||
fn matched(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).matched(searcher, mat)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn context(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
context: &SinkContext,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).context(searcher, context)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn context_break(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).context_break(searcher)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn binary_data(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
binary_byte_offset: u64,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).binary_data(searcher, binary_byte_offset)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn begin(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).begin(searcher)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn finish(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
sink_finish: &SinkFinish,
|
||||
) -> Result<(), S::Error> {
|
||||
(**self).finish(searcher, sink_finish)
|
||||
}
|
||||
}
|
||||
|
||||
/// Summary information handed to a sink once a search is over.
///
/// It carries the total number of bytes searched and, if any binary data
/// was detected, the absolute offset of its first occurrence.
///
/// `finish` is not called when a searcher stops early because of an error.
/// It is still called when a searcher stops early because the `Sink`
/// implementor asked it to.
#[derive(Clone, Debug)]
pub struct SinkFinish {
    /// Total number of bytes searched.
    pub(crate) byte_count: u64,
    /// Absolute offset of the first detected binary byte, if any.
    pub(crate) binary_byte_offset: Option<u64>,
}

impl SinkFinish {
    /// Returns how many bytes were searched in total.
    #[inline]
    pub fn byte_count(&self) -> u64 {
        self.byte_count
    }

    /// Returns the absolute byte offset of the first detected byte of binary
    /// data, provided binary detection was enabled and binary data was
    /// found.
    ///
    /// Because the offset is absolute, it cannot be relied upon to index
    /// into any addressable memory.
    #[inline]
    pub fn binary_byte_offset(&self) -> Option<u64> {
        self.binary_byte_offset
    }
}
|
||||
|
||||
/// A type that describes a match reported by a searcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct SinkMatch<'b> {
|
||||
pub(crate) line_term: LineTerminator,
|
||||
pub(crate) bytes: &'b [u8],
|
||||
pub(crate) absolute_byte_offset: u64,
|
||||
pub(crate) line_number: Option<u64>,
|
||||
}
|
||||
|
||||
impl<'b> SinkMatch<'b> {
|
||||
/// Returns the bytes for all matching lines, including the line
|
||||
/// terminators, if they exist.
|
||||
#[inline]
|
||||
pub fn bytes(&self) -> &'b [u8] {
|
||||
self.bytes
|
||||
}
|
||||
|
||||
/// Return an iterator over the lines in this match.
|
||||
///
|
||||
/// If multi line search is enabled, then this may yield more than one
|
||||
/// line (but always at least one line). If multi line search is disabled,
|
||||
/// then this always reports exactly one line (but may consist of just
|
||||
/// the line terminator).
|
||||
///
|
||||
/// Lines yielded by this iterator include their terminators.
|
||||
#[inline]
|
||||
pub fn lines(&self) -> LineIter<'b> {
|
||||
LineIter::new(self.line_term.as_byte(), self.bytes)
|
||||
}
|
||||
|
||||
/// Returns the absolute byte offset of the start of this match. This
|
||||
/// offset is absolute in that it is relative to the very beginning of the
|
||||
/// input in a search, and can never be relied upon to be a valid index
|
||||
/// into an in-memory slice.
|
||||
#[inline]
|
||||
pub fn absolute_byte_offset(&self) -> u64 {
|
||||
self.absolute_byte_offset
|
||||
}
|
||||
|
||||
/// Returns the line number of the first line in this match, if available.
|
||||
///
|
||||
/// Line numbers are only available when the search builder is instructed
|
||||
/// to compute them.
|
||||
#[inline]
|
||||
pub fn line_number(&self) -> Option<u64> {
|
||||
self.line_number
|
||||
}
|
||||
}
|
||||
|
||||
/// Classifies a contextual line reported by a searcher.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum SinkContextKind {
    /// The reported line precedes a match.
    Before,
    /// The reported line follows a match.
    After,
    /// Any other kind of reported context, e.g., lines produced by a
    /// searcher's "passthru" mode.
    Other,
}
|
||||
|
||||
/// A type that describes a contextual line reported by a searcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct SinkContext<'b> {
|
||||
pub(crate) line_term: LineTerminator,
|
||||
pub(crate) bytes: &'b [u8],
|
||||
pub(crate) kind: SinkContextKind,
|
||||
pub(crate) absolute_byte_offset: u64,
|
||||
pub(crate) line_number: Option<u64>,
|
||||
}
|
||||
|
||||
impl<'b> SinkContext<'b> {
|
||||
/// Returns the context bytes, including line terminators.
|
||||
#[inline]
|
||||
pub fn bytes(&self) -> &'b [u8] {
|
||||
self.bytes
|
||||
}
|
||||
|
||||
/// Returns the type of context.
|
||||
#[inline]
|
||||
pub fn kind(&self) -> &SinkContextKind {
|
||||
&self.kind
|
||||
}
|
||||
|
||||
/// Return an iterator over the lines in this match.
|
||||
///
|
||||
/// This always yields exactly one line (and that one line may contain just
|
||||
/// the line terminator).
|
||||
///
|
||||
/// Lines yielded by this iterator include their terminators.
|
||||
#[cfg(test)]
|
||||
pub(crate) fn lines(&self) -> LineIter<'b> {
|
||||
LineIter::new(self.line_term.as_byte(), self.bytes)
|
||||
}
|
||||
|
||||
/// Returns the absolute byte offset of the start of this context. This
|
||||
/// offset is absolute in that it is relative to the very beginning of the
|
||||
/// input in a search, and can never be relied upon to be a valid index
|
||||
/// into an in-memory slice.
|
||||
#[inline]
|
||||
pub fn absolute_byte_offset(&self) -> u64 {
|
||||
self.absolute_byte_offset
|
||||
}
|
||||
|
||||
/// Returns the line number of the first line in this context, if
|
||||
/// available.
|
||||
///
|
||||
/// Line numbers are only available when the search builder is instructed
|
||||
/// to compute them.
|
||||
#[inline]
|
||||
pub fn line_number(&self) -> Option<u64> {
|
||||
self.line_number
|
||||
}
|
||||
}
|
||||
|
||||
/// A collection of convenience implementations of `Sink`.
|
||||
///
|
||||
/// Each implementation in this module makes some kind of sacrifice in the name
|
||||
/// of making common cases easier to use. Most frequently, each type is a
|
||||
/// wrapper around a closure specified by the caller that provides limited
|
||||
/// access to the full suite of information available to implementors of
|
||||
/// `Sink`.
|
||||
///
|
||||
/// For example, the `UTF8` sink makes the following sacrifices:
|
||||
///
|
||||
/// * All matches must be UTF-8. An arbitrary `Sink` does not have this
|
||||
/// restriction and can deal with arbitrary data. If this sink sees invalid
|
||||
/// UTF-8, then an error is returned and searching stops. (Use the `Lossy`
|
||||
/// sink instead to suppress this error.)
|
||||
/// * The searcher must be configured to report line numbers. If it isn't,
|
||||
/// an error is reported at the first match and searching stops.
|
||||
/// * Context lines, context breaks and summary data reported at the end of
|
||||
/// a search are all ignored.
|
||||
/// * Implementors are forced to use `io::Error` as their error type.
|
||||
///
|
||||
/// If you need more flexibility, then you're advised to implement the `Sink`
|
||||
/// trait directly.
|
||||
pub mod sinks {
|
||||
use std::io;
|
||||
use std::str;
|
||||
|
||||
use searcher::Searcher;
|
||||
use super::{Sink, SinkError, SinkMatch};
|
||||
|
||||
/// A sink that provides line numbers and matches as strings while ignoring
|
||||
/// everything else.
|
||||
///
|
||||
/// This implementation will return an error if a match contains invalid
|
||||
/// UTF-8 or if the searcher was not configured to count lines. Errors
|
||||
/// on invalid UTF-8 can be suppressed by using the `Lossy` sink instead
|
||||
/// of this one.
|
||||
///
|
||||
/// The closure accepts two parameters: a line number and a UTF-8 string
|
||||
/// containing the matched data. The closure returns a
|
||||
/// `Result<bool, io::Error>`. If the `bool` is `false`, then the search
|
||||
/// stops immediately. Otherwise, searching continues.
|
||||
///
|
||||
/// If multi line mode was enabled, the line number refers to the line
|
||||
/// number of the first line in the match.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct UTF8<F>(pub F)
|
||||
where F: FnMut(u64, &str) -> Result<bool, io::Error>;
|
||||
|
||||
impl<F> Sink for UTF8<F>
|
||||
where F: FnMut(u64, &str) -> Result<bool, io::Error>
|
||||
{
|
||||
type Error = io::Error;
|
||||
|
||||
fn matched(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, io::Error> {
|
||||
let matched = match str::from_utf8(mat.bytes()) {
|
||||
Ok(matched) => matched,
|
||||
Err(err) => return Err(io::Error::error_message(err)),
|
||||
};
|
||||
let line_number = match mat.line_number() {
|
||||
Some(line_number) => line_number,
|
||||
None => {
|
||||
let msg = "line numbers not enabled";
|
||||
return Err(io::Error::error_message(msg));
|
||||
}
|
||||
};
|
||||
(self.0)(line_number, &matched)
|
||||
}
|
||||
}
|
||||
|
||||
/// A sink that provides line numbers and matches as (lossily converted)
|
||||
/// strings while ignoring everything else.
|
||||
///
|
||||
/// This is like `UTF8`, except that if a match contains invalid UTF-8,
|
||||
/// then it will be lossily converted to valid UTF-8 by substituting
|
||||
/// invalid UTF-8 with Unicode replacement characters.
|
||||
///
|
||||
/// This implementation will return an error on the first match if the
|
||||
/// searcher was not configured to count lines.
|
||||
///
|
||||
/// The closure accepts two parameters: a line number and a UTF-8 string
|
||||
/// containing the matched data. The closure returns a
|
||||
/// `Result<bool, io::Error>`. If the `bool` is `false`, then the search
|
||||
/// stops immediately. Otherwise, searching continues.
|
||||
///
|
||||
/// If multi line mode was enabled, the line number refers to the line
|
||||
/// number of the first line in the match.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Lossy<F>(pub F)
|
||||
where F: FnMut(u64, &str) -> Result<bool, io::Error>;
|
||||
|
||||
impl<F> Sink for Lossy<F>
|
||||
where F: FnMut(u64, &str) -> Result<bool, io::Error>
|
||||
{
|
||||
type Error = io::Error;
|
||||
|
||||
fn matched(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, io::Error> {
|
||||
use std::borrow::Cow;
|
||||
|
||||
let matched = match str::from_utf8(mat.bytes()) {
|
||||
Ok(matched) => Cow::Borrowed(matched),
|
||||
// TODO: In theory, it should be possible to amortize
|
||||
// allocation here, but `std` doesn't provide such an API.
|
||||
// Regardless, this only happens on matches with invalid UTF-8,
|
||||
// which should be pretty rare.
|
||||
Err(_) => String::from_utf8_lossy(mat.bytes()),
|
||||
};
|
||||
let line_number = match mat.line_number() {
|
||||
Some(line_number) => line_number,
|
||||
None => {
|
||||
let msg = "line numbers not enabled";
|
||||
return Err(io::Error::error_message(msg));
|
||||
}
|
||||
};
|
||||
(self.0)(line_number, &matched)
|
||||
}
|
||||
}
|
||||
|
||||
/// A sink that provides line numbers and matches as raw bytes while
|
||||
/// ignoring everything else.
|
||||
///
|
||||
/// This implementation will return an error on the first match if the
|
||||
/// searcher was not configured to count lines.
|
||||
///
|
||||
/// The closure accepts two parameters: a line number and a raw byte string
|
||||
/// containing the matched data. The closure returns a `Result<bool,
|
||||
/// io::Error>`. If the `bool` is `false`, then the search stops
|
||||
/// immediately. Otherwise, searching continues.
|
||||
///
|
||||
/// If multi line mode was enabled, the line number refers to the line
|
||||
/// number of the first line in the match.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Bytes<F>(pub F)
|
||||
where F: FnMut(u64, &[u8]) -> Result<bool, io::Error>;
|
||||
|
||||
impl<F> Sink for Bytes<F>
|
||||
where F: FnMut(u64, &[u8]) -> Result<bool, io::Error>
|
||||
{
|
||||
type Error = io::Error;
|
||||
|
||||
fn matched(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, io::Error> {
|
||||
let line_number = match mat.line_number() {
|
||||
Some(line_number) => line_number,
|
||||
None => {
|
||||
let msg = "line numbers not enabled";
|
||||
return Err(io::Error::error_message(msg));
|
||||
}
|
||||
};
|
||||
(self.0)(line_number, mat.bytes())
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,788 +0,0 @@
|
||||
use std::io::{self, Write};
|
||||
use std::str;
|
||||
|
||||
use bstr::B;
|
||||
use grep_matcher::{
|
||||
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
|
||||
};
|
||||
use regex::bytes::{Regex, RegexBuilder};
|
||||
|
||||
use searcher::{BinaryDetection, Searcher, SearcherBuilder};
|
||||
use sink::{Sink, SinkContext, SinkFinish, SinkMatch};
|
||||
|
||||
/// A simple regex matcher.
|
||||
///
|
||||
/// This supports setting the matcher's line terminator configuration directly,
|
||||
/// which we use for testing purposes. That is, the caller explicitly
|
||||
/// determines whether the line terminator optimization is enabled. (In reality
|
||||
/// this optimization is detected automatically by inspecting and possibly
|
||||
/// modifying the regex itself.)
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexMatcher {
|
||||
regex: Regex,
|
||||
line_term: Option<LineTerminator>,
|
||||
every_line_is_candidate: bool,
|
||||
}
|
||||
|
||||
impl RegexMatcher {
|
||||
/// Create a new regex matcher.
|
||||
pub fn new(pattern: &str) -> RegexMatcher {
|
||||
let regex = RegexBuilder::new(pattern)
|
||||
.multi_line(true) // permits ^ and $ to match at \n boundaries
|
||||
.build()
|
||||
.unwrap();
|
||||
RegexMatcher {
|
||||
regex: regex,
|
||||
line_term: None,
|
||||
every_line_is_candidate: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Forcefully set the line terminator of this matcher.
|
||||
///
|
||||
/// By default, this matcher has no line terminator set.
|
||||
pub fn set_line_term(
|
||||
&mut self,
|
||||
line_term: Option<LineTerminator>,
|
||||
) -> &mut RegexMatcher {
|
||||
self.line_term = line_term;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to return every line as a candidate or not.
|
||||
///
|
||||
/// This forces searchers to handle the case of reporting a false positive.
|
||||
pub fn every_line_is_candidate(
|
||||
&mut self,
|
||||
yes: bool,
|
||||
) -> &mut RegexMatcher {
|
||||
self.every_line_is_candidate = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Matcher for RegexMatcher {
|
||||
type Captures = NoCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>, NoError> {
|
||||
Ok(self.regex
|
||||
.find_at(haystack, at)
|
||||
.map(|m| Match::new(m.start(), m.end())))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<NoCaptures, NoError> {
|
||||
Ok(NoCaptures::new())
|
||||
}
|
||||
|
||||
fn line_terminator(&self) -> Option<LineTerminator> {
|
||||
self.line_term
|
||||
}
|
||||
|
||||
fn find_candidate_line(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
) -> Result<Option<LineMatchKind>, NoError> {
|
||||
if self.every_line_is_candidate {
|
||||
assert!(self.line_term.is_some());
|
||||
if haystack.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
// Make it interesting and return the last byte in the current
|
||||
// line.
|
||||
let i = B(haystack)
|
||||
.find_byte(self.line_term.unwrap().as_byte())
|
||||
.map(|i| i)
|
||||
.unwrap_or(haystack.len() - 1);
|
||||
Ok(Some(LineMatchKind::Candidate(i)))
|
||||
} else {
|
||||
Ok(self.shortest_match(haystack)?.map(LineMatchKind::Confirmed))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A `Sink` implementation that prints all available information into an
/// in-memory buffer.
///
/// Tests use it to confirm that data is being passed to `Sink` correctly.
#[derive(Clone, Debug)]
pub struct KitchenSink(Vec<u8>);

impl KitchenSink {
    /// Creates a sink that records everything, including the kitchen sink,
    /// starting from an empty buffer.
    pub fn new() -> KitchenSink {
        KitchenSink(Vec::new())
    }

    /// Returns everything written to this sink so far.
    pub fn as_bytes(&self) -> &[u8] {
        self.0.as_slice()
    }
}
|
||||
|
||||
impl Sink for KitchenSink {
|
||||
type Error = io::Error;
|
||||
|
||||
fn matched(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, io::Error> {
|
||||
assert!(!mat.bytes().is_empty());
|
||||
assert!(mat.lines().count() >= 1);
|
||||
|
||||
let mut line_number = mat.line_number();
|
||||
let mut byte_offset = mat.absolute_byte_offset();
|
||||
for line in mat.lines() {
|
||||
if let Some(ref mut n) = line_number {
|
||||
write!(self.0, "{}:", n)?;
|
||||
*n += 1;
|
||||
}
|
||||
|
||||
write!(self.0, "{}:", byte_offset)?;
|
||||
byte_offset += line.len() as u64;
|
||||
self.0.write_all(line)?;
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn context(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
context: &SinkContext,
|
||||
) -> Result<bool, io::Error> {
|
||||
assert!(!context.bytes().is_empty());
|
||||
assert!(context.lines().count() == 1);
|
||||
|
||||
if let Some(line_number) = context.line_number() {
|
||||
write!(self.0, "{}-", line_number)?;
|
||||
}
|
||||
write!(self.0, "{}-", context.absolute_byte_offset)?;
|
||||
self.0.write_all(context.bytes())?;
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn context_break(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
) -> Result<bool, io::Error> {
|
||||
self.0.write_all(b"--\n")?;
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn finish(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
sink_finish: &SinkFinish,
|
||||
) -> Result<(), io::Error> {
|
||||
writeln!(self.0, "")?;
|
||||
writeln!(self.0, "byte count:{}", sink_finish.byte_count())?;
|
||||
if let Some(offset) = sink_finish.binary_byte_offset() {
|
||||
writeln!(self.0, "binary offset:{}", offset)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// A type for expressing tests on a searcher.
|
||||
///
|
||||
/// The searcher code has a lot of different code paths, mostly for the
|
||||
/// purposes of optimizing a bunch of different use cases. The intent of the
|
||||
/// searcher is to pick the best code path based on the configuration, which
|
||||
/// means there is no obviously direct way to ask that a specific code path
|
||||
/// be exercised. Thus, the purpose of this tester is to explicitly check as
|
||||
/// many code paths that make sense.
|
||||
///
|
||||
/// The tester works by assuming you want to test all pertinent code paths.
|
||||
/// These can be trimmed down as necessary via the various builder methods.
|
||||
#[derive(Debug)]
|
||||
pub struct SearcherTester {
|
||||
haystack: String,
|
||||
pattern: String,
|
||||
filter: Option<::regex::Regex>,
|
||||
print_labels: bool,
|
||||
expected_no_line_number: Option<String>,
|
||||
expected_with_line_number: Option<String>,
|
||||
expected_slice_no_line_number: Option<String>,
|
||||
expected_slice_with_line_number: Option<String>,
|
||||
by_line: bool,
|
||||
multi_line: bool,
|
||||
invert_match: bool,
|
||||
line_number: bool,
|
||||
binary: BinaryDetection,
|
||||
auto_heap_limit: bool,
|
||||
after_context: usize,
|
||||
before_context: usize,
|
||||
passthru: bool,
|
||||
}
|
||||
|
||||
impl SearcherTester {
|
||||
/// Create a new tester for testing searchers.
|
||||
pub fn new(haystack: &str, pattern: &str) -> SearcherTester {
|
||||
SearcherTester {
|
||||
haystack: haystack.to_string(),
|
||||
pattern: pattern.to_string(),
|
||||
filter: None,
|
||||
print_labels: false,
|
||||
expected_no_line_number: None,
|
||||
expected_with_line_number: None,
|
||||
expected_slice_no_line_number: None,
|
||||
expected_slice_with_line_number: None,
|
||||
by_line: true,
|
||||
multi_line: true,
|
||||
invert_match: false,
|
||||
line_number: true,
|
||||
binary: BinaryDetection::none(),
|
||||
auto_heap_limit: true,
|
||||
after_context: 0,
|
||||
before_context: 0,
|
||||
passthru: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the test. If the test succeeds, then this returns successfully.
|
||||
/// If the test fails, then it panics with an informative message.
|
||||
pub fn test(&self) {
|
||||
// Check for configuration errors.
|
||||
if self.expected_no_line_number.is_none() {
|
||||
panic!("an 'expected' string with NO line numbers must be given");
|
||||
}
|
||||
if self.line_number && self.expected_with_line_number.is_none() {
|
||||
panic!("an 'expected' string with line numbers must be given, \
|
||||
or disable testing with line numbers");
|
||||
}
|
||||
|
||||
let configs = self.configs();
|
||||
if configs.is_empty() {
|
||||
panic!("test configuration resulted in nothing being tested");
|
||||
}
|
||||
if self.print_labels {
|
||||
for config in &configs {
|
||||
let labels = vec![
|
||||
format!("reader-{}", config.label),
|
||||
format!("slice-{}", config.label),
|
||||
];
|
||||
for label in &labels {
|
||||
if self.include(label) {
|
||||
println!("{}", label);
|
||||
} else {
|
||||
println!("{} (ignored)", label);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for config in &configs {
|
||||
let label = format!("reader-{}", config.label);
|
||||
if self.include(&label) {
|
||||
let got = config.search_reader(&self.haystack);
|
||||
assert_eq_printed!(config.expected_reader, got, "{}", label);
|
||||
}
|
||||
|
||||
let label = format!("slice-{}", config.label);
|
||||
if self.include(&label) {
|
||||
let got = config.search_slice(&self.haystack);
|
||||
assert_eq_printed!(config.expected_slice, got, "{}", label);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Set a regex pattern to filter the tests that are run.
|
||||
///
|
||||
/// By default, no filter is present. When a filter is set, only test
|
||||
/// configurations with a label matching the given pattern will be run.
|
||||
///
|
||||
/// This is often useful when debugging tests, e.g., when you want to do
|
||||
/// printf debugging and only want one particular test configuration to
|
||||
/// execute.
|
||||
#[allow(dead_code)]
|
||||
pub fn filter(&mut self, pattern: &str) -> &mut SearcherTester {
|
||||
self.filter = Some(::regex::Regex::new(pattern).unwrap());
|
||||
self
|
||||
}
|
||||
|
||||
/// When set, the labels for all test configurations are printed before
|
||||
/// executing any test.
|
||||
///
|
||||
/// Note that in order to see these in tests that aren't failing, you'll
|
||||
/// want to use `cargo test -- --nocapture`.
|
||||
#[allow(dead_code)]
|
||||
pub fn print_labels(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
self.print_labels = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the expected search results, without line numbers.
|
||||
pub fn expected_no_line_number(
|
||||
&mut self,
|
||||
exp: &str,
|
||||
) -> &mut SearcherTester {
|
||||
self.expected_no_line_number = Some(exp.to_string());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the expected search results, with line numbers.
|
||||
pub fn expected_with_line_number(
|
||||
&mut self,
|
||||
exp: &str,
|
||||
) -> &mut SearcherTester {
|
||||
self.expected_with_line_number = Some(exp.to_string());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the expected search results, without line numbers, when performing
|
||||
/// a search on a slice. When not present, `expected_no_line_number` is
|
||||
/// used instead.
|
||||
pub fn expected_slice_no_line_number(
|
||||
&mut self,
|
||||
exp: &str,
|
||||
) -> &mut SearcherTester {
|
||||
self.expected_slice_no_line_number = Some(exp.to_string());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the expected search results, with line numbers, when performing a
|
||||
/// search on a slice. When not present, `expected_with_line_number` is
|
||||
/// used instead.
|
||||
#[allow(dead_code)]
|
||||
pub fn expected_slice_with_line_number(
|
||||
&mut self,
|
||||
exp: &str,
|
||||
) -> &mut SearcherTester {
|
||||
self.expected_slice_with_line_number = Some(exp.to_string());
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to test search with line numbers or not.
|
||||
///
|
||||
/// This is enabled by default. When enabled, the string that is expected
|
||||
/// when line numbers are present must be provided. Otherwise, the expected
|
||||
/// string isn't required.
|
||||
pub fn line_number(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
self.line_number = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to test search using the line-by-line searcher or not.
|
||||
///
|
||||
/// By default, this is enabled.
|
||||
pub fn by_line(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
self.by_line = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to test search using the multi line searcher or not.
|
||||
///
|
||||
/// By default, this is enabled.
|
||||
#[allow(dead_code)]
|
||||
pub fn multi_line(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
self.multi_line = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to perform an inverted search or not.
|
||||
///
|
||||
/// By default, this is disabled.
|
||||
pub fn invert_match(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
self.invert_match = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to enable binary detection on all searches.
|
||||
///
|
||||
/// By default, this is disabled.
|
||||
pub fn binary_detection(
|
||||
&mut self,
|
||||
detection: BinaryDetection,
|
||||
) -> &mut SearcherTester {
|
||||
self.binary = detection;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to automatically attempt to test the heap limit setting or not.
|
||||
///
|
||||
/// By default, one of the test configurations includes setting the heap
|
||||
/// limit to its minimal value for normal operation, which checks that
|
||||
/// everything works even at the extremes. However, in some cases, the heap
|
||||
/// limit can (expectedly) alter the output slightly. For example, it can
|
||||
/// impact the number of bytes searched when performing binary detection.
|
||||
/// For convenience, it can be useful to disable the automatic heap limit
|
||||
/// test.
|
||||
pub fn auto_heap_limit(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
self.auto_heap_limit = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the number of lines to include in the "after" context.
|
||||
///
|
||||
/// The default is `0`, which is equivalent to not printing any context.
|
||||
pub fn after_context(&mut self, lines: usize) -> &mut SearcherTester {
|
||||
self.after_context = lines;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the number of lines to include in the "before" context.
|
||||
///
|
||||
/// The default is `0`, which is equivalent to not printing any context.
|
||||
pub fn before_context(&mut self, lines: usize) -> &mut SearcherTester {
|
||||
self.before_context = lines;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to enable the "passthru" feature or not.
|
||||
///
|
||||
/// When passthru is enabled, it effectively treats all non-matching lines
|
||||
/// as contextual lines. In other words, enabling this is akin to
|
||||
/// requesting an unbounded number of before and after contextual lines.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn passthru(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
self.passthru = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Return the minimum size of a buffer required for a successful search.
|
||||
///
|
||||
/// Generally, this corresponds to the maximum length of a line (including
|
||||
/// its terminator), but if context settings are enabled, then this must
|
||||
/// include the sum of the longest N lines.
|
||||
///
|
||||
/// Note that this must account for whether the test is using multi line
|
||||
/// search or not, since multi line search requires being able to fit the
|
||||
/// entire haystack into memory.
|
||||
fn minimal_heap_limit(&self, multi_line: bool) -> usize {
|
||||
if multi_line {
|
||||
1 + self.haystack.len()
|
||||
} else if self.before_context == 0 && self.after_context == 0 {
|
||||
1 + self.haystack.lines().map(|s| s.len()).max().unwrap_or(0)
|
||||
} else {
|
||||
let mut lens: Vec<usize> =
|
||||
self.haystack.lines().map(|s| s.len()).collect();
|
||||
lens.sort();
|
||||
lens.reverse();
|
||||
|
||||
let context_count =
|
||||
if self.passthru {
|
||||
self.haystack.lines().count()
|
||||
} else {
|
||||
// Why do we add 2 here? Well, we need to add 1 in order to
|
||||
// have room to search at least one line. We add another
|
||||
// because the implementation will occasionally include
|
||||
// an additional line when handling the context. There's
|
||||
// no particularly good reason, other than keeping the
|
||||
// implementation simple.
|
||||
2 + self.before_context + self.after_context
|
||||
};
|
||||
|
||||
// We add 1 to each line since `str::lines` doesn't include the
|
||||
// line terminator.
|
||||
lens.into_iter()
|
||||
.take(context_count)
|
||||
.map(|len| len + 1)
|
||||
.sum::<usize>()
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given label should be included as part
|
||||
/// of executing `test`.
|
||||
///
|
||||
/// Inclusion is determined by the filter specified. If no filter has been
|
||||
/// given, then this always returns `true`.
|
||||
fn include(&self, label: &str) -> bool {
|
||||
let re = match self.filter {
|
||||
None => return true,
|
||||
Some(ref re) => re,
|
||||
};
|
||||
re.is_match(label)
|
||||
}
|
||||
|
||||
/// Configs generates a set of all search configurations that should be
|
||||
/// tested. The configs generated are based on the configuration in this
|
||||
/// builder.
|
||||
fn configs(&self) -> Vec<TesterConfig> {
|
||||
let mut configs = vec![];
|
||||
|
||||
let matcher = RegexMatcher::new(&self.pattern);
|
||||
let mut builder = SearcherBuilder::new();
|
||||
builder
|
||||
.line_number(false)
|
||||
.invert_match(self.invert_match)
|
||||
.binary_detection(self.binary.clone())
|
||||
.after_context(self.after_context)
|
||||
.before_context(self.before_context)
|
||||
.passthru(self.passthru);
|
||||
|
||||
if self.by_line {
|
||||
let mut matcher = matcher.clone();
|
||||
let mut builder = builder.clone();
|
||||
|
||||
let expected_reader =
|
||||
self.expected_no_line_number.as_ref().unwrap().to_string();
|
||||
let expected_slice = match self.expected_slice_no_line_number {
|
||||
None => expected_reader.clone(),
|
||||
Some(ref e) => e.to_string(),
|
||||
};
|
||||
configs.push(TesterConfig {
|
||||
label: "byline-noterm-nonumber".to_string(),
|
||||
expected_reader: expected_reader.clone(),
|
||||
expected_slice: expected_slice.clone(),
|
||||
builder: builder.clone(),
|
||||
matcher: matcher.clone(),
|
||||
});
|
||||
|
||||
if self.auto_heap_limit {
|
||||
builder.heap_limit(Some(self.minimal_heap_limit(false)));
|
||||
configs.push(TesterConfig {
|
||||
label: "byline-noterm-nonumber-heaplimit".to_string(),
|
||||
expected_reader: expected_reader.clone(),
|
||||
expected_slice: expected_slice.clone(),
|
||||
builder: builder.clone(),
|
||||
matcher: matcher.clone(),
|
||||
});
|
||||
builder.heap_limit(None);
|
||||
}
|
||||
|
||||
matcher.set_line_term(Some(LineTerminator::byte(b'\n')));
|
||||
configs.push(TesterConfig {
|
||||
label: "byline-term-nonumber".to_string(),
|
||||
expected_reader: expected_reader.clone(),
|
||||
expected_slice: expected_slice.clone(),
|
||||
builder: builder.clone(),
|
||||
matcher: matcher.clone(),
|
||||
});
|
||||
|
||||
matcher.every_line_is_candidate(true);
|
||||
configs.push(TesterConfig {
|
||||
label: "byline-term-nonumber-candidates".to_string(),
|
||||
expected_reader: expected_reader.clone(),
|
||||
expected_slice: expected_slice.clone(),
|
||||
builder: builder.clone(),
|
||||
matcher: matcher.clone(),
|
||||
});
|
||||
}
|
||||
if self.by_line && self.line_number {
|
||||
let mut matcher = matcher.clone();
|
||||
let mut builder = builder.clone();
|
||||
|
||||
let expected_reader =
|
||||
self.expected_with_line_number.as_ref().unwrap().to_string();
|
||||
let expected_slice = match self.expected_slice_with_line_number {
|
||||
None => expected_reader.clone(),
|
||||
Some(ref e) => e.to_string(),
|
||||
};
|
||||
|
||||
builder.line_number(true);
|
||||
configs.push(TesterConfig {
|
||||
label: "byline-noterm-number".to_string(),
|
||||
expected_reader: expected_reader.clone(),
|
||||
expected_slice: expected_slice.clone(),
|
||||
builder: builder.clone(),
|
||||
matcher: matcher.clone(),
|
||||
});
|
||||
|
||||
matcher.set_line_term(Some(LineTerminator::byte(b'\n')));
|
||||
configs.push(TesterConfig {
|
||||
label: "byline-term-number".to_string(),
|
||||
expected_reader: expected_reader.clone(),
|
||||
expected_slice: expected_slice.clone(),
|
||||
builder: builder.clone(),
|
||||
matcher: matcher.clone(),
|
||||
});
|
||||
|
||||
matcher.every_line_is_candidate(true);
|
||||
configs.push(TesterConfig {
|
||||
label: "byline-term-number-candidates".to_string(),
|
||||
expected_reader: expected_reader.clone(),
|
||||
expected_slice: expected_slice.clone(),
|
||||
builder: builder.clone(),
|
||||
matcher: matcher.clone(),
|
||||
});
|
||||
}
|
||||
if self.multi_line {
|
||||
let mut builder = builder.clone();
|
||||
let expected_slice = match self.expected_slice_no_line_number {
|
||||
None => {
|
||||
self.expected_no_line_number.as_ref().unwrap().to_string()
|
||||
}
|
||||
Some(ref e) => e.to_string(),
|
||||
};
|
||||
|
||||
builder.multi_line(true);
|
||||
configs.push(TesterConfig {
|
||||
label: "multiline-nonumber".to_string(),
|
||||
expected_reader: expected_slice.clone(),
|
||||
expected_slice: expected_slice.clone(),
|
||||
builder: builder.clone(),
|
||||
matcher: matcher.clone(),
|
||||
});
|
||||
|
||||
if self.auto_heap_limit {
|
||||
builder.heap_limit(Some(self.minimal_heap_limit(true)));
|
||||
configs.push(TesterConfig {
|
||||
label: "multiline-nonumber-heaplimit".to_string(),
|
||||
expected_reader: expected_slice.clone(),
|
||||
expected_slice: expected_slice.clone(),
|
||||
builder: builder.clone(),
|
||||
matcher: matcher.clone(),
|
||||
});
|
||||
builder.heap_limit(None);
|
||||
}
|
||||
}
|
||||
if self.multi_line && self.line_number {
|
||||
let mut builder = builder.clone();
|
||||
let expected_slice = match self.expected_slice_with_line_number {
|
||||
None => {
|
||||
self.expected_with_line_number
|
||||
.as_ref().unwrap().to_string()
|
||||
}
|
||||
Some(ref e) => e.to_string(),
|
||||
};
|
||||
|
||||
builder.multi_line(true);
|
||||
builder.line_number(true);
|
||||
configs.push(TesterConfig {
|
||||
label: "multiline-number".to_string(),
|
||||
expected_reader: expected_slice.clone(),
|
||||
expected_slice: expected_slice.clone(),
|
||||
builder: builder.clone(),
|
||||
matcher: matcher.clone(),
|
||||
});
|
||||
|
||||
builder.heap_limit(Some(self.minimal_heap_limit(true)));
|
||||
configs.push(TesterConfig {
|
||||
label: "multiline-number-heaplimit".to_string(),
|
||||
expected_reader: expected_slice.clone(),
|
||||
expected_slice: expected_slice.clone(),
|
||||
builder: builder.clone(),
|
||||
matcher: matcher.clone(),
|
||||
});
|
||||
builder.heap_limit(None);
|
||||
}
|
||||
configs
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TesterConfig {
|
||||
label: String,
|
||||
expected_reader: String,
|
||||
expected_slice: String,
|
||||
builder: SearcherBuilder,
|
||||
matcher: RegexMatcher,
|
||||
}
|
||||
|
||||
impl TesterConfig {
|
||||
/// Execute a search using a reader. This exercises the incremental search
|
||||
/// strategy, where the entire contents of the corpus aren't necessarily
|
||||
/// in memory at once.
|
||||
fn search_reader(&self, haystack: &str) -> String {
|
||||
let mut sink = KitchenSink::new();
|
||||
let mut searcher = self.builder.build();
|
||||
let result = searcher.search_reader(
|
||||
&self.matcher,
|
||||
haystack.as_bytes(),
|
||||
&mut sink,
|
||||
);
|
||||
if let Err(err) = result {
|
||||
let label = format!("reader-{}", self.label);
|
||||
panic!("error running '{}': {}", label, err);
|
||||
}
|
||||
String::from_utf8(sink.as_bytes().to_vec()).unwrap()
|
||||
}
|
||||
|
||||
/// Execute a search using a slice. This exercises the search routines that
|
||||
/// have the entire contents of the corpus in memory at one time.
|
||||
fn search_slice(&self, haystack: &str) -> String {
|
||||
let mut sink = KitchenSink::new();
|
||||
let mut searcher = self.builder.build();
|
||||
let result = searcher.search_slice(
|
||||
&self.matcher,
|
||||
haystack.as_bytes(),
|
||||
&mut sink,
|
||||
);
|
||||
if let Err(err) = result {
|
||||
let label = format!("slice-{}", self.label);
|
||||
panic!("error running '{}': {}", label, err);
|
||||
}
|
||||
String::from_utf8(sink.as_bytes().to_vec()).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use grep_matcher::{Match, Matcher};

    use super::*;

    /// Shorthand for building a match span.
    fn m(start: usize, end: usize) -> Match {
        Match::new(start, end)
    }

    /// Search `haystack` for an empty line (`^$`) from each listed start
    /// offset and compare against the expected span, if any.
    fn check_empty_line(
        haystack: &[u8],
        cases: &[(usize, Option<(usize, usize)>)],
    ) {
        let matcher = RegexMatcher::new(r"^$");
        for &(at, span) in cases {
            let want = span.map(|(start, end)| m(start, end));
            assert_eq!(
                matcher.find_at(haystack, at),
                Ok(want),
                "searching from offset {}",
                at
            );
        }
    }

    #[test]
    fn empty_line1() {
        check_empty_line(b"", &[(0, Some((0, 0)))]);
    }

    #[test]
    fn empty_line2() {
        check_empty_line(
            b"\n",
            &[(0, Some((0, 0))), (1, Some((1, 1)))],
        );
    }

    #[test]
    fn empty_line3() {
        check_empty_line(
            b"\n\n",
            &[(0, Some((0, 0))), (1, Some((1, 1))), (2, Some((2, 2)))],
        );
    }

    #[test]
    fn empty_line4() {
        check_empty_line(
            b"a\n\nb\n",
            &[
                (0, Some((2, 2))),
                (1, Some((2, 2))),
                (2, Some((2, 2))),
                (3, Some((5, 5))),
                (4, Some((5, 5))),
                (5, Some((5, 5))),
            ],
        );
    }

    #[test]
    fn empty_line5() {
        check_empty_line(
            b"a\n\nb\nc",
            &[
                (0, Some((2, 2))),
                (1, Some((2, 2))),
                (2, Some((2, 2))),
                (3, None),
                (4, None),
                (5, None),
                (6, None),
            ],
        );
    }

    #[test]
    fn empty_line6() {
        check_empty_line(
            b"a\n",
            &[(0, Some((2, 2))), (1, Some((2, 2))), (2, Some((2, 2)))],
        );
    }
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user