diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 47d33a4..423be2d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -194,7 +194,7 @@ jobs: - name: Print available short flags shell: bash - run: ${{ env.CARGO }} test --bin rg ${{ env.TARGET_FLAGS }} flags::defs::tests::available_shorts -- --nocapture + run: ${{ env.CARGO }} test --bin rgs ${{ env.TARGET_FLAGS }} flags::defs::tests::available_shorts -- --nocapture # Setup and compile on the wasm32-wasip1 target wasm: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b11550a..ab263aa 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -178,9 +178,9 @@ jobs: run: | ${{ env.CARGO }} build --verbose --profile release-lto --features pcre2 ${{ env.TARGET_FLAGS }} if [[ "${{ matrix.os }}" == windows-* ]]; then - bin="target/${{ matrix.target }}/release-lto/rg.exe" + bin="target/${{ matrix.target }}/release-lto/rgs.exe" else - bin="target/${{ matrix.target }}/release-lto/rg" + bin="target/${{ matrix.target }}/release-lto/rgs" fi echo "BIN=$bin" >> $GITHUB_ENV @@ -203,14 +203,14 @@ jobs: shell: bash run: | version="${{ needs.create-release.outputs.version }}" - echo "ARCHIVE=ripgrep-$version-${{ matrix.target }}" >> $GITHUB_ENV + echo "ARCHIVE=rgs-$version-${{ matrix.target }}" >> $GITHUB_ENV - name: Creating directory for archive shell: bash run: | mkdir -p "$ARCHIVE"/{complete,doc} cp "$BIN" "$ARCHIVE"/ - cp {README.md,COPYING,UNLICENSE,LICENSE-MIT} "$ARCHIVE"/ + cp {README.md,README-ripgrep.md,COPYING,UNLICENSE,LICENSE-MIT} "$ARCHIVE"/ cp {CHANGELOG.md,FAQ.md,GUIDE.md} "$ARCHIVE"/doc/ - name: Generate man page and completions (no emulation) @@ -218,11 +218,11 @@ jobs: shell: bash run: | "$BIN" --version - "$BIN" --generate complete-bash > "$ARCHIVE/complete/rg.bash" - "$BIN" --generate complete-fish > "$ARCHIVE/complete/rg.fish" - "$BIN" --generate complete-powershell > "$ARCHIVE/complete/_rg.ps1" - "$BIN" --generate 
complete-zsh > "$ARCHIVE/complete/_rg" - "$BIN" --generate man > "$ARCHIVE/doc/rg.1" + "$BIN" --generate complete-bash > "$ARCHIVE/complete/rgs.bash" + "$BIN" --generate complete-fish > "$ARCHIVE/complete/rgs.fish" + "$BIN" --generate complete-powershell > "$ARCHIVE/complete/_rgs.ps1" + "$BIN" --generate complete-zsh > "$ARCHIVE/complete/_rgs" + "$BIN" --generate man > "$ARCHIVE/doc/rgs.1" - name: Generate man page and completions (emulation) if: matrix.qemu != '' @@ -236,27 +236,27 @@ jobs: "$PWD/target:/target:Z" \ "ghcr.io/cross-rs/${{ matrix.target }}:main" \ "${{ matrix.qemu }}" "/$BIN" \ - --generate complete-bash > "$ARCHIVE/complete/rg.bash" + --generate complete-bash > "$ARCHIVE/complete/rgs.bash" docker run --rm -v \ "$PWD/target:/target:Z" \ "ghcr.io/cross-rs/${{ matrix.target }}:main" \ "${{ matrix.qemu }}" "/$BIN" \ - --generate complete-fish > "$ARCHIVE/complete/rg.fish" + --generate complete-fish > "$ARCHIVE/complete/rgs.fish" docker run --rm -v \ "$PWD/target:/target:Z" \ "ghcr.io/cross-rs/${{ matrix.target }}:main" \ "${{ matrix.qemu }}" "/$BIN" \ - --generate complete-powershell > "$ARCHIVE/complete/_rg.ps1" + --generate complete-powershell > "$ARCHIVE/complete/_rgs.ps1" docker run --rm -v \ "$PWD/target:/target:Z" \ "ghcr.io/cross-rs/${{ matrix.target }}:main" \ "${{ matrix.qemu }}" "/$BIN" \ - --generate complete-zsh > "$ARCHIVE/complete/_rg" + --generate complete-zsh > "$ARCHIVE/complete/_rgs" docker run --rm -v \ "$PWD/target:/target:Z" \ "ghcr.io/cross-rs/${{ matrix.target }}:main" \ "${{ matrix.qemu }}" "/$BIN" \ - --generate man > "$ARCHIVE/doc/rg.1" + --generate man > "$ARCHIVE/doc/rgs.1" - name: Build archive (Windows) shell: bash @@ -325,7 +325,7 @@ jobs: shell: bash run: | cargo build --target ${{ env.TARGET }} - bin="target/${{ env.TARGET }}/debug/rg" + bin="target/${{ env.TARGET }}/debug/rgs" echo "BIN=$bin" >> $GITHUB_ENV - name: Create deployment directory @@ -338,14 +338,14 @@ jobs: - name: Generate man page shell: bash run: | - 
"$BIN" --generate man > "$DEPLOY_DIR/rg.1" + "$BIN" --generate man > "$DEPLOY_DIR/rgs.1" - name: Generate shell completions shell: bash run: | - "$BIN" --generate complete-bash > "$DEPLOY_DIR/rg.bash" - "$BIN" --generate complete-fish > "$DEPLOY_DIR/rg.fish" - "$BIN" --generate complete-zsh > "$DEPLOY_DIR/_rg" + "$BIN" --generate complete-bash > "$DEPLOY_DIR/rgs.bash" + "$BIN" --generate complete-fish > "$DEPLOY_DIR/rgs.fish" + "$BIN" --generate complete-zsh > "$DEPLOY_DIR/_rgs" - name: Build release binary shell: bash @@ -353,7 +353,7 @@ jobs: cargo deb --profile deb --target ${{ env.TARGET }} version="${{ needs.create-release.outputs.version }}" echo "DEB_DIR=target/${{ env.TARGET }}/debian" >> $GITHUB_ENV - echo "DEB_NAME=ripgrep_$version-1_amd64.deb" >> $GITHUB_ENV + echo "DEB_NAME=rgs_$version-1_amd64.deb" >> $GITHUB_ENV - name: Create sha256 sum of deb file shell: bash diff --git a/Cargo.toml b/Cargo.toml index 9e40938..98cb271 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -93,25 +93,26 @@ inherits = "release-lto" features = ["pcre2"] section = "utils" assets = [ - ["target/release/rg", "usr/bin/", "755"], + ["target/release/rgs", "usr/bin/", "755"], ["COPYING", "usr/share/doc/ripgrep/", "644"], ["LICENSE-MIT", "usr/share/doc/ripgrep/", "644"], ["UNLICENSE", "usr/share/doc/ripgrep/", "644"], ["CHANGELOG.md", "usr/share/doc/ripgrep/CHANGELOG", "644"], ["README.md", "usr/share/doc/ripgrep/README", "644"], + ["README-ripgrep.md", "usr/share/doc/ripgrep/README-ripgrep", "644"], ["FAQ.md", "usr/share/doc/ripgrep/FAQ", "644"], # The man page is automatically generated by ripgrep's build process, so # this file isn't actually committed. Instead, to create a dpkg, either # create a deployment/deb directory and copy the man page to it, or use the # 'ci/build-deb' script. - ["deployment/deb/rg.1", "usr/share/man/man1/rg.1", "644"], + ["deployment/deb/rgs.1", "usr/share/man/man1/rgs.1", "644"], # Similarly for shell completions. 
- ["deployment/deb/rg.bash", "usr/share/bash-completion/completions/rg", "644"], - ["deployment/deb/rg.fish", "usr/share/fish/vendor_completions.d/rg.fish", "644"], - ["deployment/deb/_rg", "usr/share/zsh/vendor-completions/", "644"], + ["deployment/deb/rgs.bash", "usr/share/bash-completion/completions/rgs", "644"], + ["deployment/deb/rgs.fish", "usr/share/fish/vendor_completions.d/rgs.fish", "644"], + ["deployment/deb/_rgs", "usr/share/zsh/vendor-completions/", "644"], ] extended-description = """\ -ripgrep (rg) recursively searches your current directory for a regex pattern. +rgs recursively searches your current directory for a regex pattern. By default, ripgrep will respect your .gitignore and automatically skip hidden files/directories and binary files. """ diff --git a/GUIDE.md b/GUIDE.md index 6d51e46..244e137 100644 --- a/GUIDE.md +++ b/GUIDE.md @@ -2,7 +2,7 @@ This guide is intended to give an elementary description of ripgrep and an overview of its capabilities. This guide assumes that ripgrep is -[installed](README.md#installation) +[installed](README-ripgrep.md#installation) and that readers have passing familiarity with using command line tools. This also assumes a Unix-like system, although most commands are probably easily translatable to any command line shell environment. @@ -42,17 +42,17 @@ $ unzip 0.7.1.zip $ cd ripgrep-0.7.1 $ ls benchsuite grep tests Cargo.toml LICENSE-MIT -ci ignore wincolor CHANGELOG.md README.md +ci ignore wincolor CHANGELOG.md README-ripgrep.md complete pkg appveyor.yml compile snapcraft.yaml doc src build.rs COPYING UNLICENSE globset termcolor Cargo.lock HomebrewFormula ``` Let's try our first search by looking for all occurrences of the word `fast` -in `README.md`: +in `README-ripgrep.md`: ``` -$ rg fast README.md +$ rg fast README-ripgrep.md 75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement 88: color and full Unicode support. 
Unlike GNU grep, `ripgrep` stays fast while 119:### Is it really faster than everything else? @@ -64,7 +64,7 @@ $ rg fast README.md search any files, then re-run ripgrep with the `--debug` flag. One likely cause of this is that you have a `*` rule in a `$HOME/.gitignore` file.) -So what happened here? ripgrep read the contents of `README.md`, and for each +So what happened here? ripgrep read the contents of `README-ripgrep.md`, and for each line that contained `fast`, ripgrep printed it to your terminal. ripgrep also included the line number for each line by default. If your terminal supports colors, then your output might actually look something like this screenshot: @@ -79,7 +79,7 @@ what if we wanted to find all lines have a word that contains `fast` followed by some number of other letters? ``` -$ rg 'fast\w+' README.md +$ rg 'fast\w+' README-ripgrep.md 75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement 119:### Is it really faster than everything else? ``` @@ -95,7 +95,7 @@ like `faster` will. `faste` would also match! Here's a different variation on this same theme: ``` -$ rg 'fast\w*' README.md +$ rg 'fast\w*' README-ripgrep.md 75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement 88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while 119:### Is it really faster than everything else? @@ -444,7 +444,7 @@ text with some other text. This is easiest to explain with an example. Remember when we searched for the word `fast` in ripgrep's README? ``` -$ rg fast README.md +$ rg fast README-ripgrep.md 75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement 88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while 119:### Is it really faster than everything else? @@ -456,7 +456,7 @@ What if we wanted to *replace* all occurrences of `fast` with `FAST`? 
That's easy with ripgrep's `--replace` flag: ``` -$ rg fast README.md --replace FAST +$ rg fast README-ripgrep.md --replace FAST 75: FASTer than both. (N.B. It is not, strictly speaking, a "drop-in" replacement 88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays FAST while 119:### Is it really FASTer than everything else? @@ -467,7 +467,7 @@ $ rg fast README.md --replace FAST or, more succinctly, ``` -$ rg fast README.md -r FAST +$ rg fast README-ripgrep.md -r FAST [snip] ``` @@ -476,7 +476,7 @@ in the output. If you instead wanted to replace an entire line of text, then you need to include the entire line in your match. For example: ``` -$ rg '^.*fast.*$' README.md -r FAST +$ rg '^.*fast.*$' README-ripgrep.md -r FAST 75:FAST 88:FAST 119:FAST @@ -488,7 +488,7 @@ Alternatively, you can combine the `--only-matching` (or `-o` for short) with the `--replace` flag to achieve the same result: ``` -$ rg fast README.md --only-matching --replace FAST +$ rg fast README-ripgrep.md --only-matching --replace FAST 75:FAST 88:FAST 119:FAST @@ -499,7 +499,7 @@ $ rg fast README.md --only-matching --replace FAST or, more succinctly, ``` -$ rg fast README.md -or FAST +$ rg fast README-ripgrep.md -or FAST [snip] ``` @@ -512,7 +512,7 @@ group" (indicated by parentheses) so that we can reference it later in our replacement string. For example: ``` -$ rg 'fast\s+(\w+)' README.md -r 'fast-$1' +$ rg 'fast\s+(\w+)' README-ripgrep.md -r 'fast-$1' 88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast-while 124:Summarizing, `ripgrep` is fast-because: ``` @@ -528,7 +528,7 @@ using the indices. For example, the following command is equivalent to the above command: ``` -$ rg 'fast\s+(?P<word>\w+)' README.md -r 'fast-$word' +$ rg 'fast\s+(?P<word>\w+)' README-ripgrep.md -r 'fast-$word' 88: color and full Unicode support. 
Unlike GNU grep, `ripgrep` stays fast-while 124:Summarizing, `ripgrep` is fast-because: ``` diff --git a/README-ripgrep.md b/README-ripgrep.md new file mode 100644 index 0000000..73868ac --- /dev/null +++ b/README-ripgrep.md @@ -0,0 +1,541 @@ +ripgrep (rg) +------------ +ripgrep is a line-oriented search tool that recursively searches the current +directory for a regex pattern. By default, ripgrep will respect gitignore rules +and automatically skip hidden files/directories and binary files. (To disable +all automatic filtering by default, use `rg -uuu`.) ripgrep has first class +support on Windows, macOS and Linux, with binary downloads available for [every +release](https://github.com/BurntSushi/ripgrep/releases). ripgrep is similar to +other popular search tools like The Silver Searcher, ack and grep. + +[![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions) +[![Crates.io](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep) +[![Packaging status](https://repology.org/badge/tiny-repos/ripgrep.svg)](https://repology.org/project/ripgrep/badges) + +Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org). + + +### CHANGELOG + +Please see the [CHANGELOG](CHANGELOG.md) for a release history. 
+ +### Documentation quick links + +* [Installation](#installation) +* [User Guide](GUIDE.md) +* [Frequently Asked Questions](FAQ.md) +* [Regex syntax](https://docs.rs/regex/1/regex/#syntax) +* [Configuration files](GUIDE.md#configuration-file) +* [Shell completions](FAQ.md#complete) +* [Building](#building) +* [Translations](#translations) + + +### Screenshot of search results + +[![A screenshot of a sample search with ripgrep](https://burntsushi.net/stuff/ripgrep1.png)](https://burntsushi.net/stuff/ripgrep1.png) + + +### Quick examples comparing tools + +This example searches the entire +[Linux kernel source tree](https://github.com/BurntSushi/linux) +(after running `make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where +all matches must be words. Timings were collected on a system with an Intel +i9-12900K 5.2 GHz. + +Please remember that a single benchmark is never enough! See my +[blog post on ripgrep](https://blog.burntsushi.net/ripgrep/) +for a very detailed comparison with more benchmarks and analysis. 
+ +| Tool | Command | Line count | Time | +| ---- | ------- | ---------- | ---- | +| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 536 | **0.082s** (1.00x) | +| [hypergrep](https://github.com/p-ranav/hypergrep) | `hgrep -n -w '[A-Z]+_SUSPEND'` | 536 | 0.167s (2.04x) | +| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `git grep -P -n -w '[A-Z]+_SUSPEND'` | 536 | 0.273s (3.34x) | +| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 534 | 0.443s (5.43x) | +| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r --ignore-files --no-hidden -I -w '[A-Z]+_SUSPEND'` | 536 | 0.639s (7.82x) | +| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 536 | 0.727s (8.91x) | +| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 536 | 2.670s (32.70x) | +| [ack](https://github.com/beyondgrep/ack3) | `ack -w '[A-Z]+_SUSPEND'` | 2677 | 2.935s (35.94x) | + +Here's another benchmark on the same corpus as above that disregards gitignore +files and searches with a whitelist instead. The corpus is the same as in the +previous benchmark, and the flags passed to each command ensure that they are +doing equivalent work: + +| Tool | Command | Line count | Time | +| ---- | ------- | ---------- | ---- | +| ripgrep | `rg -uuu -tc -n -w '[A-Z]+_SUSPEND'` | 447 | **0.063s** (1.00x) | +| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 447 | 0.607s (9.62x) | +| [GNU grep](https://www.gnu.org/software/grep/) | `grep -E -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 447 | 0.674s (10.69x) | + +Now we'll move to searching on single large file. 
Here is a straight-up +comparison between ripgrep, ugrep and GNU grep on a file cached in memory +(~13GB, [`OpenSubtitles.raw.en.gz`](http://opus.nlpl.eu/download.php?f=OpenSubtitles/v2018/mono/OpenSubtitles.raw.en.gz), decompressed): + +| Tool | Command | Line count | Time | +| ---- | ------- | ---------- | ---- | +| ripgrep (Unicode) | `rg -w 'Sherlock [A-Z]\w+'` | 7882 | **1.042s** (1.00x) | +| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w 'Sherlock [A-Z]\w+'` | 7882 | 1.339s (1.28x) | +| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 egrep -w 'Sherlock [A-Z]\w+'` | 7882 | 6.577s (6.31x) | + +In the above benchmark, passing the `-n` flag (for showing line numbers) +increases the times to `1.664s` for ripgrep and `9.484s` for GNU grep. ugrep +times are unaffected by the presence or absence of `-n`. + +Beware of performance cliffs though: + +| Tool | Command | Line count | Time | +| ---- | ------- | ---------- | ---- | +| ripgrep (Unicode) | `rg -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | **1.053s** (1.00x) | +| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 grep -E -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | 6.234s (5.92x) | +| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | 28.973s (27.51x) | + +And performance can drop precipitously across the board when searching big +files for patterns without any opportunities for literal optimizations: + +| Tool | Command | Line count | Time | +| ---- | ------- | ---------- | ---- | +| ripgrep | `rg '[A-Za-z]{30}'` | 6749 | **15.569s** (1.00x) | +| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -E '[A-Za-z]{30}'` | 6749 | 21.857s (1.40x) | +| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C grep -E '[A-Za-z]{30}'` | 6749 | 32.409s (2.08x) | +| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 grep -E '[A-Za-z]{30}'` | 6795 | 8m30s (32.74x) | + +Finally, high match counts 
also tend to both tank performance and smooth +out the differences between tools (because performance is dominated by how +quickly one can handle a match and not the algorithm used to detect the match, +generally speaking): + +| Tool | Command | Line count | Time | +| ---- | ------- | ---------- | ---- | +| ripgrep | `rg the` | 83499915 | **6.948s** (1.00x) | +| [ugrep](https://github.com/Genivia/ugrep) | `ugrep the` | 83499915 | 11.721s (1.69x) | +| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C grep the` | 83499915 | 15.217s (2.19x) | + +### Why should I use ripgrep? + +* It can replace many use cases served by other search tools + because it contains most of their features and is generally faster. (See + [the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly + replace grep.) +* Like other tools specialized to code search, ripgrep defaults to + [recursive search](GUIDE.md#recursive-search) and does [automatic + filtering](GUIDE.md#automatic-filtering). Namely, ripgrep won't search files + ignored by your `.gitignore`/`.ignore`/`.rgignore` files, it won't search + hidden files and it won't search binary files. Automatic filtering can be + disabled with `rg -uuu`. +* ripgrep can [search specific types of files](GUIDE.md#manual-filtering-file-types). + For example, `rg -tpy foo` limits your search to Python files and `rg -Tjs + foo` excludes JavaScript files from your search. ripgrep can be taught about + new file types with custom matching rules. +* ripgrep supports many features found in `grep`, such as showing the context + of search results, searching multiple patterns, highlighting matches with + color and full Unicode support. Unlike GNU grep, ripgrep stays fast while + supporting Unicode (which is always on). +* ripgrep has optional support for switching its regex engine to use PCRE2. 
+ Among other things, this makes it possible to use look-around and + backreferences in your patterns, which are not supported in ripgrep's default + regex engine. PCRE2 support can be enabled with `-P/--pcre2` (use PCRE2 + always) or `--auto-hybrid-regex` (use PCRE2 only if needed). An alternative + syntax is provided via the `--engine (default|pcre2|auto)` option. +* ripgrep has [rudimentary support for replacements](GUIDE.md#replacements), + which permit rewriting output based on what was matched. +* ripgrep supports [searching files in text encodings](GUIDE.md#file-encoding) + other than UTF-8, such as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. + (Some support for automatically detecting UTF-16 is provided. Other text + encodings must be specifically specified with the `-E/--encoding` flag.) +* ripgrep supports searching files compressed in a common format (brotli, + bzip2, gzip, lz4, lzma, xz, or zstandard) with the `-z/--search-zip` flag. +* ripgrep supports + [arbitrary input preprocessing filters](GUIDE.md#preprocessor) + which could be PDF text extraction, less supported decompression, decrypting, + automatic encoding detection and so on. +* ripgrep can be configured via a + [configuration file](GUIDE.md#configuration-file). + +In other words, use ripgrep if you like speed, filtering by default, fewer +bugs and Unicode support. + + +### Why shouldn't I use ripgrep? + +Despite initially not wanting to add every feature under the sun to ripgrep, +over time, ripgrep has grown support for most features found in other file +searching tools. This includes searching for results spanning across multiple +lines, and opt-in support for PCRE2, which provides look-around and +backreference support. + +At this point, the primary reasons not to use ripgrep probably consist of one +or more of the following: + +* You need a portable and ubiquitous tool. 
While ripgrep works on Windows, + macOS and Linux, it is not ubiquitous and it does not conform to any + standard such as POSIX. The best tool for this job is good old grep. +* There still exists some other feature (or bug) not listed in this README that + you rely on that's in another tool that isn't in ripgrep. +* There is a performance edge case where ripgrep doesn't do well where another + tool does do well. (Please file a bug report!) +* ripgrep isn't possible to install on your machine or isn't available for your + platform. (Please file a bug report!) + + +### Is it really faster than everything else? + +Generally, yes. A large number of benchmarks with detailed analysis for each is +[available on my blog](https://blog.burntsushi.net/ripgrep/). + +Summarizing, ripgrep is fast because: + +* It is built on top of + [Rust's regex engine](https://github.com/rust-lang/regex). + Rust's regex engine uses finite automata, SIMD and aggressive literal + optimizations to make searching very fast. (PCRE2 support can be opted into + with the `-P/--pcre2` flag.) +* Rust's regex library maintains performance with full Unicode support by + building UTF-8 decoding directly into its deterministic finite automaton + engine. +* It supports searching with either memory maps or by searching incrementally + with an intermediate buffer. The former is better for single files and the + latter is better for large directories. ripgrep chooses the best searching + strategy for you automatically. +* Applies your ignore patterns in `.gitignore` files using a + [`RegexSet`](https://docs.rs/regex/1/regex/struct.RegexSet.html). + That means a single file path can be matched against multiple glob patterns + simultaneously. +* It uses a lock-free parallel recursive directory iterator, courtesy of + [`crossbeam`](https://docs.rs/crossbeam) and + [`ignore`](https://docs.rs/ignore). 
+ + +### Feature comparison + +Andy Lester, author of [ack](https://beyondgrep.com/), has published an +excellent table comparing the features of ack, ag, git-grep, GNU grep and +ripgrep: https://beyondgrep.com/feature-comparison/ + +Note that ripgrep has grown a few significant new features recently that +are not yet present in Andy's table. This includes, but is not limited to, +configuration files, passthru, support for searching compressed files, +multiline search and opt-in fancy regex support via PCRE2. + + +### Playground + +If you'd like to try ripgrep before installing, there's an unofficial +[playground](https://codapi.org/ripgrep/) and an [interactive +tutorial](https://codapi.org/try/ripgrep/). + +If you have any questions about these, please open an issue in the [tutorial +repo](https://github.com/nalgeon/tryxinyminutes). + + +### Installation + +The binary name for ripgrep is `rg`. + +**[Archives of precompiled binaries for ripgrep are available for Windows, +macOS and Linux.](https://github.com/BurntSushi/ripgrep/releases)** Linux and +Windows binaries are static executables. Users of platforms not explicitly +mentioned below are advised to download one of these archives. 
+ +If you're a **macOS Homebrew** or a **Linuxbrew** user, then you can install +ripgrep from homebrew-core: + +``` +$ brew install ripgrep +``` + +If you're a **MacPorts** user, then you can install ripgrep from the +[official ports](https://www.macports.org/ports.php?by=name&substr=ripgrep): + +``` +$ sudo port install ripgrep +``` + +If you're a **Windows Chocolatey** user, then you can install ripgrep from the +[official repo](https://chocolatey.org/packages/ripgrep): + +``` +$ choco install ripgrep +``` + +If you're a **Windows Scoop** user, then you can install ripgrep from the +[official bucket](https://github.com/ScoopInstaller/Main/blob/master/bucket/ripgrep.json): + +``` +$ scoop install ripgrep +``` + +If you're a **Windows Winget** user, then you can install ripgrep from the +[winget-pkgs](https://github.com/microsoft/winget-pkgs/tree/master/manifests/b/BurntSushi/ripgrep) +repository: + +``` +$ winget install BurntSushi.ripgrep.MSVC +``` + +If you're an **Arch Linux** user, then you can install ripgrep from the official repos: + +``` +$ sudo pacman -S ripgrep +``` + +If you're a **Gentoo** user, you can install ripgrep from the +[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep): + +``` +$ sudo emerge sys-apps/ripgrep +``` + +If you're a **Fedora** user, you can install ripgrep from official +repositories. + +``` +$ sudo dnf install ripgrep +``` + +If you're an **openSUSE** user, ripgrep is included in **openSUSE Tumbleweed** +and **openSUSE Leap** since 15.1. 
+ +``` +$ sudo zypper install ripgrep +``` + +If you're a **CentOS Stream 10** user, you can install ripgrep from the +[EPEL](https://docs.fedoraproject.org/en-US/epel/getting-started/) repository: + +``` +$ sudo dnf config-manager --set-enabled crb +$ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-10.noarch.rpm +$ sudo dnf install ripgrep +``` + +If you're a **Red Hat 10** user, you can install ripgrep from the +[EPEL](https://docs.fedoraproject.org/en-US/epel/getting-started/) repository: + +``` +$ sudo subscription-manager repos --enable codeready-builder-for-rhel-10-$(arch)-rpms +$ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-10.noarch.rpm +$ sudo dnf install ripgrep +``` + +If you're a **Rocky Linux 10** user, you can install ripgrep from the +[EPEL](https://docs.fedoraproject.org/en-US/epel/getting-started/) repository: + +``` +$ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-10.noarch.rpm +$ sudo dnf install ripgrep +``` + +If you're a **Nix** user, you can install ripgrep from +[nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/by-name/ri/ripgrep/package.nix): + +``` +$ nix-env --install ripgrep +``` + +If you're a **Flox** user, you can install ripgrep as follows: + +``` +$ flox install ripgrep +``` + +If you're a **Guix** user, you can install ripgrep from the official +package collection: + +``` +$ guix install ripgrep +``` + +If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**), +then ripgrep can be installed using a binary `.deb` file provided in each +[ripgrep release](https://github.com/BurntSushi/ripgrep/releases). 
+ +``` +$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/14.1.1/ripgrep_14.1.1-1_amd64.deb +$ sudo dpkg -i ripgrep_14.1.1-1_amd64.deb +``` + +If you run Debian stable, ripgrep is [officially maintained by +Debian](https://tracker.debian.org/pkg/rust-ripgrep), although its version may +be older than the `deb` package available in the previous step. + +``` +$ sudo apt-get install ripgrep +``` + +If you're an **Ubuntu Cosmic (18.10)** (or newer) user, ripgrep is +[available](https://launchpad.net/ubuntu/+source/rust-ripgrep) using the same +packaging as Debian: + +``` +$ sudo apt-get install ripgrep +``` + +(N.B. Various snaps for ripgrep on Ubuntu are also available, but none of them +seem to work right and generate a number of very strange bug reports that I +don't know how to fix and don't have the time to fix. Therefore, it is no +longer a recommended installation option.) + +If you're an **ALT** user, you can install ripgrep from the +[official repo](https://packages.altlinux.org/en/search?name=ripgrep): + +``` +$ sudo apt-get install ripgrep +``` + +If you're a **FreeBSD** user, then you can install ripgrep from the +[official ports](https://www.freshports.org/textproc/ripgrep/): + +``` +$ sudo pkg install ripgrep +``` + +If you're an **OpenBSD** user, then you can install ripgrep from the +[official ports](https://openports.se/textproc/ripgrep): + +``` +$ doas pkg_add ripgrep +``` + +If you're a **NetBSD** user, then you can install ripgrep from +[pkgsrc](https://pkgsrc.se/textproc/ripgrep): + +``` +$ sudo pkgin install ripgrep +``` + +If you're a **Haiku x86_64** user, then you can install ripgrep from the +[official ports](https://github.com/haikuports/haikuports/tree/master/sys-apps/ripgrep): + +``` +$ sudo pkgman install ripgrep +``` + +If you're a **Haiku x86_gcc2** user, then you can install ripgrep from the +same port as Haiku x86_64 using the x86 secondary architecture build: + +``` +$ sudo pkgman install ripgrep_x86 +``` + +If you're 
a **Void Linux** user, then you can install ripgrep from the +[official repository](https://voidlinux.org/packages/?arch=x86_64&q=ripgrep): + +``` +$ sudo xbps-install -Syv ripgrep +``` + +If you're a **Rust programmer**, ripgrep can be installed with `cargo`. + +* Note that the minimum supported version of Rust for ripgrep is **1.85.0**, + although ripgrep may work with older versions. +* Note that the binary may be bigger than expected because it contains debug + symbols. This is intentional. To remove debug symbols and therefore reduce + the file size, run `strip` on the binary. + +``` +$ cargo install ripgrep +``` + +Alternatively, one can use [`cargo +binstall`](https://github.com/cargo-bins/cargo-binstall) to install a ripgrep +binary directly from GitHub: + +``` +$ cargo binstall ripgrep +``` + + +### Building + +ripgrep is written in Rust, so you'll need to grab a +[Rust installation](https://www.rust-lang.org/) in order to compile it. +ripgrep compiles with Rust 1.85.0 (stable) or newer. In general, ripgrep tracks +the latest stable release of the Rust compiler. + +To build ripgrep: + +``` +$ git clone https://github.com/BurntSushi/ripgrep +$ cd ripgrep +$ cargo build --release +$ ./target/release/rg --version +0.1.3 +``` + +**NOTE:** In the past, ripgrep supported a `simd-accel` Cargo feature when +using a Rust nightly compiler. This only benefited UTF-16 transcoding. +Since it required unstable features, this build mode was prone to breakage. +Because of that, support for it has been removed. If you want SIMD +optimizations for UTF-16 transcoding, then you'll have to petition the +[`encoding_rs`](https://github.com/hsivonen/encoding_rs) project to use stable +APIs. 
+ +Finally, optional PCRE2 support can be built with ripgrep by enabling the +`pcre2` feature: + +``` +$ cargo build --release --features 'pcre2' +``` + +Enabling the PCRE2 feature works with a stable Rust compiler and will +attempt to automatically find and link with your system's PCRE2 library via +`pkg-config`. If one doesn't exist, then ripgrep will build PCRE2 from source +using your system's C compiler and then statically link it into the final +executable. Static linking can be forced even when there is an available PCRE2 +system library by either building ripgrep with the MUSL target or by setting +`PCRE2_SYS_STATIC=1`. + +ripgrep can be built with the MUSL target on Linux by first installing the MUSL +library on your system (consult your friendly neighborhood package manager). +Then you just need to add MUSL support to your Rust toolchain and rebuild +ripgrep, which yields a fully static executable: + +``` +$ rustup target add x86_64-unknown-linux-musl +$ cargo build --release --target x86_64-unknown-linux-musl +``` + +Applying the `--features` flag from above works as expected. If you want to +build a static executable with MUSL and with PCRE2, then you will need to have +`musl-gcc` installed, which might be in a separate package from the actual +MUSL library, depending on your Linux distribution. + + +### Running tests + +ripgrep is relatively well-tested, including both unit tests and integration +tests. To run the full test suite, use: + +``` +$ cargo test --all +``` + +from the repository root. + + +### Related tools + +* [delta](https://github.com/dandavison/delta) is a syntax highlighting +pager that supports the `rg --json` output format. So all you need to do to +make it work is `rg --json pattern | delta`. See [delta's manual section on +grep](https://dandavison.github.io/delta/grep.html) for more details. 
+ + +### Vulnerability reporting + +For reporting a security vulnerability, please +[contact Andrew Gallant](https://blog.burntsushi.net/about/). +The contact page has my email address and PGP public key if you wish to send an +encrypted message. + + +### Translations + +The following is a list of known translations of ripgrep's documentation. These +are unofficially maintained and may not be up to date. + +* [Chinese](https://github.com/chinanf-boy/ripgrep-zh#%E6%9B%B4%E6%96%B0-) +* [Spanish](https://github.com/UltiRequiem/traducciones/tree/master/ripgrep) diff --git a/README.md b/README.md index 73868ac..ed0366d 100644 --- a/README.md +++ b/README.md @@ -1,541 +1,42 @@ -ripgrep (rg) ------------- -ripgrep is a line-oriented search tool that recursively searches the current -directory for a regex pattern. By default, ripgrep will respect gitignore rules -and automatically skip hidden files/directories and binary files. (To disable -all automatic filtering by default, use `rg -uuu`.) ripgrep has first class -support on Windows, macOS and Linux, with binary downloads available for [every -release](https://github.com/BurntSushi/ripgrep/releases). ripgrep is similar to -other popular search tools like The Silver Searcher, ack and grep. +# rgs -[![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions) -[![Crates.io](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep) -[![Packaging status](https://repology.org/badge/tiny-repos/ripgrep.svg)](https://repology.org/project/ripgrep/badges) +This repository is a fork of ripgrep with additional features. The original +ripgrep documentation is in README-ripgrep.md: -Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org). +- README-ripgrep.md +## Additional features in this fork -### CHANGELOG +### Multiline windowing -Please see the [CHANGELOG](CHANGELOG.md) for a release history. 
+- `--multiline-window=N` (short: `-W N`) limits multiline matches to a sliding + window of N lines while still using multiline matching semantics. +- `--multiline-window` implicitly enables `--multiline` and cannot be used with + `--no-multiline`. -### Documentation quick links +### Per-file match indexing -* [Installation](#installation) -* [User Guide](GUIDE.md) -* [Frequently Asked Questions](FAQ.md) -* [Regex syntax](https://docs.rs/regex/1/regex/#syntax) -* [Configuration files](GUIDE.md#configuration-file) -* [Shell completions](FAQ.md#complete) -* [Building](#building) -* [Translations](#translations) +- `--in-file-index` / `--no-in-file-index` control indexing of matches within a + file to disambiguate overlapping multiline results. +- When enabled, output is formatted as `filename[index]:line:`. +- When searching a single file, the output is formatted as `[index]:line:` (no + filename). +### Squashed output -### Screenshot of search results +- `--squash` collapses contiguous Unicode whitespace (including newlines) into a + single ASCII space in output. +- `--squash-nl-only` collapses newlines into spaces while preserving other + whitespace. +- When multiple lines are squashed into one, line numbers are printed as + `start-end:`. -[![A screenshot of a sample search with ripgrep](https://burntsushi.net/stuff/ripgrep1.png)](https://burntsushi.net/stuff/ripgrep1.png) +### Binary name +- The target binary name is `rgs` (not `rg`). -### Quick examples comparing tools +## Acknowledgements -This example searches the entire -[Linux kernel source tree](https://github.com/BurntSushi/linux) -(after running `make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where -all matches must be words. Timings were collected on a system with an Intel -i9-12900K 5.2 GHz. - -Please remember that a single benchmark is never enough! See my -[blog post on ripgrep](https://blog.burntsushi.net/ripgrep/) -for a very detailed comparison with more benchmarks and analysis. 
- -| Tool | Command | Line count | Time | -| ---- | ------- | ---------- | ---- | -| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 536 | **0.082s** (1.00x) | -| [hypergrep](https://github.com/p-ranav/hypergrep) | `hgrep -n -w '[A-Z]+_SUSPEND'` | 536 | 0.167s (2.04x) | -| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `git grep -P -n -w '[A-Z]+_SUSPEND'` | 536 | 0.273s (3.34x) | -| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 534 | 0.443s (5.43x) | -| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r --ignore-files --no-hidden -I -w '[A-Z]+_SUSPEND'` | 536 | 0.639s (7.82x) | -| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 536 | 0.727s (8.91x) | -| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 536 | 2.670s (32.70x) | -| [ack](https://github.com/beyondgrep/ack3) | `ack -w '[A-Z]+_SUSPEND'` | 2677 | 2.935s (35.94x) | - -Here's another benchmark on the same corpus as above that disregards gitignore -files and searches with a whitelist instead. The corpus is the same as in the -previous benchmark, and the flags passed to each command ensure that they are -doing equivalent work: - -| Tool | Command | Line count | Time | -| ---- | ------- | ---------- | ---- | -| ripgrep | `rg -uuu -tc -n -w '[A-Z]+_SUSPEND'` | 447 | **0.063s** (1.00x) | -| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 447 | 0.607s (9.62x) | -| [GNU grep](https://www.gnu.org/software/grep/) | `grep -E -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 447 | 0.674s (10.69x) | - -Now we'll move to searching on single large file. 
Here is a straight-up -comparison between ripgrep, ugrep and GNU grep on a file cached in memory -(~13GB, [`OpenSubtitles.raw.en.gz`](http://opus.nlpl.eu/download.php?f=OpenSubtitles/v2018/mono/OpenSubtitles.raw.en.gz), decompressed): - -| Tool | Command | Line count | Time | -| ---- | ------- | ---------- | ---- | -| ripgrep (Unicode) | `rg -w 'Sherlock [A-Z]\w+'` | 7882 | **1.042s** (1.00x) | -| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w 'Sherlock [A-Z]\w+'` | 7882 | 1.339s (1.28x) | -| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 egrep -w 'Sherlock [A-Z]\w+'` | 7882 | 6.577s (6.31x) | - -In the above benchmark, passing the `-n` flag (for showing line numbers) -increases the times to `1.664s` for ripgrep and `9.484s` for GNU grep. ugrep -times are unaffected by the presence or absence of `-n`. - -Beware of performance cliffs though: - -| Tool | Command | Line count | Time | -| ---- | ------- | ---------- | ---- | -| ripgrep (Unicode) | `rg -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | **1.053s** (1.00x) | -| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 grep -E -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | 6.234s (5.92x) | -| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | 28.973s (27.51x) | - -And performance can drop precipitously across the board when searching big -files for patterns without any opportunities for literal optimizations: - -| Tool | Command | Line count | Time | -| ---- | ------- | ---------- | ---- | -| ripgrep | `rg '[A-Za-z]{30}'` | 6749 | **15.569s** (1.00x) | -| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -E '[A-Za-z]{30}'` | 6749 | 21.857s (1.40x) | -| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C grep -E '[A-Za-z]{30}'` | 6749 | 32.409s (2.08x) | -| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 grep -E '[A-Za-z]{30}'` | 6795 | 8m30s (32.74x) | - -Finally, high match counts 
also tend to both tank performance and smooth -out the differences between tools (because performance is dominated by how -quickly one can handle a match and not the algorithm used to detect the match, -generally speaking): - -| Tool | Command | Line count | Time | -| ---- | ------- | ---------- | ---- | -| ripgrep | `rg the` | 83499915 | **6.948s** (1.00x) | -| [ugrep](https://github.com/Genivia/ugrep) | `ugrep the` | 83499915 | 11.721s (1.69x) | -| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C grep the` | 83499915 | 15.217s (2.19x) | - -### Why should I use ripgrep? - -* It can replace many use cases served by other search tools - because it contains most of their features and is generally faster. (See - [the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly - replace grep.) -* Like other tools specialized to code search, ripgrep defaults to - [recursive search](GUIDE.md#recursive-search) and does [automatic - filtering](GUIDE.md#automatic-filtering). Namely, ripgrep won't search files - ignored by your `.gitignore`/`.ignore`/`.rgignore` files, it won't search - hidden files and it won't search binary files. Automatic filtering can be - disabled with `rg -uuu`. -* ripgrep can [search specific types of files](GUIDE.md#manual-filtering-file-types). - For example, `rg -tpy foo` limits your search to Python files and `rg -Tjs - foo` excludes JavaScript files from your search. ripgrep can be taught about - new file types with custom matching rules. -* ripgrep supports many features found in `grep`, such as showing the context - of search results, searching multiple patterns, highlighting matches with - color and full Unicode support. Unlike GNU grep, ripgrep stays fast while - supporting Unicode (which is always on). -* ripgrep has optional support for switching its regex engine to use PCRE2. 
- Among other things, this makes it possible to use look-around and - backreferences in your patterns, which are not supported in ripgrep's default - regex engine. PCRE2 support can be enabled with `-P/--pcre2` (use PCRE2 - always) or `--auto-hybrid-regex` (use PCRE2 only if needed). An alternative - syntax is provided via the `--engine (default|pcre2|auto)` option. -* ripgrep has [rudimentary support for replacements](GUIDE.md#replacements), - which permit rewriting output based on what was matched. -* ripgrep supports [searching files in text encodings](GUIDE.md#file-encoding) - other than UTF-8, such as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. - (Some support for automatically detecting UTF-16 is provided. Other text - encodings must be specifically specified with the `-E/--encoding` flag.) -* ripgrep supports searching files compressed in a common format (brotli, - bzip2, gzip, lz4, lzma, xz, or zstandard) with the `-z/--search-zip` flag. -* ripgrep supports - [arbitrary input preprocessing filters](GUIDE.md#preprocessor) - which could be PDF text extraction, less supported decompression, decrypting, - automatic encoding detection and so on. -* ripgrep can be configured via a - [configuration file](GUIDE.md#configuration-file). - -In other words, use ripgrep if you like speed, filtering by default, fewer -bugs and Unicode support. - - -### Why shouldn't I use ripgrep? - -Despite initially not wanting to add every feature under the sun to ripgrep, -over time, ripgrep has grown support for most features found in other file -searching tools. This includes searching for results spanning across multiple -lines, and opt-in support for PCRE2, which provides look-around and -backreference support. - -At this point, the primary reasons not to use ripgrep probably consist of one -or more of the following: - -* You need a portable and ubiquitous tool. 
While ripgrep works on Windows, - macOS and Linux, it is not ubiquitous and it does not conform to any - standard such as POSIX. The best tool for this job is good old grep. -* There still exists some other feature (or bug) not listed in this README that - you rely on that's in another tool that isn't in ripgrep. -* There is a performance edge case where ripgrep doesn't do well where another - tool does do well. (Please file a bug report!) -* ripgrep isn't possible to install on your machine or isn't available for your - platform. (Please file a bug report!) - - -### Is it really faster than everything else? - -Generally, yes. A large number of benchmarks with detailed analysis for each is -[available on my blog](https://blog.burntsushi.net/ripgrep/). - -Summarizing, ripgrep is fast because: - -* It is built on top of - [Rust's regex engine](https://github.com/rust-lang/regex). - Rust's regex engine uses finite automata, SIMD and aggressive literal - optimizations to make searching very fast. (PCRE2 support can be opted into - with the `-P/--pcre2` flag.) -* Rust's regex library maintains performance with full Unicode support by - building UTF-8 decoding directly into its deterministic finite automaton - engine. -* It supports searching with either memory maps or by searching incrementally - with an intermediate buffer. The former is better for single files and the - latter is better for large directories. ripgrep chooses the best searching - strategy for you automatically. -* Applies your ignore patterns in `.gitignore` files using a - [`RegexSet`](https://docs.rs/regex/1/regex/struct.RegexSet.html). - That means a single file path can be matched against multiple glob patterns - simultaneously. -* It uses a lock-free parallel recursive directory iterator, courtesy of - [`crossbeam`](https://docs.rs/crossbeam) and - [`ignore`](https://docs.rs/ignore). 
- - -### Feature comparison - -Andy Lester, author of [ack](https://beyondgrep.com/), has published an -excellent table comparing the features of ack, ag, git-grep, GNU grep and -ripgrep: https://beyondgrep.com/feature-comparison/ - -Note that ripgrep has grown a few significant new features recently that -are not yet present in Andy's table. This includes, but is not limited to, -configuration files, passthru, support for searching compressed files, -multiline search and opt-in fancy regex support via PCRE2. - - -### Playground - -If you'd like to try ripgrep before installing, there's an unofficial -[playground](https://codapi.org/ripgrep/) and an [interactive -tutorial](https://codapi.org/try/ripgrep/). - -If you have any questions about these, please open an issue in the [tutorial -repo](https://github.com/nalgeon/tryxinyminutes). - - -### Installation - -The binary name for ripgrep is `rg`. - -**[Archives of precompiled binaries for ripgrep are available for Windows, -macOS and Linux.](https://github.com/BurntSushi/ripgrep/releases)** Linux and -Windows binaries are static executables. Users of platforms not explicitly -mentioned below are advised to download one of these archives. 
- -If you're a **macOS Homebrew** or a **Linuxbrew** user, then you can install -ripgrep from homebrew-core: - -``` -$ brew install ripgrep -``` - -If you're a **MacPorts** user, then you can install ripgrep from the -[official ports](https://www.macports.org/ports.php?by=name&substr=ripgrep): - -``` -$ sudo port install ripgrep -``` - -If you're a **Windows Chocolatey** user, then you can install ripgrep from the -[official repo](https://chocolatey.org/packages/ripgrep): - -``` -$ choco install ripgrep -``` - -If you're a **Windows Scoop** user, then you can install ripgrep from the -[official bucket](https://github.com/ScoopInstaller/Main/blob/master/bucket/ripgrep.json): - -``` -$ scoop install ripgrep -``` - -If you're a **Windows Winget** user, then you can install ripgrep from the -[winget-pkgs](https://github.com/microsoft/winget-pkgs/tree/master/manifests/b/BurntSushi/ripgrep) -repository: - -``` -$ winget install BurntSushi.ripgrep.MSVC -``` - -If you're an **Arch Linux** user, then you can install ripgrep from the official repos: - -``` -$ sudo pacman -S ripgrep -``` - -If you're a **Gentoo** user, you can install ripgrep from the -[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep): - -``` -$ sudo emerge sys-apps/ripgrep -``` - -If you're a **Fedora** user, you can install ripgrep from official -repositories. - -``` -$ sudo dnf install ripgrep -``` - -If you're an **openSUSE** user, ripgrep is included in **openSUSE Tumbleweed** -and **openSUSE Leap** since 15.1. 
- -``` -$ sudo zypper install ripgrep -``` - -If you're a **CentOS Stream 10** user, you can install ripgrep from the -[EPEL](https://docs.fedoraproject.org/en-US/epel/getting-started/) repository: - -``` -$ sudo dnf config-manager --set-enabled crb -$ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-10.noarch.rpm -$ sudo dnf install ripgrep -``` - -If you're a **Red Hat 10** user, you can install ripgrep from the -[EPEL](https://docs.fedoraproject.org/en-US/epel/getting-started/) repository: - -``` -$ sudo subscription-manager repos --enable codeready-builder-for-rhel-10-$(arch)-rpms -$ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-10.noarch.rpm -$ sudo dnf install ripgrep -``` - -If you're a **Rocky Linux 10** user, you can install ripgrep from the -[EPEL](https://docs.fedoraproject.org/en-US/epel/getting-started/) repository: - -``` -$ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-10.noarch.rpm -$ sudo dnf install ripgrep -``` - -If you're a **Nix** user, you can install ripgrep from -[nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/by-name/ri/ripgrep/package.nix): - -``` -$ nix-env --install ripgrep -``` - -If you're a **Flox** user, you can install ripgrep as follows: - -``` -$ flox install ripgrep -``` - -If you're a **Guix** user, you can install ripgrep from the official -package collection: - -``` -$ guix install ripgrep -``` - -If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**), -then ripgrep can be installed using a binary `.deb` file provided in each -[ripgrep release](https://github.com/BurntSushi/ripgrep/releases). 
- -``` -$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/14.1.1/ripgrep_14.1.1-1_amd64.deb -$ sudo dpkg -i ripgrep_14.1.1-1_amd64.deb -``` - -If you run Debian stable, ripgrep is [officially maintained by -Debian](https://tracker.debian.org/pkg/rust-ripgrep), although its version may -be older than the `deb` package available in the previous step. - -``` -$ sudo apt-get install ripgrep -``` - -If you're an **Ubuntu Cosmic (18.10)** (or newer) user, ripgrep is -[available](https://launchpad.net/ubuntu/+source/rust-ripgrep) using the same -packaging as Debian: - -``` -$ sudo apt-get install ripgrep -``` - -(N.B. Various snaps for ripgrep on Ubuntu are also available, but none of them -seem to work right and generate a number of very strange bug reports that I -don't know how to fix and don't have the time to fix. Therefore, it is no -longer a recommended installation option.) - -If you're an **ALT** user, you can install ripgrep from the -[official repo](https://packages.altlinux.org/en/search?name=ripgrep): - -``` -$ sudo apt-get install ripgrep -``` - -If you're a **FreeBSD** user, then you can install ripgrep from the -[official ports](https://www.freshports.org/textproc/ripgrep/): - -``` -$ sudo pkg install ripgrep -``` - -If you're an **OpenBSD** user, then you can install ripgrep from the -[official ports](https://openports.se/textproc/ripgrep): - -``` -$ doas pkg_add ripgrep -``` - -If you're a **NetBSD** user, then you can install ripgrep from -[pkgsrc](https://pkgsrc.se/textproc/ripgrep): - -``` -$ sudo pkgin install ripgrep -``` - -If you're a **Haiku x86_64** user, then you can install ripgrep from the -[official ports](https://github.com/haikuports/haikuports/tree/master/sys-apps/ripgrep): - -``` -$ sudo pkgman install ripgrep -``` - -If you're a **Haiku x86_gcc2** user, then you can install ripgrep from the -same port as Haiku x86_64 using the x86 secondary architecture build: - -``` -$ sudo pkgman install ripgrep_x86 -``` - -If you're 
a **Void Linux** user, then you can install ripgrep from the -[official repository](https://voidlinux.org/packages/?arch=x86_64&q=ripgrep): - -``` -$ sudo xbps-install -Syv ripgrep -``` - -If you're a **Rust programmer**, ripgrep can be installed with `cargo`. - -* Note that the minimum supported version of Rust for ripgrep is **1.85.0**, - although ripgrep may work with older versions. -* Note that the binary may be bigger than expected because it contains debug - symbols. This is intentional. To remove debug symbols and therefore reduce - the file size, run `strip` on the binary. - -``` -$ cargo install ripgrep -``` - -Alternatively, one can use [`cargo -binstall`](https://github.com/cargo-bins/cargo-binstall) to install a ripgrep -binary directly from GitHub: - -``` -$ cargo binstall ripgrep -``` - - -### Building - -ripgrep is written in Rust, so you'll need to grab a -[Rust installation](https://www.rust-lang.org/) in order to compile it. -ripgrep compiles with Rust 1.85.0 (stable) or newer. In general, ripgrep tracks -the latest stable release of the Rust compiler. - -To build ripgrep: - -``` -$ git clone https://github.com/BurntSushi/ripgrep -$ cd ripgrep -$ cargo build --release -$ ./target/release/rg --version -0.1.3 -``` - -**NOTE:** In the past, ripgrep supported a `simd-accel` Cargo feature when -using a Rust nightly compiler. This only benefited UTF-16 transcoding. -Since it required unstable features, this build mode was prone to breakage. -Because of that, support for it has been removed. If you want SIMD -optimizations for UTF-16 transcoding, then you'll have to petition the -[`encoding_rs`](https://github.com/hsivonen/encoding_rs) project to use stable -APIs. 
- -Finally, optional PCRE2 support can be built with ripgrep by enabling the -`pcre2` feature: - -``` -$ cargo build --release --features 'pcre2' -``` - -Enabling the PCRE2 feature works with a stable Rust compiler and will -attempt to automatically find and link with your system's PCRE2 library via -`pkg-config`. If one doesn't exist, then ripgrep will build PCRE2 from source -using your system's C compiler and then statically link it into the final -executable. Static linking can be forced even when there is an available PCRE2 -system library by either building ripgrep with the MUSL target or by setting -`PCRE2_SYS_STATIC=1`. - -ripgrep can be built with the MUSL target on Linux by first installing the MUSL -library on your system (consult your friendly neighborhood package manager). -Then you just need to add MUSL support to your Rust toolchain and rebuild -ripgrep, which yields a fully static executable: - -``` -$ rustup target add x86_64-unknown-linux-musl -$ cargo build --release --target x86_64-unknown-linux-musl -``` - -Applying the `--features` flag from above works as expected. If you want to -build a static executable with MUSL and with PCRE2, then you will need to have -`musl-gcc` installed, which might be in a separate package from the actual -MUSL library, depending on your Linux distribution. - - -### Running tests - -ripgrep is relatively well-tested, including both unit tests and integration -tests. To run the full test suite, use: - -``` -$ cargo test --all -``` - -from the repository root. - - -### Related tools - -* [delta](https://github.com/dandavison/delta) is a syntax highlighting -pager that supports the `rg --json` output format. So all you need to do to -make it work is `rg --json pattern | delta`. See [delta's manual section on -grep](https://dandavison.github.io/delta/grep.html) for more details. 
- - -### Vulnerability reporting - -For reporting a security vulnerability, please -[contact Andrew Gallant](https://blog.burntsushi.net/about/). -The contact page has my email address and PGP public key if you wish to send an -encrypted message. - - -### Translations - -The following is a list of known translations of ripgrep's documentation. These -are unofficially maintained and may not be up to date. - -* [Chinese](https://github.com/chinanf-boy/ripgrep-zh#%E6%9B%B4%E6%96%B0-) -* [Spanish](https://github.com/UltiRequiem/traducciones/tree/master/ripgrep) +This project is built on top of ripgrep by Andrew Gallant and contributors. +All credit for the original tool, documentation, and design belongs to the +ripgrep project. See README-ripgrep.md and the upstream licenses for details. diff --git a/build.rs b/build.rs index 3479b69..4df9832 100644 --- a/build.rs +++ b/build.rs @@ -24,11 +24,11 @@ fn set_windows_exe_options() { println!("cargo:rerun-if-changed={MANIFEST}"); // Embed the Windows application manifest file. - println!("cargo:rustc-link-arg-bin=rg=/MANIFEST:EMBED"); - println!("cargo:rustc-link-arg-bin=rg=/MANIFESTINPUT:{manifest}"); + println!("cargo:rustc-link-arg-bin=rgs=/MANIFEST:EMBED"); + println!("cargo:rustc-link-arg-bin=rgs=/MANIFESTINPUT:{manifest}"); // Turn linker warnings into errors. Helps debugging, otherwise the // warnings get squashed (I believe). - println!("cargo:rustc-link-arg-bin=rg=/WX"); + println!("cargo:rustc-link-arg-bin=rgs=/WX"); } /// Make the current git hash available to the build as the environment diff --git a/ci/sha256-releases b/ci/sha256-releases index 670c976..408a57a 100755 --- a/ci/sha256-releases +++ b/ci/sha256-releases @@ -11,7 +11,7 @@ version="$1" # Linux and Darwin builds. 
for arch in i686 x86_64; do for target in apple-darwin unknown-linux-musl; do - url="https://github.com/BurntSushi/ripgrep/releases/download/$version/ripgrep-$version-$arch-$target.tar.gz" + url="https://git.peisongxiao.com/peisongxiao/rgs/releases/download/$version/rgs-$version-$arch-$target.tar.gz" sha=$(curl -sfSL "$url" | sha256sum) echo "$version-$arch-$target $sha" done @@ -19,7 +19,7 @@ done # Source. for ext in zip tar.gz; do - url="https://github.com/BurntSushi/ripgrep/archive/$version.$ext" + url="https://git.peisongxiao.com/peisongxiao/rgs/archive/$version.$ext" sha=$(curl -sfSL "$url" | sha256sum) echo "source.$ext $sha" done diff --git a/ci/test-complete b/ci/test-complete index 3793aff..0468e3d 100755 --- a/ci/test-complete +++ b/ci/test-complete @@ -18,11 +18,11 @@ get_comp_args() { main() { local diff - local rg="${0:a:h}/../${TARGET_DIR:-target}/release/rg" - local _rg="${0:a:h}/../crates/core/flags/complete/rg.zsh" + local rg="${0:a:h}/../${TARGET_DIR:-target}/release/rgs" + local _rg="${0:a:h}/../crates/core/flags/complete/rgs.zsh" local -a help_args comp_args - [[ -e $rg ]] || rg=${rg/%\/release\/rg/\/debug\/rg} + [[ -e $rg ]] || rg=${rg/%\/release\/rgs/\/debug\/rgs} rg=${rg:a} _rg=${_rg:a} diff --git a/crates/core/flags/complete/rg.zsh b/crates/core/flags/complete/rgs.zsh similarity index 98% rename from crates/core/flags/complete/rg.zsh rename to crates/core/flags/complete/rgs.zsh index 2078c18..e8ef17f 100644 --- a/crates/core/flags/complete/rg.zsh +++ b/crates/core/flags/complete/rgs.zsh @@ -1,7 +1,7 @@ -#compdef rg +#compdef rgs ## -# zsh completion function for ripgrep +# zsh completion function for rgs # # Run ci/test-complete after building to ensure that the options supported by # this function stay in synch with the `rg` binary. 
@@ -212,7 +212,7 @@ _rg() { + '(multiline)' # Multiline options {-U,--multiline}'[permit matching across multiple lines]' - '--multiline-window=[limit multiline matches to NUM lines (with -U)]:number of lines' + {-W+,--multiline-window=}'[limit multiline matches to NUM lines (with -U enabled implicitly)]:number of lines' $no'(multiline-dotall)--no-multiline[restrict matches to at most one line each]' + '(multiline-dotall)' # Multiline DOTALL options @@ -282,6 +282,10 @@ _rg() { + '(threads)' # Thread-count options '(sort)'{-j+,--threads=}'[specify approximate number of threads to use]:number of threads' + + '(squash)' # Squash options + '--squash[squash contiguous whitespace into a single space]' + '--squash-nl-only[squash new lines into a single space]' + + '(trim)' # Trim options '--trim[trim any ASCII whitespace prefix from each line]' $no"--no-trim[don't trim ASCII whitespace prefix from each line]" diff --git a/crates/core/flags/complete/zsh.rs b/crates/core/flags/complete/zsh.rs index 0aa8b9c..1e6e0a2 100644 --- a/crates/core/flags/complete/zsh.rs +++ b/crates/core/flags/complete/zsh.rs @@ -26,7 +26,7 @@ pub(crate) fn generate() -> String { }) .collect::>() .join("\n"); - include_str!("rg.zsh") + include_str!("rgs.zsh") .replace("!ENCODINGS!", super::ENCODINGS.trim_end()) .replace("!HYPERLINK_ALIASES!", &hyperlink_alias_descriptions) } diff --git a/crates/core/flags/defs.rs b/crates/core/flags/defs.rs index 9a31ec2..b9dcde3 100644 --- a/crates/core/flags/defs.rs +++ b/crates/core/flags/defs.rs @@ -134,6 +134,8 @@ pub(super) const FLAGS: &[&dyn Flag] = &[ &Text, &Threads, &Trace, + &Squash, + &SquashNlOnly, &Trim, &Type, &TypeNot, @@ -4187,7 +4189,14 @@ This overrides the \flag{stop-on-nonmatch} flag. 
} fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { - args.multiline = v.unwrap_switch(); + let enabled = v.unwrap_switch(); + if !enabled && args.multiline_window.is_some() { + anyhow::bail!( + "--no-multiline cannot be used with --multiline-window \ + (which implicitly enables --multiline)" + ); + } + args.multiline = enabled; if args.multiline { args.stop_on_nonmatch = false; } @@ -4219,6 +4228,9 @@ impl Flag for MultilineWindow { fn is_switch(&self) -> bool { false } + fn name_short(&self) -> Option { + Some(b'W') + } fn name_long(&self) -> &'static str { "multiline-window" } @@ -4236,7 +4248,7 @@ impl Flag for MultilineWindow { Limit the maximum number of lines that a multiline match may span to \fINUM\fP (use \fB--multiline-window=\fP\fINUM\fP). .sp -This flag requires \flag{multiline}. Matches are found as if the file being +This flag implicitly enables \flag{multiline}. Matches are found as if the file being searched were limited to \fINUM\fP lines at a time, which can prevent unintended long matches while still enabling multi-line searching. .sp @@ -4250,6 +4262,7 @@ The value of \fINUM\fP must be at least 1. 
anyhow::bail!("--multiline-window must be at least 1"); } args.multiline_window = Some(lines); + args.multiline = true; Ok(()) } } @@ -4262,6 +4275,11 @@ fn test_multiline_window() { let args = parse_low_raw(["--multiline-window=2"]).unwrap(); assert_eq!(Some(2), args.multiline_window); + assert_eq!(true, args.multiline); + + let args = parse_low_raw(["-W", "3"]).unwrap(); + assert_eq!(Some(3), args.multiline_window); + assert_eq!(true, args.multiline); } /// --multiline-dotall @@ -6866,6 +6884,88 @@ fn test_trace() { assert_eq!(Some(LoggingMode::Trace), args.logging); } +/// --squash +#[derive(Debug)] +struct Squash; + +impl Flag for Squash { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "squash" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Squash contiguous whitespace in output to a single space." + } + fn doc_long(&self) -> &'static str { + r#" +Squash any contiguous Unicode whitespace (including new lines) into a single +ASCII space when printing matches. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--squash can only be enabled"); + args.squash = grep::printer::SquashMode::Whitespace; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_squash() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(grep::printer::SquashMode::None, args.squash); + + let args = parse_low_raw(["--squash"]).unwrap(); + assert_eq!(grep::printer::SquashMode::Whitespace, args.squash); +} + +/// --squash-nl-only +#[derive(Debug)] +struct SquashNlOnly; + +impl Flag for SquashNlOnly { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "squash-nl-only" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Squash new lines into spaces in output." 
+ } + fn doc_long(&self) -> &'static str { + r#" +Squash contiguous line terminators into a single ASCII space when printing +matches. Other whitespace is preserved. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--squash-nl-only can only be enabled"); + args.squash = grep::printer::SquashMode::Newlines; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_squash_nl_only() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(grep::printer::SquashMode::None, args.squash); + + let args = parse_low_raw(["--squash-nl-only"]).unwrap(); + assert_eq!(grep::printer::SquashMode::Newlines, args.squash); +} + /// --trim #[derive(Debug)] struct Trim; diff --git a/crates/core/flags/hiargs.rs b/crates/core/flags/hiargs.rs index eb74819..33bbd50 100644 --- a/crates/core/flags/hiargs.rs +++ b/crates/core/flags/hiargs.rs @@ -9,7 +9,7 @@ use std::{ use { bstr::BString, - grep::printer::{ColorSpecs, SummaryKind}, + grep::printer::{ColorSpecs, SquashMode, SummaryKind}, }; use crate::{ @@ -100,6 +100,7 @@ pub(crate) struct HiArgs { sort: Option, stats: Option, stop_on_nonmatch: bool, + squash: SquashMode, threads: usize, trim: bool, types: ignore::types::Types, @@ -142,9 +143,6 @@ impl HiArgs { } let mut state = State::new()?; - if low.multiline_window.is_some() && !low.multiline { - anyhow::bail!("--multiline-window requires --multiline"); - } let patterns = Patterns::from_low_args(&mut state, &mut low)?; let paths = Paths::from_low_args(&mut state, &patterns, &mut low)?; @@ -320,6 +318,7 @@ impl HiArgs { sort: low.sort, stats, stop_on_nonmatch: low.stop_on_nonmatch, + squash: low.squash, threads, trim: low.trim, types, @@ -632,6 +631,7 @@ impl HiArgs { .per_match_one_line(true) .per_match(self.vimgrep) .replacement(self.replace.clone().map(|r| r.into())) + .squash(self.squash) .separator_context(self.context_separator.clone().into_bytes()) .separator_field_context( 
self.field_context_separator.clone().into_bytes(), diff --git a/crates/core/flags/lowargs.rs b/crates/core/flags/lowargs.rs index 596206b..ca4cf2d 100644 --- a/crates/core/flags/lowargs.rs +++ b/crates/core/flags/lowargs.rs @@ -9,7 +9,7 @@ use std::{ use { bstr::{BString, ByteVec}, - grep::printer::{HyperlinkFormat, UserColorSpec}, + grep::printer::{HyperlinkFormat, SquashMode, UserColorSpec}, }; /// A collection of "low level" arguments. @@ -103,6 +103,7 @@ pub(crate) struct LowArgs { pub(crate) sort: Option, pub(crate) stats: bool, pub(crate) stop_on_nonmatch: bool, + pub(crate) squash: SquashMode, pub(crate) threads: Option, pub(crate) trim: bool, pub(crate) type_changes: Vec, diff --git a/crates/printer/src/lib.rs b/crates/printer/src/lib.rs index 9675c64..a7b7034 100644 --- a/crates/printer/src/lib.rs +++ b/crates/printer/src/lib.rs @@ -67,7 +67,7 @@ pub use crate::{ HyperlinkFormat, HyperlinkFormatError, hyperlink_aliases, }, path::{PathPrinter, PathPrinterBuilder}, - standard::{Standard, StandardBuilder, StandardSink}, + standard::{SquashMode, Standard, StandardBuilder, StandardSink}, stats::Stats, summary::{Summary, SummaryBuilder, SummaryKind, SummarySink}, }; diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs index 6ef590f..340c906 100644 --- a/crates/printer/src/standard.rs +++ b/crates/printer/src/standard.rs @@ -27,6 +27,23 @@ use crate::{ }, }; +/// Controls how whitespace is squashed in the standard printer output. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum SquashMode { + /// Do not squash whitespace in output. + None, + /// Squash any Unicode whitespace into a single ASCII space. + Whitespace, + /// Squash line terminators into a single ASCII space. + Newlines, +} + +impl Default for SquashMode { + fn default() -> SquashMode { + SquashMode::None + } +} + /// The configuration for the standard printer. 
/// /// This is manipulated by the StandardBuilder and then referenced by the @@ -40,6 +57,7 @@ struct Config { heading: bool, path: bool, in_file_index: bool, + squash: SquashMode, only_matching: bool, per_match: bool, per_match_one_line: bool, @@ -66,6 +84,7 @@ impl Default for Config { heading: false, path: true, in_file_index: false, + squash: SquashMode::None, only_matching: false, per_match: false, per_match_one_line: false, @@ -366,6 +385,12 @@ impl StandardBuilder { self } + /// Configure whitespace squashing in standard output. + pub fn squash(&mut self, mode: SquashMode) -> &mut StandardBuilder { + self.config.squash = mode; + self + } + /// Set the separator used between sets of search results. /// /// When this is set, then it will be printed on its own line immediately @@ -969,6 +994,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { self.sunk.absolute_byte_offset(), self.sunk.line_number(), None, + None, self.in_file_index(), )?; self.write_line(self.sunk.bytes()) @@ -988,6 +1014,10 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { // instead. 
debug_assert!(self.multi_line()); + if self.config().squash != SquashMode::None { + return self.sink_fast_multi_line_squash(); + } + let line_term = self.searcher.line_terminator().as_byte(); let mut absolute_byte_offset = self.sunk.absolute_byte_offset(); for (i, line) in self.sunk.lines(line_term).enumerate() { @@ -995,6 +1025,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { absolute_byte_offset, self.sunk.line_number().map(|n| n + i as u64), None, + None, self.in_file_index(), )?; absolute_byte_offset += line.len() as u64; @@ -1004,6 +1035,20 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { Ok(()) } + fn sink_fast_multi_line_squash(&self) -> io::Result<()> { + let bytes = self.sunk.bytes(); + let (line_number, line_number_end) = + self.line_range(self.sunk.line_number(), bytes); + self.write_prelude( + self.sunk.absolute_byte_offset(), + line_number, + line_number_end, + None, + self.in_file_index(), + )?; + self.write_line(bytes) + } + /// Print a matching line where the configuration of the printer requires /// finding each individual match (e.g., for coloring). 
fn sink_slow(&self) -> io::Result<()> { @@ -1015,6 +1060,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { self.write_prelude( self.sunk.absolute_byte_offset() + m.start() as u64, self.sunk.line_number(), + None, Some(m.start() as u64 + 1), self.in_file_index(), )?; @@ -1027,6 +1073,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { self.write_prelude( self.sunk.absolute_byte_offset() + m.start() as u64, self.sunk.line_number(), + None, Some(m.start() as u64 + 1), self.in_file_index(), )?; @@ -1036,6 +1083,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { self.write_prelude( self.sunk.absolute_byte_offset(), self.sunk.line_number(), + None, Some(self.sunk.matches()[0].start() as u64 + 1), self.in_file_index(), )?; @@ -1048,6 +1096,14 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { debug_assert!(!self.sunk.matches().is_empty()); debug_assert!(self.multi_line()); + if self.config().squash != SquashMode::None { + if self.config().only_matching { + return self.sink_slow_multi_line_only_matching_squash(); + } else if self.config().per_match { + return self.sink_slow_multi_per_match_squash(); + } + return self.sink_slow_multi_line_squash(); + } if self.config().only_matching { return self.sink_slow_multi_line_only_matching(); } else if self.config().per_match { @@ -1065,6 +1121,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { self.write_prelude( self.sunk.absolute_byte_offset() + line.start() as u64, self.sunk.line_number().map(|n| n + count), + None, Some(matches[0].start() as u64 + 1), self.in_file_index(), )?; @@ -1080,6 +1137,20 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { Ok(()) } + fn sink_slow_multi_line_squash(&self) -> io::Result<()> { + let bytes = self.sunk.bytes(); + let (line_number, line_number_end) = + self.line_range(self.sunk.line_number(), bytes); + self.write_prelude( + self.sunk.absolute_byte_offset(), + line_number, + line_number_end, + 
Some(self.sunk.matches()[0].start() as u64 + 1), + self.in_file_index(), + )?; + self.write_colored_line(self.sunk.matches(), bytes) + } + fn sink_slow_multi_line_only_matching(&self) -> io::Result<()> { let line_term = self.searcher.line_terminator().as_byte(); let spec = self.config().colors.matched(); @@ -1111,6 +1182,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { self.write_prelude( self.sunk.absolute_byte_offset() + m.start() as u64, self.sunk.line_number().map(|n| n + count), + None, Some(m.start() as u64 + 1), self.in_file_index(), )?; @@ -1132,6 +1204,30 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { Ok(()) } + fn sink_slow_multi_line_only_matching_squash(&self) -> io::Result<()> { + let bytes = self.sunk.bytes(); + for &m in self.sunk.matches() { + let line_start = self.line_number_for_offset( + self.sunk.line_number(), + bytes, + m.start(), + ); + let (line_number, line_number_end) = + self.line_range(line_start, &bytes[m]); + self.write_prelude( + self.sunk.absolute_byte_offset() + m.start() as u64, + line_number, + line_number_end, + Some(m.start() as u64 + 1), + self.in_file_index(), + )?; + + let buf = &bytes[m]; + self.write_colored_line(&[Match::new(0, buf.len())], buf)?; + } + Ok(()) + } + fn sink_slow_multi_per_match(&self) -> io::Result<()> { let line_term = self.searcher.line_terminator().as_byte(); let spec = self.config().colors.matched(); @@ -1150,6 +1246,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { self.write_prelude( self.sunk.absolute_byte_offset() + line.start() as u64, self.sunk.line_number().map(|n| n + count), + None, Some(m.start().saturating_sub(line.start()) as u64 + 1), self.in_file_index(), )?; @@ -1190,6 +1287,31 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { Ok(()) } + fn sink_slow_multi_per_match_squash(&self) -> io::Result<()> { + let bytes = self.sunk.bytes(); + for &m in self.sunk.matches() { + let line_start = self.line_number_for_offset( + 
self.sunk.line_number(), + bytes, + m.start(), + ); + let (line_number, line_number_end) = + self.line_range(line_start, &bytes[m]); + let column = self.column_number_for_offset(bytes, m.start()); + self.write_prelude( + self.sunk.absolute_byte_offset() + m.start() as u64, + line_number, + line_number_end, + Some(column), + self.in_file_index(), + )?; + + let buf = &bytes[m]; + self.write_colored_line(&[Match::new(0, buf.len())], buf)?; + } + Ok(()) + } + /// Write the beginning part of a matching line. This (may) include things /// like the file path, line number among others, depending on the /// configuration and the parameters given. @@ -1198,13 +1320,14 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { &self, absolute_byte_offset: u64, line_number: Option, + line_number_end: Option, column: Option, in_file_index: Option, ) -> io::Result<()> { let mut prelude = PreludeWriter::new(self); prelude.start(line_number, column)?; prelude.write_path(in_file_index)?; - prelude.write_line_number(line_number)?; + prelude.write_line_number(line_number, line_number_end)?; prelude.write_column_number(column)?; prelude.write_byte_offset(absolute_byte_offset)?; prelude.end() @@ -1228,12 +1351,20 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { self.sunk.matches(), &mut 0, )?; - } else { + } else if self.config().squash == SquashMode::None { // self.write_trim(line)?; self.write(line)?; if !self.has_line_terminator(line) { self.write_line_term()?; } + } else { + let mut range = Match::new(0, line.len()); + self.trim_line_terminator(line, &mut range); + let line = &line[range]; + let mut squasher = SquashState::new(); + self.write_squashed(line, &mut squasher)?; + squasher.finish(self)?; + self.write_line_term()?; } Ok(()) } @@ -1254,7 +1385,11 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { if self.exceeds_max_columns(bytes) { self.write_exceeded_line(bytes, line, matches, &mut 0) } else { - self.write_colored_matches(bytes, line, 
matches, &mut 0)?; + if self.config().squash == SquashMode::None { + self.write_colored_matches(bytes, line, matches, &mut 0)?; + } else { + self.write_colored_matches_squashed(bytes, line, matches, &mut 0)?; + } self.write_line_term()?; Ok(()) } @@ -1309,6 +1444,135 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { Ok(()) } + fn write_colored_matches_squashed( + &self, + bytes: &[u8], + mut line: Match, + matches: &[Match], + match_index: &mut usize, + ) -> io::Result<()> { + self.trim_line_terminator(bytes, &mut line); + let mut squasher = SquashState::new(); + if matches.is_empty() { + self.write_squashed(&bytes[line], &mut squasher)?; + squasher.finish(self)?; + return Ok(()); + } + self.start_line_highlight()?; + while !line.is_empty() { + if matches[*match_index].end() <= line.start() { + if *match_index + 1 < matches.len() { + *match_index += 1; + continue; + } else { + self.end_color_match()?; + self.write_squashed(&bytes[line], &mut squasher)?; + break; + } + } + + let m = matches[*match_index]; + if line.start() < m.start() { + let upto = cmp::min(line.end(), m.start()); + self.end_color_match()?; + self.write_squashed( + &bytes[line.with_end(upto)], + &mut squasher, + )?; + line = line.with_start(upto); + } else { + let upto = cmp::min(line.end(), m.end()); + self.start_color_match()?; + self.write_squashed( + &bytes[line.with_end(upto)], + &mut squasher, + )?; + line = line.with_start(upto); + } + } + self.end_color_match()?; + self.end_line_highlight()?; + squasher.finish(self)?; + Ok(()) + } + + fn write_squashed( + &self, + bytes: &[u8], + squasher: &mut SquashState, + ) -> io::Result<()> { + match self.config().squash { + SquashMode::None => self.write(bytes), + SquashMode::Whitespace => self.write_squashed_whitespace(bytes, squasher), + SquashMode::Newlines => self.write_squashed_newlines(bytes, squasher), + } + } + + fn write_squashed_whitespace( + &self, + bytes: &[u8], + squasher: &mut SquashState, + ) -> io::Result<()> { + let 
line_term = self.searcher.line_terminator(); + let mut iter = bytes.char_indices(); + while let Some((start, end, ch)) = iter.next() { + let is_line_term = !line_term.is_crlf() + && bytes[start] == line_term.as_byte(); + if ch.is_whitespace() || is_line_term { + squasher.pending_space = true; + } else { + squasher.flush(self)?; + self.write(&bytes[start..end])?; + } + } + Ok(()) + } + + fn write_squashed_newlines( + &self, + bytes: &[u8], + squasher: &mut SquashState, + ) -> io::Result<()> { + let line_term = self.searcher.line_terminator(); + let mut last = 0; + let mut i = 0; + while i < bytes.len() { + let mut newline_start = None; + let mut newline_end = 0; + if line_term.is_crlf() + && bytes[i] == b'\r' + && i + 1 < bytes.len() + && bytes[i + 1] == b'\n' + { + newline_start = Some(i); + newline_end = i + 2; + } else if line_term.is_crlf() && bytes[i] == b'\n' { + newline_start = Some(i); + newline_end = i + 1; + } else if !line_term.is_crlf() && bytes[i] == line_term.as_byte() { + newline_start = Some(i); + newline_end = i + 1; + } + + if let Some(start) = newline_start { + if last < start { + squasher.flush(self)?; + self.write(&bytes[last..start])?; + } + squasher.pending_space = true; + i = newline_end; + last = newline_end; + } else { + i += 1; + } + } + if last < bytes.len() { + squasher.flush(self)?; + self.write(&bytes[last..])?; + } + Ok(()) + } + fn write_exceeded_line( &self, bytes: &[u8], @@ -1593,6 +1857,56 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { self.config().max_columns.map_or(false, |m| line.len() as u64 > m) } + fn line_span(&self, bytes: &[u8]) -> u64 { + if bytes.is_empty() { + return 0; + } + let line_term = self.searcher.line_terminator().as_byte(); + let count = bytes.iter().filter(|&&b| b == line_term).count() as u64; + let ends_with_term = bytes.last().map_or(false, |&b| b == line_term); + let lines = if ends_with_term { count } else { count + 1 }; + lines.saturating_sub(1) + } + + fn line_range( + &self, + 
line_start: Option, + bytes: &[u8], + ) -> (Option, Option) { + let Some(start) = line_start else { return (None, None) }; + let end = start + self.line_span(bytes); + if end > start { + (Some(start), Some(end)) + } else { + (Some(start), None) + } + } + + fn line_number_for_offset( + &self, + line_start: Option, + bytes: &[u8], + offset: usize, + ) -> Option { + let line_start = line_start?; + let line_term = self.searcher.line_terminator().as_byte(); + let count = bytes[..offset] + .iter() + .filter(|&&b| b == line_term) + .count() as u64; + Some(line_start + count) + } + + fn column_number_for_offset(&self, bytes: &[u8], offset: usize) -> u64 { + let line_term = self.searcher.line_terminator().as_byte(); + let line_start = bytes[..offset] + .iter() + .rposition(|&b| b == line_term) + .map(|pos| pos + 1) + .unwrap_or(0); + (offset - line_start) as u64 + 1 + } + /// Returns true if and only if the searcher may report matches over /// multiple lines. /// @@ -1618,6 +1932,35 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { } } +#[derive(Debug, Default)] +struct SquashState { + pending_space: bool, +} + +impl SquashState { + fn new() -> SquashState { + SquashState { pending_space: false } + } + + fn flush( + &mut self, + std: &StandardImpl<'_, M, W>, + ) -> io::Result<()> { + if self.pending_space { + std.write(b" ")?; + self.pending_space = false; + } + Ok(()) + } + + fn finish( + &mut self, + std: &StandardImpl<'_, M, W>, + ) -> io::Result<()> { + self.flush(std) + } +} + /// A writer for the prelude (the beginning part of a matching line). /// /// This encapsulates the state needed to print the prelude. @@ -1719,11 +2062,20 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { /// Writes the line number field if present. 
#[inline(always)] - fn write_line_number(&mut self, line: Option) -> io::Result<()> { + fn write_line_number( + &mut self, + line: Option, + line_end: Option, + ) -> io::Result<()> { let Some(line_number) = line else { return Ok(()) }; self.write_separator()?; let n = DecimalFormatter::new(line_number); self.std.write_spec(self.config().colors.line(), n.as_bytes())?; + if let Some(end) = line_end { + self.std.write_spec(self.config().colors.line(), b"-")?; + let n = DecimalFormatter::new(end); + self.std.write_spec(self.config().colors.line(), n.as_bytes())?; + } self.next_separator = PreludeSeparator::FieldSeparator; Ok(()) } @@ -2406,6 +2758,50 @@ Watson assert_eq_printed!(expected, got); } + #[test] + fn squash_multi_line_range() { + let matcher = RegexMatcher::new("(?s)line 1\\nline 2").unwrap(); + let mut printer = StandardBuilder::new() + .squash(SquashMode::Newlines) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .multi_line(true) + .build() + .search_reader( + &matcher, + b"line 1\nline 2\n", + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "1-2:line 1 line 2\n"; + assert_eq_printed!(expected, got); + } + + #[test] + fn squash_whitespace_multi_line() { + let matcher = RegexMatcher::new("(?s)line\\s+2").unwrap(); + let mut printer = StandardBuilder::new() + .squash(SquashMode::Whitespace) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .multi_line(true) + .build() + .search_reader( + &matcher, + b"line\t\n 2\n", + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "1-2:line 2\n"; + assert_eq_printed!(expected, got); + } + #[test] fn column_number() { let matcher = RegexMatcher::new("Watson").unwrap(); diff --git a/pkg/brew/ripgrep-bin.rb b/pkg/brew/ripgrep-bin.rb index 568e46a..3b9aa32 100644 --- a/pkg/brew/ripgrep-bin.rb +++ b/pkg/brew/ripgrep-bin.rb @@ -4,20 +4,20 @@ class RipgrepBin 
< Formula homepage "https://github.com/BurntSushi/ripgrep" if OS.mac? - url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz" + url "https://git.peisongxiao.com/peisongxiao/rgs/releases/download/#{version}/rgs-#{version}-x86_64-apple-darwin.tar.gz" sha256 "64811cb24e77cac3057d6c40b63ac9becf9082eedd54ca411b475b755d334882" elsif OS.linux? - url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-unknown-linux-musl.tar.gz" + url "https://git.peisongxiao.com/peisongxiao/rgs/releases/download/#{version}/rgs-#{version}-x86_64-unknown-linux-musl.tar.gz" sha256 "1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599" end conflicts_with "ripgrep" def install - bin.install "rg" - man1.install "doc/rg.1" + bin.install "rgs" + man1.install "doc/rgs.1" - bash_completion.install "complete/rg.bash" - zsh_completion.install "complete/_rg" + bash_completion.install "complete/rgs.bash" + zsh_completion.install "complete/_rgs" end end