added docs and migrated name to rgs, migrated repo, added squash-lines feature
Some checks failed
ci / test (beta, ubuntu-latest, beta) (pull_request) Has been cancelled
ci / test (macos, macos-latest, nightly) (pull_request) Has been cancelled
ci / test (nightly, ubuntu-latest, nightly) (pull_request) Has been cancelled
ci / test (pinned, ubuntu-latest, 1.85.0) (pull_request) Has been cancelled
ci / test (stable, ubuntu-latest, stable) (pull_request) Has been cancelled
ci / test (stable-aarch64, ubuntu-latest, stable, aarch64-unknown-linux-gnu) (pull_request) Has been cancelled
ci / test (stable-arm-gnueabihf, ubuntu-latest, stable, armv7-unknown-linux-gnueabihf) (pull_request) Has been cancelled
ci / test (stable-arm-musleabi, ubuntu-latest, stable, armv7-unknown-linux-musleabi) (pull_request) Has been cancelled
ci / test (stable-arm-musleabihf, ubuntu-latest, stable, armv7-unknown-linux-musleabihf) (pull_request) Has been cancelled
ci / test (stable-musl, ubuntu-latest, stable, x86_64-unknown-linux-musl) (pull_request) Has been cancelled
ci / test (stable-powerpc64, ubuntu-latest, stable, powerpc64-unknown-linux-gnu) (pull_request) Has been cancelled
ci / test (stable-riscv64, ubuntu-latest, stable, riscv64gc-unknown-linux-gnu) (pull_request) Has been cancelled
ci / test (stable-s390x, ubuntu-latest, stable, s390x-unknown-linux-gnu) (pull_request) Has been cancelled
ci / test (stable-x86, ubuntu-latest, stable, i686-unknown-linux-gnu) (pull_request) Has been cancelled
ci / test (win-gnu, windows-latest, nightly-x86_64-gnu) (pull_request) Has been cancelled
ci / test (win-msvc, windows-latest, nightly) (pull_request) Has been cancelled
ci / test (winaarch64-msvc, windows-11-arm, nightly) (pull_request) Has been cancelled
ci / wasm (pull_request) Has been cancelled
ci / rustfmt (pull_request) Has been cancelled
ci / docs (pull_request) Has been cancelled
ci / Compile Fuzz Test Targets (pull_request) Has been cancelled

This commit is contained in:
2026-01-13 20:35:39 -05:00
parent ad6ec1b4c5
commit 0994661424
17 changed files with 1144 additions and 600 deletions

View File

@@ -67,7 +67,7 @@ pub use crate::{
HyperlinkFormat, HyperlinkFormatError, hyperlink_aliases,
},
path::{PathPrinter, PathPrinterBuilder},
standard::{Standard, StandardBuilder, StandardSink},
standard::{SquashMode, Standard, StandardBuilder, StandardSink},
stats::Stats,
summary::{Summary, SummaryBuilder, SummaryKind, SummarySink},
};

View File

@@ -27,6 +27,23 @@ use crate::{
},
};
/// Controls how whitespace is squashed in the standard printer output.
///
/// The default is [`SquashMode::None`], which leaves the output untouched.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum SquashMode {
    /// Do not squash whitespace in output.
    #[default]
    None,
    /// Collapse each run of Unicode whitespace (line terminators included)
    /// into a single ASCII space.
    Whitespace,
    /// Collapse each run of line terminators into a single ASCII space,
    /// leaving all other whitespace as-is.
    Newlines,
}
/// The configuration for the standard printer.
///
/// This is manipulated by the StandardBuilder and then referenced by the
@@ -40,6 +57,7 @@ struct Config {
heading: bool,
path: bool,
in_file_index: bool,
squash: SquashMode,
only_matching: bool,
per_match: bool,
per_match_one_line: bool,
@@ -66,6 +84,7 @@ impl Default for Config {
heading: false,
path: true,
in_file_index: false,
squash: SquashMode::None,
only_matching: false,
per_match: false,
per_match_one_line: false,
@@ -366,6 +385,12 @@ impl StandardBuilder {
self
}
/// Configure whitespace squashing in standard output.
///
/// Stores `mode` in this builder's configuration and returns the builder
/// so that calls can be chained. When this method is never called, the
/// configuration keeps its default of `SquashMode::None`.
pub fn squash(&mut self, mode: SquashMode) -> &mut StandardBuilder {
self.config.squash = mode;
self
}
/// Set the separator used between sets of search results.
///
/// When this is set, then it will be printed on its own line immediately
@@ -969,6 +994,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
self.sunk.absolute_byte_offset(),
self.sunk.line_number(),
None,
None,
self.in_file_index(),
)?;
self.write_line(self.sunk.bytes())
@@ -988,6 +1014,10 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
// instead.
debug_assert!(self.multi_line());
if self.config().squash != SquashMode::None {
return self.sink_fast_multi_line_squash();
}
let line_term = self.searcher.line_terminator().as_byte();
let mut absolute_byte_offset = self.sunk.absolute_byte_offset();
for (i, line) in self.sunk.lines(line_term).enumerate() {
@@ -995,6 +1025,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
absolute_byte_offset,
self.sunk.line_number().map(|n| n + i as u64),
None,
None,
self.in_file_index(),
)?;
absolute_byte_offset += line.len() as u64;
@@ -1004,6 +1035,20 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
Ok(())
}
/// Emit the whole sunk buffer as a single output line without locating
/// individual matches.
///
/// The prelude carries the first line number and, when the buffer spans
/// more than one line, the last line number as well. No column is
/// reported on this path.
fn sink_fast_multi_line_squash(&self) -> io::Result<()> {
    let sunk = self.sunk.bytes();
    let (first_line, last_line) =
        self.line_range(self.sunk.line_number(), sunk);
    let offset = self.sunk.absolute_byte_offset();
    self.write_prelude(
        offset,
        first_line,
        last_line,
        None,
        self.in_file_index(),
    )?;
    self.write_line(sunk)
}
/// Print a matching line where the configuration of the printer requires
/// finding each individual match (e.g., for coloring).
fn sink_slow(&self) -> io::Result<()> {
@@ -1015,6 +1060,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
self.write_prelude(
self.sunk.absolute_byte_offset() + m.start() as u64,
self.sunk.line_number(),
None,
Some(m.start() as u64 + 1),
self.in_file_index(),
)?;
@@ -1027,6 +1073,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
self.write_prelude(
self.sunk.absolute_byte_offset() + m.start() as u64,
self.sunk.line_number(),
None,
Some(m.start() as u64 + 1),
self.in_file_index(),
)?;
@@ -1036,6 +1083,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
self.write_prelude(
self.sunk.absolute_byte_offset(),
self.sunk.line_number(),
None,
Some(self.sunk.matches()[0].start() as u64 + 1),
self.in_file_index(),
)?;
@@ -1048,6 +1096,14 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
debug_assert!(!self.sunk.matches().is_empty());
debug_assert!(self.multi_line());
if self.config().squash != SquashMode::None {
if self.config().only_matching {
return self.sink_slow_multi_line_only_matching_squash();
} else if self.config().per_match {
return self.sink_slow_multi_per_match_squash();
}
return self.sink_slow_multi_line_squash();
}
if self.config().only_matching {
return self.sink_slow_multi_line_only_matching();
} else if self.config().per_match {
@@ -1065,6 +1121,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
self.write_prelude(
self.sunk.absolute_byte_offset() + line.start() as u64,
self.sunk.line_number().map(|n| n + count),
None,
Some(matches[0].start() as u64 + 1),
self.in_file_index(),
)?;
@@ -1080,6 +1137,20 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
Ok(())
}
/// Emit the whole sunk buffer as a single output line, passing every
/// match to `write_colored_line` so that matches can be highlighted.
///
/// The prelude carries the line range of the buffer and a column derived
/// from the start of the first match.
fn sink_slow_multi_line_squash(&self) -> io::Result<()> {
    let sunk = self.sunk.bytes();
    let (first_line, last_line) =
        self.line_range(self.sunk.line_number(), sunk);
    let found = self.sunk.matches();
    // The column is 1-based and taken from the first match only.
    let column = found[0].start() as u64 + 1;
    self.write_prelude(
        self.sunk.absolute_byte_offset(),
        first_line,
        last_line,
        Some(column),
        self.in_file_index(),
    )?;
    self.write_colored_line(found, sunk)
}
fn sink_slow_multi_line_only_matching(&self) -> io::Result<()> {
let line_term = self.searcher.line_terminator().as_byte();
let spec = self.config().colors.matched();
@@ -1111,6 +1182,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
self.write_prelude(
self.sunk.absolute_byte_offset() + m.start() as u64,
self.sunk.line_number().map(|n| n + count),
None,
Some(m.start() as u64 + 1),
self.in_file_index(),
)?;
@@ -1132,6 +1204,30 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
Ok(())
}
/// Emit one output line per match, containing only the matched text.
///
/// Each match gets its own prelude: the line number (or line range, when
/// the match itself spans several lines) on which the match starts, the
/// 1-based column of the match start within that line, and the absolute
/// byte offset of the match.
fn sink_slow_multi_line_only_matching_squash(&self) -> io::Result<()> {
    let bytes = self.sunk.bytes();
    for &m in self.sunk.matches() {
        let matched = &bytes[m];
        let line_start = self.line_number_for_offset(
            self.sunk.line_number(),
            bytes,
            m.start(),
        );
        let (line_number, line_number_end) =
            self.line_range(line_start, matched);
        // `m.start()` is an offset into the whole (possibly multi-line)
        // sunk buffer, so it must be rebased onto the start of its own
        // line; otherwise matches beginning after the first line report
        // a column that includes all preceding lines.
        let column = self.column_number_for_offset(bytes, m.start());
        self.write_prelude(
            self.sunk.absolute_byte_offset() + m.start() as u64,
            line_number,
            line_number_end,
            Some(column),
            self.in_file_index(),
        )?;
        // The matched text is written on its own, so the single match to
        // highlight covers the entire buffer handed to the writer.
        self.write_colored_line(&[Match::new(0, matched.len())], matched)?;
    }
    Ok(())
}
fn sink_slow_multi_per_match(&self) -> io::Result<()> {
let line_term = self.searcher.line_terminator().as_byte();
let spec = self.config().colors.matched();
@@ -1150,6 +1246,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
self.write_prelude(
self.sunk.absolute_byte_offset() + line.start() as u64,
self.sunk.line_number().map(|n| n + count),
None,
Some(m.start().saturating_sub(line.start()) as u64 + 1),
self.in_file_index(),
)?;
@@ -1190,6 +1287,31 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
Ok(())
}
/// Emit one output line per match, each with its own prelude.
///
/// For every match, the prelude reports the line (or line range) the
/// match occupies, the 1-based column of the match start relative to its
/// own line, and the absolute byte offset of the match. Only the matched
/// text is written after the prelude.
fn sink_slow_multi_per_match_squash(&self) -> io::Result<()> {
    let sunk = self.sunk.bytes();
    self.sunk.matches().iter().try_for_each(|&found| -> io::Result<()> {
        let first_line = self.line_number_for_offset(
            self.sunk.line_number(),
            sunk,
            found.start(),
        );
        let (line_number, line_number_end) =
            self.line_range(first_line, &sunk[found]);
        let column = self.column_number_for_offset(sunk, found.start());
        self.write_prelude(
            self.sunk.absolute_byte_offset() + found.start() as u64,
            line_number,
            line_number_end,
            Some(column),
            self.in_file_index(),
        )?;
        let text = &sunk[found];
        // The written buffer is exactly the match, so highlight all of it.
        self.write_colored_line(&[Match::new(0, text.len())], text)
    })
}
/// Write the beginning part of a matching line. This (may) include things
/// like the file path, line number among others, depending on the
/// configuration and the parameters given.
@@ -1198,13 +1320,14 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
&self,
absolute_byte_offset: u64,
line_number: Option<u64>,
line_number_end: Option<u64>,
column: Option<u64>,
in_file_index: Option<u64>,
) -> io::Result<()> {
let mut prelude = PreludeWriter::new(self);
prelude.start(line_number, column)?;
prelude.write_path(in_file_index)?;
prelude.write_line_number(line_number)?;
prelude.write_line_number(line_number, line_number_end)?;
prelude.write_column_number(column)?;
prelude.write_byte_offset(absolute_byte_offset)?;
prelude.end()
@@ -1228,12 +1351,20 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
self.sunk.matches(),
&mut 0,
)?;
} else {
} else if self.config().squash == SquashMode::None {
// self.write_trim(line)?;
self.write(line)?;
if !self.has_line_terminator(line) {
self.write_line_term()?;
}
} else {
let mut range = Match::new(0, line.len());
self.trim_line_terminator(line, &mut range);
let line = &line[range];
let mut squasher = SquashState::new();
self.write_squashed(line, &mut squasher)?;
squasher.finish(self)?;
self.write_line_term()?;
}
Ok(())
}
@@ -1254,7 +1385,11 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
if self.exceeds_max_columns(bytes) {
self.write_exceeded_line(bytes, line, matches, &mut 0)
} else {
self.write_colored_matches(bytes, line, matches, &mut 0)?;
if self.config().squash == SquashMode::None {
self.write_colored_matches(bytes, line, matches, &mut 0)?;
} else {
self.write_colored_matches_squashed(bytes, line, matches, &mut 0)?;
}
self.write_line_term()?;
Ok(())
}
@@ -1309,6 +1444,135 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
Ok(())
}
/// Write the `line` range of `bytes` with squashing applied, switching
/// match coloring on and off around each entry of `matches`.
///
/// `match_index` is the position in `matches` at which to start; it is
/// advanced past every match that ends at or before the current position.
/// A single `SquashState` is shared by all segments written here, so a
/// whitespace run that straddles a match boundary still produces only one
/// pending space.
///
/// This does not write a line terminator; `line` is first passed through
/// `trim_line_terminator`.
fn write_colored_matches_squashed(
&self,
bytes: &[u8],
mut line: Match,
matches: &[Match],
match_index: &mut usize,
) -> io::Result<()> {
self.trim_line_terminator(bytes, &mut line);
let mut squasher = SquashState::new();
// Without matches there is nothing to color: squash the whole range and
// emit any space still pending at the end.
if matches.is_empty() {
self.write_squashed(&bytes[line], &mut squasher)?;
squasher.finish(self)?;
return Ok(());
}
self.start_line_highlight()?;
// `line` shrinks from the front as segments are written.
while !line.is_empty() {
// The current match lies entirely behind the write position.
if matches[*match_index].end() <= line.start() {
if *match_index + 1 < matches.len() {
*match_index += 1;
continue;
} else {
// No matches remain: the rest of the line is written uncolored.
self.end_color_match()?;
self.write_squashed(&bytes[line], &mut squasher)?;
break;
}
}
let m = matches[*match_index];
if line.start() < m.start() {
// Uncolored gap before the next match.
let upto = cmp::min(line.end(), m.start());
self.end_color_match()?;
self.write_squashed(
&bytes[line.with_end(upto)],
&mut squasher,
)?;
line = line.with_start(upto);
} else {
// Inside a match: write up to the match end or the line end,
// whichever comes first.
let upto = cmp::min(line.end(), m.end());
self.start_color_match()?;
self.write_squashed(
&bytes[line.with_end(upto)],
&mut squasher,
)?;
line = line.with_start(upto);
}
}
self.end_color_match()?;
self.end_line_highlight()?;
// NOTE(review): a pending space is only written by the next flush, which
// happens after the color has already been switched for the following
// segment (or, here, after highlighting has ended) - confirm that this is
// the intended styling for squashed spaces.
squasher.finish(self)?;
Ok(())
}
/// Write `bytes` according to the configured squash mode.
///
/// With `SquashMode::None` the bytes are written verbatim and `squasher`
/// is left untouched; the other modes delegate to the matching squashing
/// writer, which updates `squasher` as it goes.
fn write_squashed(
    &self,
    bytes: &[u8],
    squasher: &mut SquashState,
) -> io::Result<()> {
    let mode = self.config().squash;
    match mode {
        SquashMode::None => self.write(bytes),
        SquashMode::Whitespace => {
            self.write_squashed_whitespace(bytes, squasher)
        }
        SquashMode::Newlines => {
            self.write_squashed_newlines(bytes, squasher)
        }
    }
}
/// Write `bytes`, collapsing every run of whitespace into one space.
///
/// Whitespace characters, and the searcher's line terminator byte when it
/// is not CRLF, are never written directly. They only mark a space as
/// pending in `squasher`, and that space is emitted just before the next
/// non-whitespace character. A run at the end of `bytes` therefore stays
/// pending for the caller to flush or carry into the next segment.
fn write_squashed_whitespace(
    &self,
    bytes: &[u8],
    squasher: &mut SquashState,
) -> io::Result<()> {
    let line_term = self.searcher.line_terminator();
    // Both bytes of a CRLF terminator are already whitespace, so only a
    // single-byte terminator (which may be a non-whitespace byte) needs
    // an explicit comparison. Resolve that once, outside the loop.
    let term_byte =
        if line_term.is_crlf() { None } else { Some(line_term.as_byte()) };
    for (start, end, ch) in bytes.char_indices() {
        if ch.is_whitespace() || Some(bytes[start]) == term_byte {
            squasher.pending_space = true;
        } else {
            squasher.flush(self)?;
            self.write(&bytes[start..end])?;
        }
    }
    Ok(())
}
/// Write `bytes`, collapsing every run of line terminators into one space.
///
/// In CRLF mode both `\r\n` and a lone `\n` count as terminators; a lone
/// `\r` does not. Otherwise only the searcher's terminator byte counts.
/// Terminators are never written: they mark a space as pending in
/// `squasher`, which is emitted before the next chunk of ordinary bytes.
fn write_squashed_newlines(
    &self,
    bytes: &[u8],
    squasher: &mut SquashState,
) -> io::Result<()> {
    let line_term = self.searcher.line_terminator();
    let crlf = line_term.is_crlf();
    // Length in bytes of the terminator starting at `at`, or 0 if the
    // byte at `at` does not start a terminator.
    let terminator_len = |at: usize| -> usize {
        if crlf {
            match bytes[at] {
                b'\r' if bytes.get(at + 1) == Some(&b'\n') => 2,
                b'\n' => 1,
                _ => 0,
            }
        } else if bytes[at] == line_term.as_byte() {
            1
        } else {
            0
        }
    };

    // `chunk_start` marks the beginning of the not-yet-written run of
    // ordinary bytes; `pos` is the scan position.
    let mut chunk_start = 0;
    let mut pos = 0;
    while pos < bytes.len() {
        let len = terminator_len(pos);
        if len == 0 {
            pos += 1;
            continue;
        }
        if chunk_start < pos {
            squasher.flush(self)?;
            self.write(&bytes[chunk_start..pos])?;
        }
        squasher.pending_space = true;
        pos += len;
        chunk_start = pos;
    }
    if chunk_start < bytes.len() {
        squasher.flush(self)?;
        self.write(&bytes[chunk_start..])?;
    }
    Ok(())
}
fn write_exceeded_line(
&self,
bytes: &[u8],
@@ -1593,6 +1857,56 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
self.config().max_columns.map_or(false, |m| line.len() as u64 > m)
}
/// Returns how many lines beyond the first one `bytes` covers.
///
/// An empty slice and a single line (with or without its terminator)
/// both yield 0. A terminator at the very end of `bytes` closes the last
/// line rather than opening a new one.
fn line_span(&self, bytes: &[u8]) -> u64 {
    let Some(&last) = bytes.last() else { return 0 };
    let line_term = self.searcher.line_terminator().as_byte();
    let terminators =
        bytes.iter().filter(|&&b| b == line_term).count() as u64;
    if last == line_term {
        // The final terminator belongs to the last counted line.
        terminators.saturating_sub(1)
    } else {
        terminators
    }
}
/// Computes the `(first, last)` line numbers covered by `bytes`.
///
/// Returns `(None, None)` when no starting line number is available.
/// The second element is `Some` only when `bytes` extends past the first
/// line, as measured by `line_span`.
fn line_range(
    &self,
    line_start: Option<u64>,
    bytes: &[u8],
) -> (Option<u64>, Option<u64>) {
    match line_start {
        None => (None, None),
        Some(first) => {
            let last = first + self.line_span(bytes);
            let last = if last > first { Some(last) } else { None };
            (Some(first), last)
        }
    }
}
/// Returns the line number of the byte at `offset` within `bytes`, given
/// that `bytes` starts on line `line_start`.
///
/// Yields `None` when `line_start` is `None`. Otherwise the result is
/// `line_start` plus the number of line terminator bytes found before
/// `offset`.
fn line_number_for_offset(
    &self,
    line_start: Option<u64>,
    bytes: &[u8],
    offset: usize,
) -> Option<u64> {
    line_start.map(|first| {
        let line_term = self.searcher.line_terminator().as_byte();
        let preceding =
            bytes[..offset].iter().filter(|&&b| b == line_term).count();
        first + preceding as u64
    })
}
/// Returns the 1-based column, in bytes, of `offset` relative to the
/// start of the line that contains it.
///
/// The line starts just after the last line terminator byte before
/// `offset`, or at the beginning of `bytes` if there is none.
fn column_number_for_offset(&self, bytes: &[u8], offset: usize) -> u64 {
    let line_term = self.searcher.line_terminator().as_byte();
    let before = &bytes[..offset];
    let line_start = before
        .iter()
        .rposition(|&b| b == line_term)
        .map_or(0, |term_at| term_at + 1);
    (offset - line_start) as u64 + 1
}
/// Returns true if and only if the searcher may report matches over
/// multiple lines.
///
@@ -1618,6 +1932,35 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
}
}
/// Bookkeeping for squashed output: remembers whether a collapsed run of
/// whitespace still needs to be represented by a single space.
#[derive(Debug, Default)]
struct SquashState {
    /// True when a space is owed but has not been written yet.
    pending_space: bool,
}

impl SquashState {
    /// Creates a state with no space pending.
    fn new() -> SquashState {
        SquashState::default()
    }

    /// Writes the pending space, if any, to `std` and clears the flag.
    ///
    /// The flag is only cleared once the write has succeeded.
    fn flush<M: Matcher, W: WriteColor>(
        &mut self,
        std: &StandardImpl<'_, M, W>,
    ) -> io::Result<()> {
        if !self.pending_space {
            return Ok(());
        }
        std.write(b" ")?;
        self.pending_space = false;
        Ok(())
    }

    /// Ends a squashed line by emitting whatever space is still pending.
    fn finish<M: Matcher, W: WriteColor>(
        &mut self,
        std: &StandardImpl<'_, M, W>,
    ) -> io::Result<()> {
        self.flush(std)
    }
}
/// A writer for the prelude (the beginning part of a matching line).
///
/// This encapsulates the state needed to print the prelude.
@@ -1719,11 +2062,20 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> {
/// Writes the line number field if present.
#[inline(always)]
fn write_line_number(&mut self, line: Option<u64>) -> io::Result<()> {
fn write_line_number(
&mut self,
line: Option<u64>,
line_end: Option<u64>,
) -> io::Result<()> {
let Some(line_number) = line else { return Ok(()) };
self.write_separator()?;
let n = DecimalFormatter::new(line_number);
self.std.write_spec(self.config().colors.line(), n.as_bytes())?;
if let Some(end) = line_end {
self.std.write_spec(self.config().colors.line(), b"-")?;
let n = DecimalFormatter::new(end);
self.std.write_spec(self.config().colors.line(), n.as_bytes())?;
}
self.next_separator = PreludeSeparator::FieldSeparator;
Ok(())
}
@@ -2406,6 +2758,50 @@ Watson
assert_eq_printed!(expected, got);
}
/// A multi-line match printed with `SquashMode::Newlines` is emitted as a
/// single line whose prelude shows the covered line range.
#[test]
fn squash_multi_line_range() {
    // A byte-string literal has type `&[u8; N]`, which is not a reader;
    // annotate it as a slice so it can be handed to `search_reader`.
    let haystack: &[u8] = b"line 1\nline 2\n";
    let matcher = RegexMatcher::new("(?s)line 1\\nline 2").unwrap();
    let mut printer = StandardBuilder::new()
        .squash(SquashMode::Newlines)
        .build(NoColor::new(vec![]));
    SearcherBuilder::new()
        .line_number(true)
        .multi_line(true)
        .build()
        .search_reader(&matcher, haystack, printer.sink(&matcher))
        .unwrap();

    let got = printer_contents(&mut printer);
    let expected = "1-2:line 1 line 2\n";
    assert_eq_printed!(expected, got);
}
/// With `SquashMode::Whitespace`, a run of mixed whitespace that crosses a
/// line boundary collapses into one space in the printed match.
#[test]
fn squash_whitespace_multi_line() {
    // A byte-string literal has type `&[u8; N]`, which is not a reader;
    // annotate it as a slice so it can be handed to `search_reader`.
    let haystack: &[u8] = b"line\t\n 2\n";
    let matcher = RegexMatcher::new("(?s)line\\s+2").unwrap();
    let mut printer = StandardBuilder::new()
        .squash(SquashMode::Whitespace)
        .build(NoColor::new(vec![]));
    SearcherBuilder::new()
        .line_number(true)
        .multi_line(true)
        .build()
        .search_reader(&matcher, haystack, printer.sink(&matcher))
        .unwrap();

    let got = printer_contents(&mut printer);
    let expected = "1-2:line 2\n";
    assert_eq_printed!(expected, got);
}
#[test]
fn column_number() {
let matcher = RegexMatcher::new("Watson").unwrap();