rgs: added multiline window limit and in-file result indexing (work in progress)
This commit is contained in:
@@ -30,7 +30,7 @@ rust-version = "1.85"
|
|||||||
[[bin]]
|
[[bin]]
|
||||||
bench = false
|
bench = false
|
||||||
path = "crates/core/main.rs"
|
path = "crates/core/main.rs"
|
||||||
name = "rg"
|
name = "rgs"
|
||||||
|
|
||||||
[[test]]
|
[[test]]
|
||||||
name = "integration"
|
name = "integration"
|
||||||
|
|||||||
@@ -96,6 +96,8 @@ _rg() {
|
|||||||
+ '(file-name)' # File-name options
|
+ '(file-name)' # File-name options
|
||||||
{-H,--with-filename}'[show file name for matches]'
|
{-H,--with-filename}'[show file name for matches]'
|
||||||
{-I,--no-filename}"[don't show file name for matches]"
|
{-I,--no-filename}"[don't show file name for matches]"
|
||||||
|
'--in-file-index[show per-file match index in output]'
|
||||||
|
'--no-in-file-index[hide per-file match index in output]'
|
||||||
|
|
||||||
+ '(file-system)' # File system options
|
+ '(file-system)' # File system options
|
||||||
"--one-file-system[don't descend into directories on other file systems]"
|
"--one-file-system[don't descend into directories on other file systems]"
|
||||||
@@ -210,6 +212,7 @@ _rg() {
|
|||||||
|
|
||||||
+ '(multiline)' # Multiline options
|
+ '(multiline)' # Multiline options
|
||||||
{-U,--multiline}'[permit matching across multiple lines]'
|
{-U,--multiline}'[permit matching across multiple lines]'
|
||||||
|
'--multiline-window=[limit multiline matches to NUM lines (with -U)]:number of lines'
|
||||||
$no'(multiline-dotall)--no-multiline[restrict matches to at most one line each]'
|
$no'(multiline-dotall)--no-multiline[restrict matches to at most one line each]'
|
||||||
|
|
||||||
+ '(multiline-dotall)' # Multiline DOTALL options
|
+ '(multiline-dotall)' # Multiline DOTALL options
|
||||||
|
|||||||
@@ -97,6 +97,7 @@ pub(super) const FLAGS: &[&dyn Flag] = &[
|
|||||||
&MaxFilesize,
|
&MaxFilesize,
|
||||||
&Mmap,
|
&Mmap,
|
||||||
&Multiline,
|
&Multiline,
|
||||||
|
&MultilineWindow,
|
||||||
&MultilineDotall,
|
&MultilineDotall,
|
||||||
&NoConfig,
|
&NoConfig,
|
||||||
&NoIgnore,
|
&NoIgnore,
|
||||||
@@ -142,6 +143,7 @@ pub(super) const FLAGS: &[&dyn Flag] = &[
|
|||||||
&Unrestricted,
|
&Unrestricted,
|
||||||
&Version,
|
&Version,
|
||||||
&Vimgrep,
|
&Vimgrep,
|
||||||
|
&InFileIndex,
|
||||||
&WithFilename,
|
&WithFilename,
|
||||||
&WithFilenameNo,
|
&WithFilenameNo,
|
||||||
&WordRegexp,
|
&WordRegexp,
|
||||||
@@ -4209,6 +4211,59 @@ fn test_multiline() {
|
|||||||
assert_eq!(false, args.multiline);
|
assert_eq!(false, args.multiline);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// --multiline-window
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct MultilineWindow;
|
||||||
|
|
||||||
|
impl Flag for MultilineWindow {
|
||||||
|
fn is_switch(&self) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
fn name_long(&self) -> &'static str {
|
||||||
|
"multiline-window"
|
||||||
|
}
|
||||||
|
fn doc_variable(&self) -> Option<&'static str> {
|
||||||
|
Some("NUM")
|
||||||
|
}
|
||||||
|
fn doc_category(&self) -> Category {
|
||||||
|
Category::Search
|
||||||
|
}
|
||||||
|
fn doc_short(&self) -> &'static str {
|
||||||
|
r"Limit multiline matches to a fixed number of lines."
|
||||||
|
}
|
||||||
|
fn doc_long(&self) -> &'static str {
|
||||||
|
r#"
|
||||||
|
Limit the maximum number of lines that a multiline match may span to
|
||||||
|
\fINUM\fP (use \fB--multiline-window=\fP\fINUM\fP).
|
||||||
|
.sp
|
||||||
|
This flag requires \flag{multiline}. Matches are found as if the file being
|
||||||
|
searched were limited to \fINUM\fP lines at a time, which can prevent
|
||||||
|
unintended long matches while still enabling multi-line searching.
|
||||||
|
.sp
|
||||||
|
The value of \fINUM\fP must be at least 1.
|
||||||
|
"#
|
||||||
|
}
|
||||||
|
|
||||||
|
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
|
||||||
|
let lines = convert::usize(&v.unwrap_value())?;
|
||||||
|
if lines == 0 {
|
||||||
|
anyhow::bail!("--multiline-window must be at least 1");
|
||||||
|
}
|
||||||
|
args.multiline_window = Some(lines);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
#[test]
fn test_multiline_window() {
    // The window is unset unless the flag is given.
    let args = parse_low_raw(None::<&str>).unwrap();
    assert_eq!(None, args.multiline_window);

    let args = parse_low_raw(["--multiline-window=2"]).unwrap();
    assert_eq!(Some(2), args.multiline_window);

    // Each occurrence overwrites the previous one, so the last value wins.
    let args =
        parse_low_raw(["--multiline-window=2", "--multiline-window=7"])
            .unwrap();
    assert_eq!(Some(7), args.multiline_window);

    // A window of zero lines is explicitly rejected by the flag.
    assert!(parse_low_raw(["--multiline-window=0"]).is_err());

    // A value that is not a number cannot be converted.
    assert!(parse_low_raw(["--multiline-window=abc"]).is_err());
}
|
||||||
|
|
||||||
/// --multiline-dotall
|
/// --multiline-dotall
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct MultilineDotall;
|
struct MultilineDotall;
|
||||||
@@ -7401,6 +7456,53 @@ fn test_vimgrep() {
|
|||||||
assert_eq!(true, args.vimgrep);
|
assert_eq!(true, args.vimgrep);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// --in-file-index
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct InFileIndex;
|
||||||
|
|
||||||
|
impl Flag for InFileIndex {
|
||||||
|
fn is_switch(&self) -> bool {
|
||||||
|
true
|
||||||
|
}
|
||||||
|
fn name_long(&self) -> &'static str {
|
||||||
|
"in-file-index"
|
||||||
|
}
|
||||||
|
fn name_negated(&self) -> Option<&'static str> {
|
||||||
|
Some("no-in-file-index")
|
||||||
|
}
|
||||||
|
fn doc_category(&self) -> Category {
|
||||||
|
Category::Output
|
||||||
|
}
|
||||||
|
fn doc_short(&self) -> &'static str {
|
||||||
|
r"Prefix matches with an index per file."
|
||||||
|
}
|
||||||
|
fn doc_long(&self) -> &'static str {
|
||||||
|
r"
|
||||||
|
When enabled, ripgrep prefixes each matching line with an index that is
|
||||||
|
incremented per file. The format is \fIFILE\fP[\fIN\fP]:\fILINE\fP:, which can
|
||||||
|
disambiguate multi-line matches that print the same line multiple times.
|
||||||
|
"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
|
||||||
|
args.in_file_index = v.unwrap_switch();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
#[test]
fn test_in_file_index() {
    // The index is off unless it is asked for.
    let defaults = parse_low_raw(None::<&str>).unwrap();
    assert_eq!(false, defaults.in_file_index);

    // The positive form switches it on.
    let enabled = parse_low_raw(["--in-file-index"]).unwrap();
    assert_eq!(true, enabled.in_file_index);

    // A later negation overrides an earlier request.
    let negated =
        parse_low_raw(["--in-file-index", "--no-in-file-index"]).unwrap();
    assert_eq!(false, negated.in_file_index);
}
|
||||||
|
|
||||||
/// --with-filename
|
/// --with-filename
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct WithFilename;
|
struct WithFilename;
|
||||||
|
|||||||
@@ -61,6 +61,7 @@ pub(crate) struct HiArgs {
|
|||||||
ignore_file_case_insensitive: bool,
|
ignore_file_case_insensitive: bool,
|
||||||
ignore_file: Vec<PathBuf>,
|
ignore_file: Vec<PathBuf>,
|
||||||
include_zero: bool,
|
include_zero: bool,
|
||||||
|
in_file_index: bool,
|
||||||
invert_match: bool,
|
invert_match: bool,
|
||||||
is_terminal_stdout: bool,
|
is_terminal_stdout: bool,
|
||||||
line_number: bool,
|
line_number: bool,
|
||||||
@@ -73,6 +74,7 @@ pub(crate) struct HiArgs {
|
|||||||
mode: Mode,
|
mode: Mode,
|
||||||
multiline: bool,
|
multiline: bool,
|
||||||
multiline_dotall: bool,
|
multiline_dotall: bool,
|
||||||
|
multiline_window: Option<usize>,
|
||||||
no_ignore_dot: bool,
|
no_ignore_dot: bool,
|
||||||
no_ignore_exclude: bool,
|
no_ignore_exclude: bool,
|
||||||
no_ignore_files: bool,
|
no_ignore_files: bool,
|
||||||
@@ -140,6 +142,9 @@ impl HiArgs {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mut state = State::new()?;
|
let mut state = State::new()?;
|
||||||
|
if low.multiline_window.is_some() && !low.multiline {
|
||||||
|
anyhow::bail!("--multiline-window requires --multiline");
|
||||||
|
}
|
||||||
let patterns = Patterns::from_low_args(&mut state, &mut low)?;
|
let patterns = Patterns::from_low_args(&mut state, &mut low)?;
|
||||||
let paths = Paths::from_low_args(&mut state, &patterns, &mut low)?;
|
let paths = Paths::from_low_args(&mut state, &patterns, &mut low)?;
|
||||||
|
|
||||||
@@ -278,6 +283,7 @@ impl HiArgs {
|
|||||||
ignore_file: low.ignore_file,
|
ignore_file: low.ignore_file,
|
||||||
ignore_file_case_insensitive: low.ignore_file_case_insensitive,
|
ignore_file_case_insensitive: low.ignore_file_case_insensitive,
|
||||||
include_zero: low.include_zero,
|
include_zero: low.include_zero,
|
||||||
|
in_file_index: low.in_file_index,
|
||||||
invert_match: low.invert_match,
|
invert_match: low.invert_match,
|
||||||
is_terminal_stdout: state.is_terminal_stdout,
|
is_terminal_stdout: state.is_terminal_stdout,
|
||||||
line_number,
|
line_number,
|
||||||
@@ -289,6 +295,7 @@ impl HiArgs {
|
|||||||
mmap_choice,
|
mmap_choice,
|
||||||
multiline: low.multiline,
|
multiline: low.multiline,
|
||||||
multiline_dotall: low.multiline_dotall,
|
multiline_dotall: low.multiline_dotall,
|
||||||
|
multiline_window: low.multiline_window,
|
||||||
no_ignore_dot: low.no_ignore_dot,
|
no_ignore_dot: low.no_ignore_dot,
|
||||||
no_ignore_exclude: low.no_ignore_exclude,
|
no_ignore_exclude: low.no_ignore_exclude,
|
||||||
no_ignore_files: low.no_ignore_files,
|
no_ignore_files: low.no_ignore_files,
|
||||||
@@ -616,6 +623,7 @@ impl HiArgs {
|
|||||||
.column(self.column)
|
.column(self.column)
|
||||||
.heading(self.heading)
|
.heading(self.heading)
|
||||||
.hyperlink(self.hyperlink_config.clone())
|
.hyperlink(self.hyperlink_config.clone())
|
||||||
|
.in_file_index(self.in_file_index)
|
||||||
.max_columns_preview(self.max_columns_preview)
|
.max_columns_preview(self.max_columns_preview)
|
||||||
.max_columns(self.max_columns)
|
.max_columns(self.max_columns)
|
||||||
.only_matching(self.only_matching)
|
.only_matching(self.only_matching)
|
||||||
@@ -723,6 +731,7 @@ impl HiArgs {
|
|||||||
.invert_match(self.invert_match)
|
.invert_match(self.invert_match)
|
||||||
.line_number(self.line_number)
|
.line_number(self.line_number)
|
||||||
.multi_line(self.multiline)
|
.multi_line(self.multiline)
|
||||||
|
.multiline_window(self.multiline_window)
|
||||||
.memory_map(self.mmap_choice.clone())
|
.memory_map(self.mmap_choice.clone())
|
||||||
.stop_on_nonmatch(self.stop_on_nonmatch);
|
.stop_on_nonmatch(self.stop_on_nonmatch);
|
||||||
match self.context {
|
match self.context {
|
||||||
|
|||||||
@@ -65,6 +65,7 @@ pub(crate) struct LowArgs {
|
|||||||
pub(crate) ignore_file: Vec<PathBuf>,
|
pub(crate) ignore_file: Vec<PathBuf>,
|
||||||
pub(crate) ignore_file_case_insensitive: bool,
|
pub(crate) ignore_file_case_insensitive: bool,
|
||||||
pub(crate) include_zero: bool,
|
pub(crate) include_zero: bool,
|
||||||
|
pub(crate) in_file_index: bool,
|
||||||
pub(crate) invert_match: bool,
|
pub(crate) invert_match: bool,
|
||||||
pub(crate) line_number: Option<bool>,
|
pub(crate) line_number: Option<bool>,
|
||||||
pub(crate) logging: Option<LoggingMode>,
|
pub(crate) logging: Option<LoggingMode>,
|
||||||
@@ -76,6 +77,7 @@ pub(crate) struct LowArgs {
|
|||||||
pub(crate) mmap: MmapMode,
|
pub(crate) mmap: MmapMode,
|
||||||
pub(crate) multiline: bool,
|
pub(crate) multiline: bool,
|
||||||
pub(crate) multiline_dotall: bool,
|
pub(crate) multiline_dotall: bool,
|
||||||
|
pub(crate) multiline_window: Option<usize>,
|
||||||
pub(crate) no_config: bool,
|
pub(crate) no_config: bool,
|
||||||
pub(crate) no_ignore_dot: bool,
|
pub(crate) no_ignore_dot: bool,
|
||||||
pub(crate) no_ignore_exclude: bool,
|
pub(crate) no_ignore_exclude: bool,
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ struct Config {
|
|||||||
stats: bool,
|
stats: bool,
|
||||||
heading: bool,
|
heading: bool,
|
||||||
path: bool,
|
path: bool,
|
||||||
|
in_file_index: bool,
|
||||||
only_matching: bool,
|
only_matching: bool,
|
||||||
per_match: bool,
|
per_match: bool,
|
||||||
per_match_one_line: bool,
|
per_match_one_line: bool,
|
||||||
@@ -64,6 +65,7 @@ impl Default for Config {
|
|||||||
stats: false,
|
stats: false,
|
||||||
heading: false,
|
heading: false,
|
||||||
path: true,
|
path: true,
|
||||||
|
in_file_index: false,
|
||||||
only_matching: false,
|
only_matching: false,
|
||||||
per_match: false,
|
per_match: false,
|
||||||
per_match_one_line: false,
|
per_match_one_line: false,
|
||||||
@@ -231,6 +233,12 @@ impl StandardBuilder {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// When enabled, prefix matching lines with a per-file match index.
|
||||||
|
pub fn in_file_index(&mut self, yes: bool) -> &mut StandardBuilder {
|
||||||
|
self.config.in_file_index = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// Only print the specific matches instead of the entire line containing
|
/// Only print the specific matches instead of the entire line containing
|
||||||
/// each match. Each match is printed on its own line. When multi line
|
/// each match. Each match is printed on its own line. When multi line
|
||||||
/// search is enabled, then matches spanning multiple lines are printed
|
/// search is enabled, then matches spanning multiple lines are printed
|
||||||
@@ -528,6 +536,7 @@ impl<W: WriteColor> Standard<W> {
|
|||||||
path: None,
|
path: None,
|
||||||
start_time: Instant::now(),
|
start_time: Instant::now(),
|
||||||
match_count: 0,
|
match_count: 0,
|
||||||
|
in_file_index: 0,
|
||||||
binary_byte_offset: None,
|
binary_byte_offset: None,
|
||||||
stats,
|
stats,
|
||||||
needs_match_granularity,
|
needs_match_granularity,
|
||||||
@@ -564,6 +573,7 @@ impl<W: WriteColor> Standard<W> {
|
|||||||
path: Some(ppath),
|
path: Some(ppath),
|
||||||
start_time: Instant::now(),
|
start_time: Instant::now(),
|
||||||
match_count: 0,
|
match_count: 0,
|
||||||
|
in_file_index: 0,
|
||||||
binary_byte_offset: None,
|
binary_byte_offset: None,
|
||||||
stats,
|
stats,
|
||||||
needs_match_granularity,
|
needs_match_granularity,
|
||||||
@@ -644,6 +654,7 @@ pub struct StandardSink<'p, 's, M: Matcher, W> {
|
|||||||
path: Option<PrinterPath<'p>>,
|
path: Option<PrinterPath<'p>>,
|
||||||
start_time: Instant,
|
start_time: Instant,
|
||||||
match_count: u64,
|
match_count: u64,
|
||||||
|
in_file_index: u64,
|
||||||
binary_byte_offset: Option<u64>,
|
binary_byte_offset: Option<u64>,
|
||||||
stats: Option<Stats>,
|
stats: Option<Stats>,
|
||||||
needs_match_granularity: bool,
|
needs_match_granularity: bool,
|
||||||
@@ -769,6 +780,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
|||||||
mat: &SinkMatch<'_>,
|
mat: &SinkMatch<'_>,
|
||||||
) -> Result<bool, io::Error> {
|
) -> Result<bool, io::Error> {
|
||||||
self.match_count += 1;
|
self.match_count += 1;
|
||||||
|
self.in_file_index += 1;
|
||||||
|
|
||||||
self.record_matches(
|
self.record_matches(
|
||||||
searcher,
|
searcher,
|
||||||
@@ -842,6 +854,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
|||||||
self.standard.wtr.borrow_mut().reset_count();
|
self.standard.wtr.borrow_mut().reset_count();
|
||||||
self.start_time = Instant::now();
|
self.start_time = Instant::now();
|
||||||
self.match_count = 0;
|
self.match_count = 0;
|
||||||
|
self.in_file_index = 0;
|
||||||
self.binary_byte_offset = None;
|
self.binary_byte_offset = None;
|
||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
@@ -956,6 +969,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
self.sunk.absolute_byte_offset(),
|
self.sunk.absolute_byte_offset(),
|
||||||
self.sunk.line_number(),
|
self.sunk.line_number(),
|
||||||
None,
|
None,
|
||||||
|
self.in_file_index(),
|
||||||
)?;
|
)?;
|
||||||
self.write_line(self.sunk.bytes())
|
self.write_line(self.sunk.bytes())
|
||||||
}
|
}
|
||||||
@@ -981,6 +995,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
absolute_byte_offset,
|
absolute_byte_offset,
|
||||||
self.sunk.line_number().map(|n| n + i as u64),
|
self.sunk.line_number().map(|n| n + i as u64),
|
||||||
None,
|
None,
|
||||||
|
self.in_file_index(),
|
||||||
)?;
|
)?;
|
||||||
absolute_byte_offset += line.len() as u64;
|
absolute_byte_offset += line.len() as u64;
|
||||||
|
|
||||||
@@ -1001,6 +1016,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
self.sunk.absolute_byte_offset() + m.start() as u64,
|
self.sunk.absolute_byte_offset() + m.start() as u64,
|
||||||
self.sunk.line_number(),
|
self.sunk.line_number(),
|
||||||
Some(m.start() as u64 + 1),
|
Some(m.start() as u64 + 1),
|
||||||
|
self.in_file_index(),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let buf = &self.sunk.bytes()[m];
|
let buf = &self.sunk.bytes()[m];
|
||||||
@@ -1012,6 +1028,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
self.sunk.absolute_byte_offset() + m.start() as u64,
|
self.sunk.absolute_byte_offset() + m.start() as u64,
|
||||||
self.sunk.line_number(),
|
self.sunk.line_number(),
|
||||||
Some(m.start() as u64 + 1),
|
Some(m.start() as u64 + 1),
|
||||||
|
self.in_file_index(),
|
||||||
)?;
|
)?;
|
||||||
self.write_colored_line(&[m], self.sunk.bytes())?;
|
self.write_colored_line(&[m], self.sunk.bytes())?;
|
||||||
}
|
}
|
||||||
@@ -1020,6 +1037,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
self.sunk.absolute_byte_offset(),
|
self.sunk.absolute_byte_offset(),
|
||||||
self.sunk.line_number(),
|
self.sunk.line_number(),
|
||||||
Some(self.sunk.matches()[0].start() as u64 + 1),
|
Some(self.sunk.matches()[0].start() as u64 + 1),
|
||||||
|
self.in_file_index(),
|
||||||
)?;
|
)?;
|
||||||
self.write_colored_line(self.sunk.matches(), self.sunk.bytes())?;
|
self.write_colored_line(self.sunk.matches(), self.sunk.bytes())?;
|
||||||
}
|
}
|
||||||
@@ -1048,6 +1066,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
self.sunk.absolute_byte_offset() + line.start() as u64,
|
self.sunk.absolute_byte_offset() + line.start() as u64,
|
||||||
self.sunk.line_number().map(|n| n + count),
|
self.sunk.line_number().map(|n| n + count),
|
||||||
Some(matches[0].start() as u64 + 1),
|
Some(matches[0].start() as u64 + 1),
|
||||||
|
self.in_file_index(),
|
||||||
)?;
|
)?;
|
||||||
count += 1;
|
count += 1;
|
||||||
self.trim_ascii_prefix(bytes, &mut line);
|
self.trim_ascii_prefix(bytes, &mut line);
|
||||||
@@ -1093,6 +1112,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
self.sunk.absolute_byte_offset() + m.start() as u64,
|
self.sunk.absolute_byte_offset() + m.start() as u64,
|
||||||
self.sunk.line_number().map(|n| n + count),
|
self.sunk.line_number().map(|n| n + count),
|
||||||
Some(m.start() as u64 + 1),
|
Some(m.start() as u64 + 1),
|
||||||
|
self.in_file_index(),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let this_line = line.with_end(upto);
|
let this_line = line.with_end(upto);
|
||||||
@@ -1131,6 +1151,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
self.sunk.absolute_byte_offset() + line.start() as u64,
|
self.sunk.absolute_byte_offset() + line.start() as u64,
|
||||||
self.sunk.line_number().map(|n| n + count),
|
self.sunk.line_number().map(|n| n + count),
|
||||||
Some(m.start().saturating_sub(line.start()) as u64 + 1),
|
Some(m.start().saturating_sub(line.start()) as u64 + 1),
|
||||||
|
self.in_file_index(),
|
||||||
)?;
|
)?;
|
||||||
count += 1;
|
count += 1;
|
||||||
self.trim_line_terminator(bytes, &mut line);
|
self.trim_line_terminator(bytes, &mut line);
|
||||||
@@ -1178,10 +1199,11 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
absolute_byte_offset: u64,
|
absolute_byte_offset: u64,
|
||||||
line_number: Option<u64>,
|
line_number: Option<u64>,
|
||||||
column: Option<u64>,
|
column: Option<u64>,
|
||||||
|
in_file_index: Option<u64>,
|
||||||
) -> io::Result<()> {
|
) -> io::Result<()> {
|
||||||
let mut prelude = PreludeWriter::new(self);
|
let mut prelude = PreludeWriter::new(self);
|
||||||
prelude.start(line_number, column)?;
|
prelude.start(line_number, column)?;
|
||||||
prelude.write_path()?;
|
prelude.write_path(in_file_index)?;
|
||||||
prelude.write_line_number(line_number)?;
|
prelude.write_line_number(line_number)?;
|
||||||
prelude.write_column_number(column)?;
|
prelude.write_column_number(column)?;
|
||||||
prelude.write_byte_offset(absolute_byte_offset)?;
|
prelude.write_byte_offset(absolute_byte_offset)?;
|
||||||
@@ -1532,6 +1554,14 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
self.sunk.context_kind().is_some()
|
self.sunk.context_kind().is_some()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn in_file_index(&self) -> Option<u64> {
|
||||||
|
if self.is_context() || !self.config().in_file_index {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(self.sink.in_file_index)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Return the underlying configuration for this printer.
|
/// Return the underlying configuration for this printer.
|
||||||
fn config(&self) -> &'a Config {
|
fn config(&self) -> &'a Config {
|
||||||
&self.sink.standard.config
|
&self.sink.standard.config
|
||||||
@@ -1657,16 +1687,27 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> {
|
|||||||
/// separator. (If a path terminator is set, then that is used instead of
|
/// separator. (If a path terminator is set, then that is used instead of
|
||||||
/// the field separator.)
|
/// the field separator.)
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn write_path(&mut self) -> io::Result<()> {
|
fn write_path(&mut self, in_file_index: Option<u64>) -> io::Result<()> {
|
||||||
// The prelude doesn't handle headings, only what comes before a match
|
// The prelude doesn't handle headings, only what comes before a match
|
||||||
// on the same line. So if we are emitting paths in headings, we should
|
// on the same line. So if we are emitting paths in headings, we should
|
||||||
// not do it here on each line.
|
// not do it here on each line.
|
||||||
if self.config().heading {
|
if self.config().heading && in_file_index.is_none() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let path = self.std.path();
|
||||||
|
if path.is_none() && in_file_index.is_none() {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
let Some(path) = self.std.path() else { return Ok(()) };
|
|
||||||
self.write_separator()?;
|
self.write_separator()?;
|
||||||
self.std.write_path(path)?;
|
if let Some(path) = path {
|
||||||
|
self.std.write_path(path)?;
|
||||||
|
}
|
||||||
|
if let Some(index) = in_file_index {
|
||||||
|
self.std.write_spec(self.config().colors.path(), b"[")?;
|
||||||
|
let n = DecimalFormatter::new(index);
|
||||||
|
self.std.write_spec(self.config().colors.path(), n.as_bytes())?;
|
||||||
|
self.std.write_spec(self.config().colors.path(), b"]")?;
|
||||||
|
}
|
||||||
|
|
||||||
self.next_separator = if self.config().path_terminator.is_some() {
|
self.next_separator = if self.config().path_terminator.is_some() {
|
||||||
PreludeSeparator::PathTerminator
|
PreludeSeparator::PathTerminator
|
||||||
|
|||||||
@@ -212,6 +212,18 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
consumed
|
consumed
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn advance_buffer(&mut self, buf: &[u8], consumed: usize) {
|
||||||
|
if consumed == 0 {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
self.count_lines(buf, consumed);
|
||||||
|
self.absolute_byte_offset += consumed as u64;
|
||||||
|
self.last_line_counted = 0;
|
||||||
|
self.last_line_visited =
|
||||||
|
self.last_line_visited.saturating_sub(consumed);
|
||||||
|
self.set_pos(self.pos().saturating_sub(consumed));
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn detect_binary(
|
pub(crate) fn detect_binary(
|
||||||
&mut self,
|
&mut self,
|
||||||
buf: &[u8],
|
buf: &[u8],
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
use grep_matcher::Matcher;
|
use grep_matcher::Matcher;
|
||||||
|
|
||||||
|
use std::collections::VecDeque;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
line_buffer::{DEFAULT_BUFFER_CAPACITY, LineBufferReader},
|
line_buffer::{DEFAULT_BUFFER_CAPACITY, LineBufferReader, alloc_error},
|
||||||
lines::{self, LineStep},
|
lines::{self, LineStep},
|
||||||
searcher::{Config, Range, Searcher, core::Core},
|
searcher::{Config, Range, Searcher, core::Core},
|
||||||
sink::{Sink, SinkError},
|
sink::{Sink, SinkError},
|
||||||
@@ -138,6 +140,348 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(crate) struct WindowedMultiLine<'s, M, S> {
|
||||||
|
config: &'s Config,
|
||||||
|
core: Core<'s, M, S>,
|
||||||
|
window_lines: usize,
|
||||||
|
buf: Vec<u8>,
|
||||||
|
buf_start: usize,
|
||||||
|
line_lens: VecDeque<usize>,
|
||||||
|
abs_start: u64,
|
||||||
|
current_index: usize,
|
||||||
|
eof: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s, M: Matcher, S: Sink> WindowedMultiLine<'s, M, S> {
|
||||||
|
pub(crate) fn new(
|
||||||
|
searcher: &'s Searcher,
|
||||||
|
matcher: M,
|
||||||
|
window_lines: usize,
|
||||||
|
write_to: S,
|
||||||
|
) -> WindowedMultiLine<'s, M, S> {
|
||||||
|
debug_assert!(searcher.multi_line_with_matcher(&matcher));
|
||||||
|
debug_assert!(window_lines > 0);
|
||||||
|
|
||||||
|
WindowedMultiLine {
|
||||||
|
config: &searcher.config,
|
||||||
|
core: Core::new(searcher, matcher, write_to, true),
|
||||||
|
window_lines,
|
||||||
|
buf: Vec::new(),
|
||||||
|
buf_start: 0,
|
||||||
|
line_lens: VecDeque::new(),
|
||||||
|
abs_start: 0,
|
||||||
|
current_index: 0,
|
||||||
|
eof: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn run_reader<R: std::io::Read>(
|
||||||
|
mut self,
|
||||||
|
mut rdr: LineBufferReader<'s, R>,
|
||||||
|
) -> Result<(), S::Error> {
|
||||||
|
if self.core.begin()? {
|
||||||
|
let mut already_binary = rdr.binary_byte_offset().is_some();
|
||||||
|
while self.fill_reader(&mut rdr, &mut already_binary)?
|
||||||
|
|| !self.line_lens.is_empty()
|
||||||
|
{
|
||||||
|
if !self.process_current_line()? {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let byte_count = self.byte_count();
|
||||||
|
let binary_byte_offset = self.core.binary_byte_offset();
|
||||||
|
self.core.finish(byte_count, binary_byte_offset)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn run_slice(mut self, slice: &'s [u8]) -> Result<(), S::Error> {
|
||||||
|
if self.core.begin()? {
|
||||||
|
let binary_upto =
|
||||||
|
std::cmp::min(slice.len(), DEFAULT_BUFFER_CAPACITY);
|
||||||
|
let binary_range = Range::new(0, binary_upto);
|
||||||
|
if !self.core.detect_binary(slice, &binary_range)? {
|
||||||
|
let mut stepper = LineStep::new(
|
||||||
|
self.config.line_term.as_byte(),
|
||||||
|
0,
|
||||||
|
slice.len(),
|
||||||
|
);
|
||||||
|
while let Some(line) = stepper.next_match(slice) {
|
||||||
|
self.push_line(&slice[line])?;
|
||||||
|
}
|
||||||
|
self.eof = true;
|
||||||
|
while !self.line_lens.is_empty() {
|
||||||
|
if !self.process_current_line()? {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let byte_count = self.byte_count();
|
||||||
|
let binary_byte_offset = self.core.binary_byte_offset();
|
||||||
|
self.core.finish(byte_count, binary_byte_offset)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fill_reader<R: std::io::Read>(
|
||||||
|
&mut self,
|
||||||
|
rdr: &mut LineBufferReader<'s, R>,
|
||||||
|
already_binary: &mut bool,
|
||||||
|
) -> Result<bool, S::Error> {
|
||||||
|
while !self.eof
|
||||||
|
&& self.line_lens.len() < self.current_index + self.window_lines
|
||||||
|
{
|
||||||
|
let didread = match rdr.fill() {
|
||||||
|
Err(err) => return Err(S::Error::error_io(err)),
|
||||||
|
Ok(didread) => didread,
|
||||||
|
};
|
||||||
|
if !*already_binary {
|
||||||
|
if let Some(offset) = rdr.binary_byte_offset() {
|
||||||
|
*already_binary = true;
|
||||||
|
if !self.core.binary_data(offset)? {
|
||||||
|
self.eof = true;
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !didread {
|
||||||
|
self.eof = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let buf = rdr.buffer();
|
||||||
|
let mut stepper = LineStep::new(
|
||||||
|
self.config.line_term.as_byte(),
|
||||||
|
0,
|
||||||
|
buf.len(),
|
||||||
|
);
|
||||||
|
while let Some(line) = stepper.next_match(buf) {
|
||||||
|
let bytes = &buf[line];
|
||||||
|
self.push_line(bytes)?;
|
||||||
|
}
|
||||||
|
rdr.consume(buf.len());
|
||||||
|
}
|
||||||
|
Ok(!self.eof)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn push_line(&mut self, line: &[u8]) -> Result<(), S::Error> {
|
||||||
|
self.buf.extend_from_slice(line);
|
||||||
|
self.line_lens.push_back(line.len());
|
||||||
|
if let Some(limit) = self.config.heap_limit {
|
||||||
|
let used = self.buf.len() - self.buf_start;
|
||||||
|
if used > limit {
|
||||||
|
return Err(S::Error::error_io(alloc_error(limit)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn process_current_line(&mut self) -> Result<bool, S::Error> {
|
||||||
|
if self.current_index >= self.line_lens.len() {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
let window_end =
|
||||||
|
std::cmp::min(self.line_lens.len(), self.current_index + self.window_lines);
|
||||||
|
let window_start_off = self.line_offset(self.current_index);
|
||||||
|
let window_end_off = self.line_offset(window_end);
|
||||||
|
let line0_len = self.line_lens[self.current_index];
|
||||||
|
|
||||||
|
{
|
||||||
|
let buffer = &self.buf[self.buf_start..];
|
||||||
|
let window_bytes =
|
||||||
|
&self.buf[self.buf_start + window_start_off
|
||||||
|
..self.buf_start + window_end_off];
|
||||||
|
if self.config.invert_match {
|
||||||
|
if !sink_inverted_line(
|
||||||
|
&mut self.core,
|
||||||
|
self.config,
|
||||||
|
buffer,
|
||||||
|
window_bytes,
|
||||||
|
window_start_off,
|
||||||
|
line0_len,
|
||||||
|
)? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
} else if !sink_matched_line(
|
||||||
|
&mut self.core,
|
||||||
|
self.config,
|
||||||
|
buffer,
|
||||||
|
window_bytes,
|
||||||
|
window_start_off,
|
||||||
|
line0_len,
|
||||||
|
)? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
let drop_upto = window_start_off + line0_len;
|
||||||
|
if self.config.passthru {
|
||||||
|
if !self.core.other_context_by_line(buffer, drop_upto)? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
} else if !self.core.after_context_by_line(buffer, drop_upto)? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.current_index += 1;
|
||||||
|
if self.current_index > self.config.before_context {
|
||||||
|
let drop_len = self.line_lens.pop_front().unwrap();
|
||||||
|
self.shift_buffer(drop_len);
|
||||||
|
self.current_index -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.eof && self.current_index >= self.line_lens.len() {
|
||||||
|
let buffer = &self.buf[self.buf_start..];
|
||||||
|
if self.config.passthru {
|
||||||
|
if !self.core.other_context_by_line(buffer, buffer.len())? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
} else if !self.core.after_context_by_line(buffer, buffer.len())? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn line_offset(&self, idx: usize) -> usize {
|
||||||
|
self.line_lens.iter().take(idx).sum()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn shift_buffer(&mut self, consumed: usize) {
|
||||||
|
let buffer = &self.buf[self.buf_start..];
|
||||||
|
self.core.advance_buffer(buffer, consumed);
|
||||||
|
self.buf_start += consumed;
|
||||||
|
self.abs_start += consumed as u64;
|
||||||
|
if self.buf_start > 0 && self.buf_start > self.buf.len() / 2 {
|
||||||
|
self.buf.copy_within(self.buf_start.., 0);
|
||||||
|
let new_len = self.buf.len() - self.buf_start;
|
||||||
|
self.buf.truncate(new_len);
|
||||||
|
self.buf_start = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn byte_count(&mut self) -> u64 {
|
||||||
|
match self.core.binary_byte_offset() {
|
||||||
|
Some(offset) if offset < self.core.pos() as u64 => offset,
|
||||||
|
_ => self.abs_start + (self.buf.len() - self.buf_start) as u64,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sink_matched_line<M: Matcher, S: Sink>(
|
||||||
|
core: &mut Core<'_, M, S>,
|
||||||
|
config: &Config,
|
||||||
|
buffer: &[u8],
|
||||||
|
window_bytes: &[u8],
|
||||||
|
window_start_off: usize,
|
||||||
|
line0_len: usize,
|
||||||
|
) -> Result<bool, S::Error> {
|
||||||
|
let mut pos = 0;
|
||||||
|
let mut last_match: Option<Range> = None;
|
||||||
|
while let Some(mat) = find_in_window(core, window_bytes, pos)? {
|
||||||
|
if mat.start() >= line0_len {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let line = lines::locate(
|
||||||
|
window_bytes,
|
||||||
|
config.line_term.as_byte(),
|
||||||
|
mat,
|
||||||
|
)
|
||||||
|
.offset(window_start_off);
|
||||||
|
match last_match.take() {
|
||||||
|
None => {
|
||||||
|
last_match = Some(line);
|
||||||
|
}
|
||||||
|
Some(last) => {
|
||||||
|
if last.end() >= line.start() {
|
||||||
|
last_match = Some(last.with_end(line.end()));
|
||||||
|
} else {
|
||||||
|
if !sink_context(core, config, buffer, &last)? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
if !core.matched(buffer, &last)? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
last_match = Some(line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pos = mat.end();
|
||||||
|
if mat.is_empty() && pos < window_bytes.len() {
|
||||||
|
pos += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(last) = last_match.take() {
|
||||||
|
if !sink_context(core, config, buffer, &last)? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
if !core.matched(buffer, &last)? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sink_inverted_line<M: Matcher, S: Sink>(
|
||||||
|
core: &mut Core<'_, M, S>,
|
||||||
|
config: &Config,
|
||||||
|
buffer: &[u8],
|
||||||
|
window_bytes: &[u8],
|
||||||
|
window_start_off: usize,
|
||||||
|
line0_len: usize,
|
||||||
|
) -> Result<bool, S::Error> {
|
||||||
|
let mut pos = 0;
|
||||||
|
while let Some(mat) = find_in_window(core, window_bytes, pos)? {
|
||||||
|
if mat.start() >= line0_len {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if mat.start() < line0_len {
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
pos = mat.end();
|
||||||
|
if mat.is_empty() && pos < window_bytes.len() {
|
||||||
|
pos += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let line = Range::new(window_start_off, window_start_off + line0_len);
|
||||||
|
if !sink_context(core, config, buffer, &line)? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
if !core.matched(buffer, &line)? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn find_in_window<M: Matcher, S: Sink>(
|
||||||
|
core: &mut Core<'_, M, S>,
|
||||||
|
window_bytes: &[u8],
|
||||||
|
pos: usize,
|
||||||
|
) -> Result<Option<Range>, S::Error> {
|
||||||
|
core.find(&window_bytes[pos..])
|
||||||
|
.map(|m| m.map(|m| m.offset(pos)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sink_context<M: Matcher, S: Sink>(
|
||||||
|
core: &mut Core<'_, M, S>,
|
||||||
|
config: &Config,
|
||||||
|
buffer: &[u8],
|
||||||
|
range: &Range,
|
||||||
|
) -> Result<bool, S::Error> {
|
||||||
|
if config.passthru {
|
||||||
|
if !core.other_context_by_line(buffer, range.start())? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if !core.after_context_by_line(buffer, range.start())? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
if !core.before_context_by_line(buffer, range.start())? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(crate) struct MultiLine<'s, M, S> {
|
pub(crate) struct MultiLine<'s, M, S> {
|
||||||
config: &'s Config,
|
config: &'s Config,
|
||||||
@@ -518,6 +862,37 @@ byte count:366
|
|||||||
.test();
|
.test();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A three-line pattern must not match when the multi-line window is two
// lines, and must match once the window is widened to three lines.
#[test]
fn multi_line_window_limits_match() {
    let haystack = "a\nb\nc\nd\n";
    let matcher = RegexMatcher::new("a\nb\nc");

    // Runs a multi-line search over `haystack` with the given window and
    // returns everything the sink recorded as a string.
    let search_with_window = |window: usize| -> String {
        let mut builder = SearcherBuilder::new();
        builder
            .multi_line(true)
            .multiline_window(Some(window))
            .line_number(false);
        let mut sink = KitchenSink::new();
        let mut searcher = builder.build();
        searcher
            .search_slice(&matcher, haystack.as_bytes(), &mut sink)
            .unwrap();
        String::from_utf8(sink.as_bytes().to_vec()).unwrap()
    };

    // Window of two lines: the pattern spans three, so nothing matches.
    let got = search_with_window(2);
    let exp = format!("\nbyte count:{}\n", haystack.len());
    assert_eq!(exp, got);

    // Window of three lines: the whole match is reported.
    let exp = format!(
        "0:a\n2:b\n4:c\n\nbyte count:{}\n",
        haystack.len()
    );
    let got = search_with_window(3);
    assert_eq!(exp, got);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn multi_line_overlap2() {
|
fn multi_line_overlap2() {
|
||||||
let haystack = "xxx\nabc\ndefabc\ndefxxx\nxxx";
|
let haystack = "xxx\nabc\ndefabc\ndefxxx\nxxx";
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ use crate::{
|
|||||||
self, BufferAllocation, DEFAULT_BUFFER_CAPACITY, LineBuffer,
|
self, BufferAllocation, DEFAULT_BUFFER_CAPACITY, LineBuffer,
|
||||||
LineBufferBuilder, LineBufferReader, alloc_error,
|
LineBufferBuilder, LineBufferReader, alloc_error,
|
||||||
},
|
},
|
||||||
searcher::glue::{MultiLine, ReadByLine, SliceByLine},
|
searcher::glue::{MultiLine, ReadByLine, SliceByLine, WindowedMultiLine},
|
||||||
sink::{Sink, SinkError},
|
sink::{Sink, SinkError},
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -172,6 +172,8 @@ pub struct Config {
|
|||||||
binary: BinaryDetection,
|
binary: BinaryDetection,
|
||||||
/// Whether to enable matching across multiple lines.
|
/// Whether to enable matching across multiple lines.
|
||||||
multi_line: bool,
|
multi_line: bool,
|
||||||
|
/// The maximum number of lines a multi-line match may span.
|
||||||
|
multiline_window: Option<usize>,
|
||||||
/// An encoding that, when present, causes the searcher to transcode all
|
/// An encoding that, when present, causes the searcher to transcode all
|
||||||
/// input from the encoding to UTF-8.
|
/// input from the encoding to UTF-8.
|
||||||
encoding: Option<Encoding>,
|
encoding: Option<Encoding>,
|
||||||
@@ -197,6 +199,7 @@ impl Default for Config {
|
|||||||
mmap: MmapChoice::default(),
|
mmap: MmapChoice::default(),
|
||||||
binary: BinaryDetection::default(),
|
binary: BinaryDetection::default(),
|
||||||
multi_line: false,
|
multi_line: false,
|
||||||
|
multiline_window: None,
|
||||||
encoding: None,
|
encoding: None,
|
||||||
bom_sniffing: true,
|
bom_sniffing: true,
|
||||||
stop_on_nonmatch: false,
|
stop_on_nonmatch: false,
|
||||||
@@ -390,6 +393,15 @@ impl SearcherBuilder {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Limit multi-line matches to a window of at most `line_count` lines.
///
/// This is `None` by default, which places no limit on the number of
/// lines a multi-line match may span. When set to `Some(n)` and
/// multi-line search is in effect, the searcher uses its windowed
/// multi-line strategy instead of the regular multi-line strategy.
pub fn multiline_window(
    &mut self,
    line_count: Option<usize>,
) -> &mut SearcherBuilder {
    self.config.multiline_window = line_count;
    self
}
|
||||||
|
|
||||||
/// Whether to include a fixed number of lines after every match.
|
/// Whether to include a fixed number of lines after every match.
|
||||||
///
|
///
|
||||||
/// When this is set to a non-zero number, then the searcher will report
|
/// When this is set to a non-zero number, then the searcher will report
|
||||||
@@ -694,6 +706,13 @@ impl Searcher {
|
|||||||
// enabled. This pre-allocates a buffer roughly the size of the file,
|
// enabled. This pre-allocates a buffer roughly the size of the file,
|
||||||
// which isn't possible when searching an arbitrary std::io::Read.
|
// which isn't possible when searching an arbitrary std::io::Read.
|
||||||
if self.multi_line_with_matcher(&matcher) {
|
if self.multi_line_with_matcher(&matcher) {
|
||||||
|
if self.config.multiline_window.is_some() {
|
||||||
|
log::trace!(
|
||||||
|
"{:?}: searching via windowed multiline strategy",
|
||||||
|
path
|
||||||
|
);
|
||||||
|
return self.search_reader(matcher, file, write_to);
|
||||||
|
}
|
||||||
log::trace!(
|
log::trace!(
|
||||||
"{:?}: reading entire file on to heap for mulitline",
|
"{:?}: reading entire file on to heap for mulitline",
|
||||||
path
|
path
|
||||||
@@ -744,6 +763,18 @@ impl Searcher {
|
|||||||
.map_err(S::Error::error_io)?;
|
.map_err(S::Error::error_io)?;
|
||||||
|
|
||||||
if self.multi_line_with_matcher(&matcher) {
|
if self.multi_line_with_matcher(&matcher) {
|
||||||
|
if let Some(window_lines) = self.config.multiline_window {
|
||||||
|
let mut line_buffer = self.line_buffer.borrow_mut();
|
||||||
|
let rdr = LineBufferReader::new(decoder, &mut *line_buffer);
|
||||||
|
log::trace!("generic reader: searching via windowed multiline");
|
||||||
|
return WindowedMultiLine::new(
|
||||||
|
self,
|
||||||
|
matcher,
|
||||||
|
window_lines,
|
||||||
|
write_to,
|
||||||
|
)
|
||||||
|
.run_reader(rdr);
|
||||||
|
}
|
||||||
log::trace!(
|
log::trace!(
|
||||||
"generic reader: reading everything to heap for multiline"
|
"generic reader: reading everything to heap for multiline"
|
||||||
);
|
);
|
||||||
@@ -786,6 +817,16 @@ impl Searcher {
|
|||||||
return self.search_reader(matcher, slice, write_to);
|
return self.search_reader(matcher, slice, write_to);
|
||||||
}
|
}
|
||||||
if self.multi_line_with_matcher(&matcher) {
|
if self.multi_line_with_matcher(&matcher) {
|
||||||
|
if let Some(window_lines) = self.config.multiline_window {
|
||||||
|
log::trace!("slice reader: searching via windowed multiline");
|
||||||
|
return WindowedMultiLine::new(
|
||||||
|
self,
|
||||||
|
matcher,
|
||||||
|
window_lines,
|
||||||
|
write_to,
|
||||||
|
)
|
||||||
|
.run_slice(slice);
|
||||||
|
}
|
||||||
log::trace!("slice reader: searching via multiline strategy");
|
log::trace!("slice reader: searching via multiline strategy");
|
||||||
MultiLine::new(self, matcher, slice, write_to).run()
|
MultiLine::new(self, matcher, slice, write_to).run()
|
||||||
} else {
|
} else {
|
||||||
@@ -865,6 +906,12 @@ impl Searcher {
|
|||||||
self.config.multi_line
|
self.config.multi_line
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the maximum number of lines a multi-line match may span.
///
/// `None` means that no window limit has been configured, which is the
/// default.
#[inline]
pub fn multiline_window(&self) -> Option<usize> {
    self.config.multiline_window
}
|
||||||
|
|
||||||
/// Returns true if and only if this searcher is configured to stop when it
|
/// Returns true if and only if this searcher is configured to stop when it
|
||||||
/// finds a non-matching line after a matching one.
|
/// finds a non-matching line after a matching one.
|
||||||
#[inline]
|
#[inline]
|
||||||
|
|||||||
Reference in New Issue
Block a user