As suggested by @epage[1].
Ad hoc timings on my i7-12900K:
before cargo build: 4.91s
before cargo build release: 8.05s
after cargo build: 4.69s
after cargo build release: 7.83s
... pretty underwhelming if you ask me. Ah well. And on my M2 Mac mini:
before cargo build: 6.18s
before cargo build release: 14.50s
after cargo build: 5.52s
after cargo build release: 13.44s
Still kind of underwhelming, but definitely better. It shaves a full
second off of compile times in release mode. I went back to my
i7-12900K, but passed `-j1` to `cargo build` to force single threaded
mode:
before cargo build: 19.44s
before cargo build release: 50.64s
after cargo build: 16.76s
after cargo build release: 48.00s
Which seems pretty consistent with the modest improvements above.
Looking at `cargo build --timings`, the beefiest chunk of time is spent
in compiling `regex-automata`, by far. This is fine because it's core
functionality. I wish a fast general purpose regex engine with its
internals exposed as a separately versioned library didn't require so
much code... Blech.
[1]: https://old.reddit.com/r/rust/comments/17rd8ww/faster_compilation_with_the_parallel_frontend_in/k8igjlg/
218 lines
6.9 KiB
Rust
218 lines
6.9 KiB
Rust
// This module defines the types we use for JSON serialization. We specifically
// omit deserialization, partially because there isn't a clear use case for
// it at this time, but also because deserialization would complicate things.
// Namely, the types below are designed in a way that permits JSON
// serialization with little or no allocation. Allocation is often quite
// convenient for deserialization however, so these types would become a bit
// more complex.
|
|
|
|
use std::{borrow::Cow, path::Path};
|
|
|
|
pub(crate) enum Message<'a> {
|
|
Begin(Begin<'a>),
|
|
End(End<'a>),
|
|
Match(Match<'a>),
|
|
Context(Context<'a>),
|
|
}
|
|
|
|
impl<'a> serde::Serialize for Message<'a> {
|
|
fn serialize<S: serde::Serializer>(
|
|
&self,
|
|
s: S,
|
|
) -> Result<S::Ok, S::Error> {
|
|
use serde::ser::SerializeStruct;
|
|
|
|
let mut state = s.serialize_struct("Message", 2)?;
|
|
match *self {
|
|
Message::Begin(ref msg) => {
|
|
state.serialize_field("type", &"begin")?;
|
|
state.serialize_field("data", msg)?;
|
|
}
|
|
Message::End(ref msg) => {
|
|
state.serialize_field("type", &"end")?;
|
|
state.serialize_field("data", msg)?;
|
|
}
|
|
Message::Match(ref msg) => {
|
|
state.serialize_field("type", &"match")?;
|
|
state.serialize_field("data", msg)?;
|
|
}
|
|
Message::Context(ref msg) => {
|
|
state.serialize_field("type", &"context")?;
|
|
state.serialize_field("data", msg)?;
|
|
}
|
|
}
|
|
state.end()
|
|
}
|
|
}
|
|
|
|
pub(crate) struct Begin<'a> {
|
|
pub(crate) path: Option<&'a Path>,
|
|
}
|
|
|
|
impl<'a> serde::Serialize for Begin<'a> {
|
|
fn serialize<S: serde::Serializer>(
|
|
&self,
|
|
s: S,
|
|
) -> Result<S::Ok, S::Error> {
|
|
use serde::ser::SerializeStruct;
|
|
|
|
let mut state = s.serialize_struct("Begin", 1)?;
|
|
state.serialize_field("path", &self.path.map(Data::from_path))?;
|
|
state.end()
|
|
}
|
|
}
|
|
|
|
pub(crate) struct End<'a> {
|
|
pub(crate) path: Option<&'a Path>,
|
|
pub(crate) binary_offset: Option<u64>,
|
|
pub(crate) stats: crate::stats::Stats,
|
|
}
|
|
|
|
impl<'a> serde::Serialize for End<'a> {
|
|
fn serialize<S: serde::Serializer>(
|
|
&self,
|
|
s: S,
|
|
) -> Result<S::Ok, S::Error> {
|
|
use serde::ser::SerializeStruct;
|
|
|
|
let mut state = s.serialize_struct("End", 3)?;
|
|
state.serialize_field("path", &self.path.map(Data::from_path))?;
|
|
state.serialize_field("binary_offset", &self.binary_offset)?;
|
|
state.serialize_field("stats", &self.stats)?;
|
|
state.end()
|
|
}
|
|
}
|
|
|
|
pub(crate) struct Match<'a> {
|
|
pub(crate) path: Option<&'a Path>,
|
|
pub(crate) lines: &'a [u8],
|
|
pub(crate) line_number: Option<u64>,
|
|
pub(crate) absolute_offset: u64,
|
|
pub(crate) submatches: &'a [SubMatch<'a>],
|
|
}
|
|
|
|
impl<'a> serde::Serialize for Match<'a> {
|
|
fn serialize<S: serde::Serializer>(
|
|
&self,
|
|
s: S,
|
|
) -> Result<S::Ok, S::Error> {
|
|
use serde::ser::SerializeStruct;
|
|
|
|
let mut state = s.serialize_struct("Match", 5)?;
|
|
state.serialize_field("path", &self.path.map(Data::from_path))?;
|
|
state.serialize_field("lines", &Data::from_bytes(self.lines))?;
|
|
state.serialize_field("line_number", &self.line_number)?;
|
|
state.serialize_field("absolute_offset", &self.absolute_offset)?;
|
|
state.serialize_field("submatches", &self.submatches)?;
|
|
state.end()
|
|
}
|
|
}
|
|
|
|
pub(crate) struct Context<'a> {
|
|
pub(crate) path: Option<&'a Path>,
|
|
pub(crate) lines: &'a [u8],
|
|
pub(crate) line_number: Option<u64>,
|
|
pub(crate) absolute_offset: u64,
|
|
pub(crate) submatches: &'a [SubMatch<'a>],
|
|
}
|
|
|
|
impl<'a> serde::Serialize for Context<'a> {
|
|
fn serialize<S: serde::Serializer>(
|
|
&self,
|
|
s: S,
|
|
) -> Result<S::Ok, S::Error> {
|
|
use serde::ser::SerializeStruct;
|
|
|
|
let mut state = s.serialize_struct("Context", 5)?;
|
|
state.serialize_field("path", &self.path.map(Data::from_path))?;
|
|
state.serialize_field("lines", &Data::from_bytes(self.lines))?;
|
|
state.serialize_field("line_number", &self.line_number)?;
|
|
state.serialize_field("absolute_offset", &self.absolute_offset)?;
|
|
state.serialize_field("submatches", &self.submatches)?;
|
|
state.end()
|
|
}
|
|
}
|
|
|
|
pub(crate) struct SubMatch<'a> {
|
|
pub(crate) m: &'a [u8],
|
|
pub(crate) start: usize,
|
|
pub(crate) end: usize,
|
|
}
|
|
|
|
impl<'a> serde::Serialize for SubMatch<'a> {
|
|
fn serialize<S: serde::Serializer>(
|
|
&self,
|
|
s: S,
|
|
) -> Result<S::Ok, S::Error> {
|
|
use serde::ser::SerializeStruct;
|
|
|
|
let mut state = s.serialize_struct("SubMatch", 3)?;
|
|
state.serialize_field("match", &Data::from_bytes(self.m))?;
|
|
state.serialize_field("start", &self.start)?;
|
|
state.serialize_field("end", &self.end)?;
|
|
state.end()
|
|
}
|
|
}
|
|
|
|
/// Data represents things that look like strings, but may actually not be
/// valid UTF-8. To handle this, `Data` is serialized as an object with one
/// of two keys: `text` (for valid UTF-8) or `bytes` (for invalid UTF-8).
///
/// The happy path is valid UTF-8, which streams right through as-is, since
/// it is natively supported by JSON. When invalid UTF-8 is found, then it is
/// represented as arbitrary bytes and base64 encoded.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
enum Data<'a> {
    /// Text that is valid UTF-8. Borrowed whenever the source already is
    /// UTF-8; owned only when a lossy path conversion had to allocate.
    Text { text: Cow<'a, str> },
    /// Bytes that are not valid UTF-8, borrowed from the source.
    Bytes { bytes: &'a [u8] },
}

impl Data<'_> {
    /// Classifies `bytes` without copying them.
    ///
    /// Returns `Data::Text` borrowing `bytes` as a `str` when they are valid
    /// UTF-8 (the empty slice included), and `Data::Bytes` borrowing them
    /// unchanged otherwise.
    fn from_bytes(bytes: &[u8]) -> Data<'_> {
        match std::str::from_utf8(bytes) {
            Ok(text) => Data::Text { text: Cow::Borrowed(text) },
            Err(_) => Data::Bytes { bytes },
        }
    }

    /// Classifies `path` without copying it (Unix targets).
    ///
    /// Returns `Data::Text` when the path is valid UTF-8. Otherwise the
    /// path's underlying OS bytes are returned as `Data::Bytes`, so the
    /// exact path can still be recovered by the consumer.
    #[cfg(unix)]
    fn from_path(path: &Path) -> Data<'_> {
        use std::os::unix::ffi::OsStrExt;

        match path.to_str() {
            Some(text) => Data::Text { text: Cow::Borrowed(text) },
            None => Data::Bytes { bytes: path.as_os_str().as_bytes() },
        }
    }

    /// Converts `path` to text (non-Unix targets).
    ///
    /// Always returns `Data::Text`, using a lossy conversion for paths that
    /// are not valid Unicode.
    #[cfg(not(unix))]
    fn from_path(path: &Path) -> Data<'_> {
        // Using lossy conversion means some paths won't round trip precisely,
        // but it's not clear what we should actually do. Serde rejects
        // non-UTF-8 paths, and OsStr's are serialized as a sequence of UTF-16
        // code units on Windows. Neither seem appropriate for this use case,
        // so we do the easy thing for now.
        Data::Text { text: path.to_string_lossy() }
    }
}
|
|
|
|
impl<'a> serde::Serialize for Data<'a> {
|
|
fn serialize<S: serde::Serializer>(
|
|
&self,
|
|
s: S,
|
|
) -> Result<S::Ok, S::Error> {
|
|
use serde::ser::SerializeStruct;
|
|
|
|
let mut state = s.serialize_struct("Data", 1)?;
|
|
match *self {
|
|
Data::Text { ref text } => state.serialize_field("text", text)?,
|
|
Data::Bytes { bytes } => {
|
|
use base64::engine::{general_purpose::STANDARD, Engine};
|
|
let encoded = STANDARD.encode(bytes);
|
|
state.serialize_field("bytes", &encoded)?;
|
|
}
|
|
}
|
|
state.end()
|
|
}
|
|
}
|