grep: upgrade to regex-syntax 0.5

This update brings with it many bug fixes:

  * Better error messages are printed overall. We also include explicit call-outs for unsupported features like backreferences and look-around.
  * Regexes like `\s*{` no longer emit incomprehensible errors.
  * Unicode escape sequences, such as `\u{..}`, are now supported.

For the most part, this upgrade was done in a straightforward way. We resist the urge to refactor the `grep` crate, in anticipation of it being rewritten anyway.

Note that we removed the `--fixed-strings` suggestion whenever a regex syntax error occurs. In practice, I've found that it results in a lot of false positives, and I believe that its use is not as paramount now that regex parse errors are much more readable.

Closes #268, Closes #395, Closes #702, Closes #853
2018-03-13 20:38:50 -04:00
parent c2e97cd858
commit cd08707c7c
9 changed files with 152 additions and 159 deletions
--- a/grep/src/nonl.rs
+++ b/grep/src/nonl.rs
@@ -1,4 +1,4 @@
-use syntax::Expr;
+use syntax::hir::{self, Hir, HirKind};

 use {Error, Result};

@@ -9,59 +9,66 @@ use {Error, Result};
 ///
 /// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this
 /// function panics.
-pub fn remove(expr: Expr, byte: u8) -> Result<Expr> {
-    // TODO(burntsushi): There is a bug in this routine where only `\n` is
-    // handled correctly. Namely, `AnyChar` and `AnyByte` need to be translated
-    // to proper character classes instead of the special `AnyCharNoNL` and
-    // `AnyByteNoNL` classes.
-    use syntax::Expr::*;
+pub fn remove(expr: Hir, byte: u8) -> Result<Hir> {
    assert!(byte <= 0x7F);
    let chr = byte as char;
    assert!(chr.len_utf8() == 1);

-    Ok(match expr {
-        Literal { chars, casei } => {
-            if chars.iter().position(|&c| c == chr).is_some() {
+    Ok(match expr.into_kind() {
+        HirKind::Empty => Hir::empty(),
+        HirKind::Literal(hir::Literal::Unicode(c)) => {
+            if c == chr {
                return Err(Error::LiteralNotAllowed(chr));
            }
-            Literal { chars: chars, casei: casei }
+            Hir::literal(hir::Literal::Unicode(c))
        }
-        LiteralBytes { bytes, casei } => {
-            if bytes.iter().position(|&b| b == byte).is_some() {
+        HirKind::Literal(hir::Literal::Byte(b)) => {
+            if b as char == chr {
                return Err(Error::LiteralNotAllowed(chr));
            }
-            LiteralBytes { bytes: bytes, casei: casei }
+            Hir::literal(hir::Literal::Byte(b))
        }
-        AnyChar => AnyCharNoNL,
-        AnyByte => AnyByteNoNL,
-        Class(mut cls) => {
-            cls.remove(chr);
-            Class(cls)
-        }
-        ClassBytes(mut cls) => {
-            cls.remove(byte);
-            ClassBytes(cls)
-        }
-        Group { e, i, name } => {
-            Group {
-                e: Box::new(remove(*e, byte)?),
-                i: i,
-                name: name,
+        HirKind::Class(hir::Class::Unicode(mut cls)) => {
+            let remove = hir::ClassUnicode::new(Some(
+                hir::ClassUnicodeRange::new(chr, chr),
+            ));
+            cls.difference(&remove);
+            if cls.iter().next().is_none() {
+                return Err(Error::LiteralNotAllowed(chr));
            }
+            Hir::class(hir::Class::Unicode(cls))
        }
-        Repeat { e, r, greedy } => {
-            Repeat {
-                e: Box::new(remove(*e, byte)?),
-                r: r,
-                greedy: greedy,
+        HirKind::Class(hir::Class::Bytes(mut cls)) => {
+            let remove = hir::ClassBytes::new(Some(
+                hir::ClassBytesRange::new(byte, byte),
+            ));
+            cls.difference(&remove);
+            if cls.iter().next().is_none() {
+                return Err(Error::LiteralNotAllowed(chr));
            }
+            Hir::class(hir::Class::Bytes(cls))
        }
-        Concat(exprs) => {
-            Concat(exprs.into_iter().map(|e| remove(e, byte)).collect::<Result<Vec<Expr>>>()?)
+        HirKind::Anchor(x) => Hir::anchor(x),
+        HirKind::WordBoundary(x) => Hir::word_boundary(x),
+        HirKind::Repetition(mut x) => {
+            x.hir = Box::new(remove(*x.hir, byte)?);
+            Hir::repetition(x)
        }
-        Alternate(exprs) => {
-            Alternate(exprs.into_iter().map(|e| remove(e, byte)).collect::<Result<Vec<Expr>>>()?)
+        HirKind::Group(mut x) => {
+            x.hir = Box::new(remove(*x.hir, byte)?);
+            Hir::group(x)
+        }
+        HirKind::Concat(xs) => {
+            let xs = xs.into_iter()
+                .map(|e| remove(e, byte))
+                .collect::<Result<Vec<Hir>>>()?;
+            Hir::concat(xs)
+        }
+        HirKind::Alternation(xs) => {
+            let xs = xs.into_iter()
+                .map(|e| remove(e, byte))
+                .collect::<Result<Vec<Hir>>>()?;
+            Hir::alternation(xs)
        }
-        e => e,
    })
 }