[tbb-commits] [tor-browser] 31/76: Bug 1758509 - Update regex crate to 1.5.5 r=emilio, a=RyanVM

gitolite role git at cupani.torproject.org
Wed Mar 30 20:39:59 UTC 2022


This is an automated email from the git hooks/post-receive script.

richard pushed a commit to branch tor-browser-91.8.0esr-11.0-1
in repository tor-browser.

commit 43f4ff7552d63d5e8327a82b6e64c19cc0663c49
Author: Jan-Erik Rediger <jrediger at mozilla.com>
AuthorDate: Wed Mar 9 08:43:35 2022 +0000

    Bug 1758509 - Update regex crate to 1.5.5 r=emilio, a=RyanVM
    
    Differential Revision: https://phabricator.services.mozilla.com/D140532
---
 Cargo.lock                                   |  4 +-
 third_party/rust/regex/.cargo-checksum.json  |  2 +-
 third_party/rust/regex/Cargo.lock            |  6 +--
 third_party/rust/regex/Cargo.toml            | 59 +++++++++++++++++------
 third_party/rust/regex/README.md             |  2 +-
 third_party/rust/regex/src/compile.rs        | 27 ++++++++++-
 third_party/rust/regex/src/dfa.rs            |  2 -
 third_party/rust/regex/src/re_unicode.rs     |  2 +-
 third_party/rust/regex/tests/test_default.rs | 70 ++++++++++++++++++++++++++++
 9 files changed, 149 insertions(+), 25 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 56122da06357a..b0cc79279b1ac 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4049,9 +4049,9 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.5.4"
+version = "1.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
+checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286"
 dependencies = [
  "aho-corasick",
  "memchr",
diff --git a/third_party/rust/regex/.cargo-checksum.json b/third_party/rust/regex/.cargo-checksum.json
index 186d5c5b00515..912ddc14466e3 100644
--- a/third_party/rust/regex/.cargo-checksum.json
+++ b/third_party/rust/regex/.cargo-checksum.json
@@ -1 +1 @@
-{"files":{"CHANGELOG.md":"dc37d7389aa7f626a2a76f565b379378a63a043004674735e2b5859202b255ea","Cargo.lock":"b74b289188546209fb8b9024eb5cf7c43dc24b199cf2aaf2b11601ee24d9575d","Cargo.toml":"d5bf863bcab85700dc82190169003b7781f7985e51f7ba67a757fc2ba069ee6e","HACKING.md":"17818f7a17723608f6bdbe6388ad0a913d4f96f76a16649aaf4e274b1fa0ea97","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1 [...]
\ No newline at end of file
+{"files":{"CHANGELOG.md":"dc37d7389aa7f626a2a76f565b379378a63a043004674735e2b5859202b255ea","Cargo.lock":"68805a2737aad9b6520868b2f85dbe638c4efb9f4aef759226a129edb5940434","Cargo.toml":"5678757a6bafcac57c7a3b51655ef978901112eb0629976a402560db5f948f56","HACKING.md":"17818f7a17723608f6bdbe6388ad0a913d4f96f76a16649aaf4e274b1fa0ea97","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1 [...]
\ No newline at end of file
diff --git a/third_party/rust/regex/Cargo.lock b/third_party/rust/regex/Cargo.lock
index f2bbb5d850ab1..cc51538bf7ec1 100644
--- a/third_party/rust/regex/Cargo.lock
+++ b/third_party/rust/regex/Cargo.lock
@@ -36,9 +36,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 
 [[package]]
 name = "libc"
-version = "0.2.80"
+version = "0.2.94"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614"
+checksum = "18794a8ad5b29321f790b55d93dfba91e125cb1a9edbd4f8e3150acc771c1a5e"
 
 [[package]]
 name = "memchr"
@@ -75,7 +75,7 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.5.4"
+version = "1.5.5"
 dependencies = [
  "aho-corasick",
  "lazy_static",
diff --git a/third_party/rust/regex/Cargo.toml b/third_party/rust/regex/Cargo.toml
index 260acec69df73..d28f676ded14d 100644
--- a/third_party/rust/regex/Cargo.toml
+++ b/third_party/rust/regex/Cargo.toml
@@ -3,27 +3,33 @@
 # When uploading crates to the registry Cargo will automatically
 # "normalize" Cargo.toml files for maximal compatibility
 # with all versions of Cargo and also rewrite `path` dependencies
-# to registry (e.g., crates.io) dependencies
+# to registry (e.g., crates.io) dependencies.
 #
-# If you believe there's an error in this file please file an
-# issue against the rust-lang/cargo repository. If you're
-# editing this file be aware that the upstream Cargo.toml
-# will likely look very different (and much more reasonable)
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
 
 [package]
 edition = "2018"
 name = "regex"
-version = "1.5.4"
+version = "1.5.5"
 authors = ["The Rust Project Developers"]
-exclude = ["/scripts/*", "/.github/*"]
+exclude = [
+    "/scripts/*",
+    "/.github/*",
+]
 autotests = false
-description = "An implementation of regular expressions for Rust. This implementation uses\nfinite automata and guarantees linear time matching on all inputs.\n"
+description = """
+An implementation of regular expressions for Rust. This implementation uses
+finite automata and guarantees linear time matching on all inputs.
+"""
 homepage = "https://github.com/rust-lang/regex"
 documentation = "https://docs.rs/regex"
 readme = "README.md"
 categories = ["text-processing"]
 license = "MIT OR Apache-2.0"
 repository = "https://github.com/rust-lang/regex"
+
 [profile.bench]
 debug = true
 
@@ -72,6 +78,7 @@ path = "tests/test_backtrack_bytes.rs"
 [[test]]
 name = "crates-regex"
 path = "tests/test_crates_regex.rs"
+
 [dependencies.aho-corasick]
 version = "0.7.18"
 optional = true
@@ -83,6 +90,7 @@ optional = true
 [dependencies.regex-syntax]
 version = "0.6.25"
 default-features = false
+
 [dev-dependencies.lazy_static]
 version = "1"
 
@@ -92,19 +100,44 @@ default-features = false
 
 [dev-dependencies.rand]
 version = "0.8.3"
-features = ["getrandom", "small_rng"]
+features = [
+    "getrandom",
+    "small_rng",
+]
 default-features = false
 
 [features]
-default = ["std", "perf", "unicode", "regex-syntax/default"]
+default = [
+    "std",
+    "perf",
+    "unicode",
+    "regex-syntax/default",
+]
 pattern = []
-perf = ["perf-cache", "perf-dfa", "perf-inline", "perf-literal"]
+perf = [
+    "perf-cache",
+    "perf-dfa",
+    "perf-inline",
+    "perf-literal",
+]
 perf-cache = []
 perf-dfa = []
 perf-inline = []
-perf-literal = ["aho-corasick", "memchr"]
+perf-literal = [
+    "aho-corasick",
+    "memchr",
+]
 std = []
-unicode = ["unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment", "regex-syntax/unicode"]
+unicode = [
+    "unicode-age",
+    "unicode-bool",
+    "unicode-case",
+    "unicode-gencat",
+    "unicode-perl",
+    "unicode-script",
+    "unicode-segment",
+    "regex-syntax/unicode",
+]
 unicode-age = ["regex-syntax/unicode-age"]
 unicode-bool = ["regex-syntax/unicode-bool"]
 unicode-case = ["regex-syntax/unicode-case"]
diff --git a/third_party/rust/regex/README.md b/third_party/rust/regex/README.md
index 86d69968caca6..9acd5bb4a02a3 100644
--- a/third_party/rust/regex/README.md
+++ b/third_party/rust/regex/README.md
@@ -8,7 +8,7 @@ Much of the syntax and implementation is inspired
 by [RE2](https://github.com/google/re2).
 
 [![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions)
-[![](https://meritbadge.herokuapp.com/regex)](https://crates.io/crates/regex)
+[![Crates.io](https://img.shields.io/crates/v/regex.svg)](https://crates.io/crates/regex)
 [![Rust](https://img.shields.io/badge/rust-1.41.1%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex)
 
 ### Documentation
diff --git a/third_party/rust/regex/src/compile.rs b/third_party/rust/regex/src/compile.rs
index 9a2ed5e92a0d6..069f445c86f24 100644
--- a/third_party/rust/regex/src/compile.rs
+++ b/third_party/rust/regex/src/compile.rs
@@ -38,6 +38,16 @@ pub struct Compiler {
     suffix_cache: SuffixCache,
     utf8_seqs: Option<Utf8Sequences>,
     byte_classes: ByteClassSet,
+    // This keeps track of extra bytes allocated while compiling the regex
+    // program. Currently, this corresponds to two things. First is the heap
+    // memory allocated by Unicode character classes ('InstRanges'). Second is
+    // a "fake" amount of memory used by empty sub-expressions, so that enough
+    // empty sub-expressions will ultimately trigger the compiler to bail
+    // because of a size limit restriction. (That empty sub-expressions don't
+    // add to heap memory usage is more-or-less an implementation detail.) In
+    // the second case, if we don't bail, then an excessively large repetition
+    // on an empty sub-expression can result in the compiler using a very large
+    // amount of CPU time.
     extra_inst_bytes: usize,
 }
 
@@ -260,7 +270,7 @@ impl Compiler {
 
         self.check_size()?;
         match *expr.kind() {
-            Empty => Ok(None),
+            Empty => self.c_empty(),
             Literal(hir::Literal::Unicode(c)) => self.c_char(c),
             Literal(hir::Literal::Byte(b)) => {
                 assert!(self.compiled.uses_bytes());
@@ -378,6 +388,19 @@ impl Compiler {
         }
     }
 
+    fn c_empty(&mut self) -> ResultOrEmpty {
+        // See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
+        // See: CVE-2022-24713
+        //
+        // Since 'empty' sub-expressions don't increase the size of
+        // the actual compiled object, we "fake" an increase in its
+        // size so that our 'check_size' routine will eventually
+        // stop compilation if there are too many empty sub-expressions
+        // (e.g., via a large repetition).
+        self.extra_inst_bytes += std::mem::size_of::<Inst>();
+        Ok(None)
+    }
+
     fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> ResultOrEmpty {
         if self.num_exprs > 1 || self.compiled.is_dfa {
             // Don't ever compile Save instructions for regex sets because
@@ -496,7 +519,7 @@ impl Compiler {
         let mut exprs = exprs.into_iter();
         let Patch { mut hole, entry } = loop {
             match exprs.next() {
-                None => return Ok(None),
+                None => return self.c_empty(),
                 Some(e) => {
                     if let Some(p) = self.c(e)? {
                         break p;
diff --git a/third_party/rust/regex/src/dfa.rs b/third_party/rust/regex/src/dfa.rs
index 4b60f4d19bd07..4aee8039c6c1a 100644
--- a/third_party/rust/regex/src/dfa.rs
+++ b/third_party/rust/regex/src/dfa.rs
@@ -1353,7 +1353,6 @@ impl<'a> Fsm<'a> {
         match self.cache.trans.next(si, self.byte_class(b)) {
             STATE_UNKNOWN => self.exec_byte(qcur, qnext, si, b),
             STATE_QUIT => None,
-            STATE_DEAD => Some(STATE_DEAD),
             nsi => Some(nsi),
         }
     }
@@ -1387,7 +1386,6 @@ impl<'a> Fsm<'a> {
         };
         match self.cache.start_states[flagi] {
             STATE_UNKNOWN => {}
-            STATE_DEAD => return Some(STATE_DEAD),
             si => return Some(si),
         }
         q.clear();
diff --git a/third_party/rust/regex/src/re_unicode.rs b/third_party/rust/regex/src/re_unicode.rs
index 142c78fb1c301..e4871a621733d 100644
--- a/third_party/rust/regex/src/re_unicode.rs
+++ b/third_party/rust/regex/src/re_unicode.rs
@@ -538,7 +538,7 @@ impl Regex {
         mut rep: R,
     ) -> Cow<'t, str> {
         // If we know that the replacement doesn't have any capture expansions,
-        // then we can fast path. The fast path can make a tremendous
+        // then we can use the fast path. The fast path can make a tremendous
         // difference:
         //
         //   1) We use `find_iter` instead of `captures_iter`. Not asking for
diff --git a/third_party/rust/regex/tests/test_default.rs b/third_party/rust/regex/tests/test_default.rs
index d4365fbb344ea..be627f7a68f16 100644
--- a/third_party/rust/regex/tests/test_default.rs
+++ b/third_party/rust/regex/tests/test_default.rs
@@ -150,3 +150,73 @@ fn regex_is_reasonably_small() {
     assert_eq!(16, size_of::<bytes::Regex>());
     assert_eq!(16, size_of::<bytes::RegexSet>());
 }
+
+// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
+// See: CVE-2022-24713
+//
+// We test that our regex compiler will correctly return a "too big" error when
+// we try to use a very large repetition on an *empty* sub-expression.
+//
+// At the time this test was written, the regex compiler does not represent
+// empty sub-expressions with any bytecode instructions. In effect, it's an
+// "optimization" to leave them out, since they would otherwise correspond
+// to an unconditional JUMP in the regex bytecode (i.e., an unconditional
+// epsilon transition in the NFA graph). Therefore, an empty sub-expression
+// represents an interesting case for the compiler's size limits. Since it
+// doesn't actually contribute any additional memory to the compiled regex
+// instructions, the size limit machinery never detects it. Instead, it just
+// dumbly tries to compile the empty sub-expression N times, where N is the
+// repetition size.
+//
+// When N is very large, this will cause the compiler to essentially spin and
+// do nothing for a decently large amount of time. It causes the regex to take
+// quite a bit of time to compile, despite the concrete syntax of the regex
+// being quite small.
+//
+// The degree to which this is actually a problem is somewhat of a judgment
+// call. Some regexes simply take a long time to compile. But in general, you
+// should be able to reasonably control this by setting lower or higher size
+// limits on the compiled object size. But this mitigation doesn't work at all
+// for this case.
+//
+// This particular test is somewhat narrow. It merely checks that regex
+// compilation will, at some point, return a "too big" error. Before the
+// fix landed, this test would eventually fail because the regex would be
+// successfully compiled (after enough time elapsed). So while this test
+// doesn't check that we exit in a reasonable amount of time, it does at least
+// check that we are properly returning an error at some point.
+#[test]
+fn big_empty_regex_fails() {
+    use regex::Regex;
+
+    let result = Regex::new("(?:){4294967295}");
+    assert!(result.is_err());
+}
+
+// Below is a "billion laughs" variant of the previous test case.
+#[test]
+fn big_empty_reps_chain_regex_fails() {
+    use regex::Regex;
+
+    let result = Regex::new("(?:){64}{64}{64}{64}{64}{64}");
+    assert!(result.is_err());
+}
+
+// Below is another situation where a zero-length sub-expression can be
+// introduced.
+#[test]
+fn big_zero_reps_regex_fails() {
+    use regex::Regex;
+
+    let result = Regex::new(r"x{0}{4294967295}");
+    assert!(result.is_err());
+}
+
+// Testing another case for completeness.
+#[test]
+fn empty_alt_regex_fails() {
+    use regex::Regex;
+
+    let result = Regex::new(r"(?:|){4294967295}");
+    assert!(result.is_err());
+}

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the tbb-commits mailing list