This is an automated email from the git hooks/post-receive script.
richard pushed a commit to branch tor-browser-91.8.0esr-11.0-1 in repository tor-browser.
commit 43f4ff7552d63d5e8327a82b6e64c19cc0663c49 Author: Jan-Erik Rediger jrediger@mozilla.com AuthorDate: Wed Mar 9 08:43:35 2022 +0000
Bug 1758509 - Update regex crate to 1.5.5 r=emilio, a=RyanVM
Differential Revision: https://phabricator.services.mozilla.com/D140532 --- Cargo.lock | 4 +- third_party/rust/regex/.cargo-checksum.json | 2 +- third_party/rust/regex/Cargo.lock | 6 +-- third_party/rust/regex/Cargo.toml | 59 +++++++++++++++++------ third_party/rust/regex/README.md | 2 +- third_party/rust/regex/src/compile.rs | 27 ++++++++++- third_party/rust/regex/src/dfa.rs | 2 - third_party/rust/regex/src/re_unicode.rs | 2 +- third_party/rust/regex/tests/test_default.rs | 70 ++++++++++++++++++++++++++++ 9 files changed, 149 insertions(+), 25 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock index 56122da06357a..b0cc79279b1ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4049,9 +4049,9 @@ dependencies = [
[[package]] name = "regex" -version = "1.5.4" +version = "1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" dependencies = [ "aho-corasick", "memchr", diff --git a/third_party/rust/regex/.cargo-checksum.json b/third_party/rust/regex/.cargo-checksum.json index 186d5c5b00515..912ddc14466e3 100644 --- a/third_party/rust/regex/.cargo-checksum.json +++ b/third_party/rust/regex/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"CHANGELOG.md":"dc37d7389aa7f626a2a76f565b379378a63a043004674735e2b5859202b255ea","Cargo.lock":"b74b289188546209fb8b9024eb5cf7c43dc24b199cf2aaf2b11601ee24d9575d","Cargo.toml":"d5bf863bcab85700dc82190169003b7781f7985e51f7ba67a757fc2ba069ee6e","HACKING.md":"17818f7a17723608f6bdbe6388ad0a913d4f96f76a16649aaf4e274b1fa0ea97","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1 [...] \ No newline at end of file +{"files":{"CHANGELOG.md":"dc37d7389aa7f626a2a76f565b379378a63a043004674735e2b5859202b255ea","Cargo.lock":"68805a2737aad9b6520868b2f85dbe638c4efb9f4aef759226a129edb5940434","Cargo.toml":"5678757a6bafcac57c7a3b51655ef978901112eb0629976a402560db5f948f56","HACKING.md":"17818f7a17723608f6bdbe6388ad0a913d4f96f76a16649aaf4e274b1fa0ea97","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1 [...] \ No newline at end of file diff --git a/third_party/rust/regex/Cargo.lock b/third_party/rust/regex/Cargo.lock index f2bbb5d850ab1..cc51538bf7ec1 100644 --- a/third_party/rust/regex/Cargo.lock +++ b/third_party/rust/regex/Cargo.lock @@ -36,9 +36,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]] name = "libc" -version = "0.2.80" +version = "0.2.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614" +checksum = "18794a8ad5b29321f790b55d93dfba91e125cb1a9edbd4f8e3150acc771c1a5e"
[[package]] name = "memchr" @@ -75,7 +75,7 @@ dependencies = [
[[package]] name = "regex" -version = "1.5.4" +version = "1.5.5" dependencies = [ "aho-corasick", "lazy_static", diff --git a/third_party/rust/regex/Cargo.toml b/third_party/rust/regex/Cargo.toml index 260acec69df73..d28f676ded14d 100644 --- a/third_party/rust/regex/Cargo.toml +++ b/third_party/rust/regex/Cargo.toml @@ -3,27 +3,33 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies +# to registry (e.g., crates.io) dependencies. # -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents.
[package] edition = "2018" name = "regex" -version = "1.5.4" +version = "1.5.5" authors = ["The Rust Project Developers"] -exclude = ["/scripts/*", "/.github/*"] +exclude = [ + "/scripts/*", + "/.github/*", +] autotests = false -description = "An implementation of regular expressions for Rust. This implementation uses\nfinite automata and guarantees linear time matching on all inputs.\n" +description = """ +An implementation of regular expressions for Rust. This implementation uses +finite automata and guarantees linear time matching on all inputs. +""" homepage = "https://github.com/rust-lang/regex" documentation = "https://docs.rs/regex" readme = "README.md" categories = ["text-processing"] license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/regex" + [profile.bench] debug = true
@@ -72,6 +78,7 @@ path = "tests/test_backtrack_bytes.rs" [[test]] name = "crates-regex" path = "tests/test_crates_regex.rs" + [dependencies.aho-corasick] version = "0.7.18" optional = true @@ -83,6 +90,7 @@ optional = true [dependencies.regex-syntax] version = "0.6.25" default-features = false + [dev-dependencies.lazy_static] version = "1"
@@ -92,19 +100,44 @@ default-features = false
[dev-dependencies.rand] version = "0.8.3" -features = ["getrandom", "small_rng"] +features = [ + "getrandom", + "small_rng", +] default-features = false
[features] -default = ["std", "perf", "unicode", "regex-syntax/default"] +default = [ + "std", + "perf", + "unicode", + "regex-syntax/default", +] pattern = [] -perf = ["perf-cache", "perf-dfa", "perf-inline", "perf-literal"] +perf = [ + "perf-cache", + "perf-dfa", + "perf-inline", + "perf-literal", +] perf-cache = [] perf-dfa = [] perf-inline = [] -perf-literal = ["aho-corasick", "memchr"] +perf-literal = [ + "aho-corasick", + "memchr", +] std = [] -unicode = ["unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment", "regex-syntax/unicode"] +unicode = [ + "unicode-age", + "unicode-bool", + "unicode-case", + "unicode-gencat", + "unicode-perl", + "unicode-script", + "unicode-segment", + "regex-syntax/unicode", +] unicode-age = ["regex-syntax/unicode-age"] unicode-bool = ["regex-syntax/unicode-bool"] unicode-case = ["regex-syntax/unicode-case"] diff --git a/third_party/rust/regex/README.md b/third_party/rust/regex/README.md index 86d69968caca6..9acd5bb4a02a3 100644 --- a/third_party/rust/regex/README.md +++ b/third_party/rust/regex/README.md @@ -8,7 +8,7 @@ Much of the syntax and implementation is inspired by [RE2](https://github.com/google/re2).
[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)%5D(https://github...) -[![](https://meritbadge.herokuapp.com/regex)%5D(https://crates.io/crates/regex) +[![Crates.io](https://img.shields.io/crates/v/regex.svg)%5D(https://crates.io/crates/regex) [![Rust](https://img.shields.io/badge/rust-1.41.1%2B-blue.svg?maxAge=3600)%5D(https:/...)
### Documentation diff --git a/third_party/rust/regex/src/compile.rs b/third_party/rust/regex/src/compile.rs index 9a2ed5e92a0d6..069f445c86f24 100644 --- a/third_party/rust/regex/src/compile.rs +++ b/third_party/rust/regex/src/compile.rs @@ -38,6 +38,16 @@ pub struct Compiler { suffix_cache: SuffixCache, utf8_seqs: Option<Utf8Sequences>, byte_classes: ByteClassSet, + // This keeps track of extra bytes allocated while compiling the regex + // program. Currently, this corresponds to two things. First is the heap + // memory allocated by Unicode character classes ('InstRanges'). Second is + // a "fake" amount of memory used by empty sub-expressions, so that enough + // empty sub-expressions will ultimately trigger the compiler to bail + // because of a size limit restriction. (That empty sub-expressions don't + // add to heap memory usage is more-or-less an implementation detail.) In + // the second case, if we don't bail, then an excessively large repetition + // on an empty sub-expression can result in the compiler using a very large + // amount of CPU time. extra_inst_bytes: usize, }
@@ -260,7 +270,7 @@ impl Compiler {
self.check_size()?; match *expr.kind() { - Empty => Ok(None), + Empty => self.c_empty(), Literal(hir::Literal::Unicode(c)) => self.c_char(c), Literal(hir::Literal::Byte(b)) => { assert!(self.compiled.uses_bytes()); @@ -378,6 +388,19 @@ impl Compiler { } }
+ fn c_empty(&mut self) -> ResultOrEmpty { + // See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8 + // See: CVE-2022-24713 + // + // Since 'empty' sub-expressions don't increase the size of + // the actual compiled object, we "fake" an increase in its + // size so that our 'check_size_limit' routine will eventually + // stop compilation if there are too many empty sub-expressions + // (e.g., via a large repetition). + self.extra_inst_bytes += std::mem::size_of::<Inst>(); + Ok(None) + } + fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> ResultOrEmpty { if self.num_exprs > 1 || self.compiled.is_dfa { // Don't ever compile Save instructions for regex sets because @@ -496,7 +519,7 @@ impl Compiler { let mut exprs = exprs.into_iter(); let Patch { mut hole, entry } = loop { match exprs.next() { - None => return Ok(None), + None => return self.c_empty(), Some(e) => { if let Some(p) = self.c(e)? { break p; diff --git a/third_party/rust/regex/src/dfa.rs b/third_party/rust/regex/src/dfa.rs index 4b60f4d19bd07..4aee8039c6c1a 100644 --- a/third_party/rust/regex/src/dfa.rs +++ b/third_party/rust/regex/src/dfa.rs @@ -1353,7 +1353,6 @@ impl<'a> Fsm<'a> { match self.cache.trans.next(si, self.byte_class(b)) { STATE_UNKNOWN => self.exec_byte(qcur, qnext, si, b), STATE_QUIT => None, - STATE_DEAD => Some(STATE_DEAD), nsi => Some(nsi), } } @@ -1387,7 +1386,6 @@ impl<'a> Fsm<'a> { }; match self.cache.start_states[flagi] { STATE_UNKNOWN => {} - STATE_DEAD => return Some(STATE_DEAD), si => return Some(si), } q.clear(); diff --git a/third_party/rust/regex/src/re_unicode.rs b/third_party/rust/regex/src/re_unicode.rs index 142c78fb1c301..e4871a621733d 100644 --- a/third_party/rust/regex/src/re_unicode.rs +++ b/third_party/rust/regex/src/re_unicode.rs @@ -538,7 +538,7 @@ impl Regex { mut rep: R, ) -> Cow<'t, str> { // If we know that the replacement doesn't have any capture expansions, - // then we can fast path. The fast path can make a tremendous + // then we can use the fast path. The fast path can make a tremendous // difference: // // 1) We use `find_iter` instead of `captures_iter`. Not asking for diff --git a/third_party/rust/regex/tests/test_default.rs b/third_party/rust/regex/tests/test_default.rs index d4365fbb344ea..be627f7a68f16 100644 --- a/third_party/rust/regex/tests/test_default.rs +++ b/third_party/rust/regex/tests/test_default.rs @@ -150,3 +150,73 @@ fn regex_is_reasonably_small() { assert_eq!(16, size_of::bytes::Regex()); assert_eq!(16, size_of::bytes::RegexSet()); } + +// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8 +// See: CVE-2022-24713 +// +// We test that our regex compiler will correctly return a "too big" error when +// we try to use a very large repetition on an *empty* sub-expression. +// +// At the time this test was written, the regex compiler does not represent +// empty sub-expressions with any bytecode instructions. In effect, it's an +// "optimization" to leave them out, since they would otherwise correspond +// to an unconditional JUMP in the regex bytecode (i.e., an unconditional +// epsilon transition in the NFA graph). Therefore, an empty sub-expression +// represents an interesting case for the compiler's size limits. Since it +// doesn't actually contribute any additional memory to the compiled regex +// instructions, the size limit machinery never detects it. Instead, it just +// dumbly tries to compile the empty sub-expression N times, where N is the +// repetition size. +// +// When N is very large, this will cause the compiler to essentially spin and +// do nothing for a decently large amount of time. It causes the regex to take +// quite a bit of time to compile, despite the concrete syntax of the regex +// being quite small. +// +// The degree to which this is actually a problem is somewhat of a judgment +// call. Some regexes simply take a long time to compile. But in general, you +// should be able to reasonably control this by setting lower or higher size +// limits on the compiled object size. But this mitigation doesn't work at all +// for this case. +// +// This particular test is somewhat narrow. It merely checks that regex +// compilation will, at some point, return a "too big" error. Before the +// fix landed, this test would eventually fail because the regex would be +// successfully compiled (after enough time elapsed). So while this test +// doesn't check that we exit in a reasonable amount of time, it does at least +// check that we are properly returning an error at some point. +#[test] +fn big_empty_regex_fails() { + use regex::Regex; + + let result = Regex::new("(?:){4294967295}"); + assert!(result.is_err()); +} + +// Below is a "billion laughs" variant of the previous test case. +#[test] +fn big_empty_reps_chain_regex_fails() { + use regex::Regex; + + let result = Regex::new("(?:){64}{64}{64}{64}{64}{64}"); + assert!(result.is_err()); +} + +// Below is another situation where a zero-length sub-expression can be +// introduced. +#[test] +fn big_zero_reps_regex_fails() { + use regex::Regex; + + let result = Regex::new(r"x{0}{4294967295}"); + assert!(result.is_err()); +} + +// Testing another case for completeness. +#[test] +fn empty_alt_regex_fails() { + use regex::Regex; + + let result = Regex::new(r"(?:|){4294967295}"); + assert!(result.is_err()); +}