[tor-commits] [snowflake/master] Made regular expressions more precise

cohosh at torproject.org cohosh at torproject.org
Tue Apr 9 21:14:13 UTC 2019


commit 611cb889c5f2a12e1ccd3a3bea5bf6c6bf4234cb
Author: Cecylia Bocovich <cohosh at torproject.org>
Date:   Mon Mar 25 12:56:10 2019 -0400

    Made regular expressions more precise
    
    Modified regular expressions to not scrub fingerprints, but catch all
    instances of IPv4 and IPv6 addresses. Expanded test cases with those
    suggested by dcf.
---
 server/server.go      | 20 ++++++++---
 server/server_test.go | 95 ++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 79 insertions(+), 36 deletions(-)

diff --git a/server/server.go b/server/server.go
index 539d0e1..e6b6f10 100644
--- a/server/server.go
+++ b/server/server.go
@@ -56,13 +56,19 @@ additional HTTP listener on port 80 to work with ACME.
 	flag.PrintDefaults()
 }
 
+const ipv4Address = `\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}`
+const ipv6Address = `(([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?(` + ipv4Address + `))` +
+	`|(([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?)`
+const optionalPort = `(:\d{1,5})?`
+const addressPattern = `((` + ipv4Address + `)|(\[(` + ipv6Address + `)\])|(` + ipv6Address + `))` + optionalPort
+const fullAddrPattern = `(^|\s|[^\w:])` + addressPattern + `(\s|(:\s)|[^\w:]|$)`
+
 var scrubberPatterns = []*regexp.Regexp{
-	/* IPv6 */
-	regexp.MustCompile(`\[([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})?\]`),
-	/* IPv4 */
-	regexp.MustCompile(`\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b`),
+	regexp.MustCompile(fullAddrPattern),
 }
 
+var addressRegexp = regexp.MustCompile(addressPattern)
+
 // An io.Writer that can be used as the output for a logger that first
 // sanitizes logs and then writes to the provided io.Writer
 type logScrubber struct {
@@ -73,7 +79,11 @@ type logScrubber struct {
 func scrub(b []byte) []byte {
 	scrubbedBytes := b
 	for _, pattern := range scrubberPatterns {
-		scrubbedBytes = pattern.ReplaceAll(scrubbedBytes, []byte("[scrubbed]"))
+		// this is a workaround since go does not yet support look ahead or look
+		// behind for regular expressions.
+		scrubbedBytes = pattern.ReplaceAllFunc(scrubbedBytes, func(b []byte) []byte {
+			return addressRegexp.ReplaceAll(b, []byte("[scrubbed]"))
+		})
 	}
 	return scrubbedBytes
 }
diff --git a/server/server_test.go b/server/server_test.go
index 846d95e..fe895d2 100644
--- a/server/server_test.go
+++ b/server/server_test.go
@@ -54,7 +54,7 @@ func TestClientAddr(t *testing.T) {
 func TestLogScrubberSplit(t *testing.T) {
 	input := []byte("test\nhttp2: panic serving [2620:101:f000:780:9097:75b1:519f:dbb8]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n")
 
-	expected := "test\nhttp2: panic serving [scrubbed]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n"
+	expected := "test\nhttp2: panic serving [scrubbed]: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n"
 
 	var buff bytes.Buffer
 	scrubber := &logScrubber{output: &buff}
@@ -93,17 +93,18 @@ func TestLogScrubberSplit(t *testing.T) {
 
 }
 
-func TestLogScrubberFormats(t *testing.T) {
+//Test the log scrubber on known problematic log messages
+func TestLogScrubberMessages(t *testing.T) {
 	for _, test := range []struct {
 		input, expected string
 	}{
 		{
-			"http: TLS handshake error from 129.97.208.23:38310:",
-			"http: TLS handshake error from [scrubbed]:38310:\n",
+			"http: TLS handshake error from 129.97.208.23:38310: ",
+			"http: TLS handshake error from [scrubbed]: \n",
 		},
 		{
 			"http2: panic serving [2620:101:f000:780:9097:75b1:519f:dbb8]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack",
-			"http2: panic serving [scrubbed]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n",
+			"http2: panic serving [scrubbed]: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n",
 		},
 		{
 			//Make sure it doesn't scrub fingerprint
@@ -111,32 +112,9 @@ func TestLogScrubberFormats(t *testing.T) {
 			"a=fingerprint:sha-256 33:B6:FA:F6:94:CA:74:61:45:4A:D2:1F:2C:2F:75:8A:D9:EB:23:34:B2:30:E9:1B:2A:A6:A9:E0:44:72:CC:74\n",
 		},
 		{
-			"[1::]:58344",
-			"[scrubbed]:58344\n",
-		},
-		{
-			"[1:2:3:4:5:6::8]",
-			"[scrubbed]\n",
-		},
-		{
-			"[1::7:8]",
-			"[scrubbed]\n",
-		},
-		{
-			"[::4:5:6:7:8]",
-			"[scrubbed]\n",
-		},
-		{
-			"[::255.255.255.255]",
-			"[scrubbed]\n",
-		},
-		{
-			"[::ffff:0:255.255.255.255]",
-			"[scrubbed]\n",
-		},
-		{
-			"[2001:db8:3:4::192.0.2.33]",
-			"[scrubbed]\n",
+			//try with enclosing parens
+			"(1:2:3:4:c:d:e:f) {1:2:3:4:c:d:e:f}",
+			"([scrubbed]) {[scrubbed]}\n",
 		},
 	} {
 		var buff bytes.Buffer
@@ -149,3 +127,58 @@ func TestLogScrubberFormats(t *testing.T) {
 	}
 
 }
+
+func TestLogScrubberGoodFormats(t *testing.T) {
+	for _, addr := range []string{
+		// IPv4
+		"1.2.3.4",
+		"255.255.255.255",
+		// IPv4 with port
+		"1.2.3.4:55",
+		"255.255.255.255:65535",
+		// IPv6
+		"1:2:3:4:c:d:e:f",
+		"1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF",
+		// IPv6 with brackets
+		"[1:2:3:4:c:d:e:f]",
+		"[1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF]",
+		// IPv6 with brackets and port
+		"[1:2:3:4:c:d:e:f]:55",
+		"[1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF]:65535",
+		// compressed IPv6
+		"::d:e:f",
+		"1:2:3::",
+		"1:2:3::d:e:f",
+		"1111:2222:3333::DDDD:EEEE:FFFF",
+		// compressed IPv6 with brackets
+		"[::d:e:f]",
+		"[1:2:3::]",
+		"[1:2:3::d:e:f]",
+		"[1111:2222:3333::DDDD:EEEE:FFFF]",
+		"[1:2:3:4:5:6::8]",
+		"[1::7:8]",
+		// compressed IPv6 with brackets and port
+		"[1::]:58344",
+		"[::d:e:f]:55",
+		"[1:2:3::]:55",
+		"[1:2:3::d:e:f]:55",
+		"[1111:2222:3333::DDDD:EEEE:FFFF]:65535",
+		// IPv4-compatible and IPv4-mapped
+		"::255.255.255.255",
+		"::ffff:255.255.255.255",
+		"[::255.255.255.255]",
+		"[::ffff:255.255.255.255]",
+		"[::255.255.255.255]:65535",
+		"[::ffff:255.255.255.255]:65535",
+		"[::ffff:0:255.255.255.255]",
+		"[2001:db8:3:4::192.0.2.33]",
+	} {
+		var buff bytes.Buffer
+		log.SetFlags(0) //remove all extra log output for test comparisons
+		log.SetOutput(&logScrubber{output: &buff})
+		log.Print(addr)
+		if buff.String() != "[scrubbed]\n" {
+			t.Errorf("%q: Got %q, expected %q", addr, buff.String(), "[scrubbed]\n")
+		}
+	}
+}





More information about the tor-commits mailing list