commit 611cb889c5f2a12e1ccd3a3bea5bf6c6bf4234cb Author: Cecylia Bocovich cohosh@torproject.org Date: Mon Mar 25 12:56:10 2019 -0400
Made regular expressions more precise
Modified regular expressions to not scrub fingerprints, but catch all instances of IPv4 and IPv6 addresses. Expanded test cases with those suggested by dcf. --- server/server.go | 20 ++++++++--- server/server_test.go | 95 ++++++++++++++++++++++++++++++++++----------------- 2 files changed, 79 insertions(+), 36 deletions(-)
diff --git a/server/server.go b/server/server.go index 539d0e1..e6b6f10 100644 --- a/server/server.go +++ b/server/server.go @@ -56,13 +56,19 @@ additional HTTP listener on port 80 to work with ACME. flag.PrintDefaults() }
+const ipv4Address = `\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}` +const ipv6Address = `(([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?(` + ipv4Address + `))` + + `|(([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?)` +const optionalPort = `(:\d{1,5})?` +const addressPattern = `((` + ipv4Address + `)|([(` + ipv6Address + `)])|(` + ipv6Address + `))` + optionalPort +const fullAddrPattern = `(^|\s|[^\w:])` + addressPattern + `(\s|(:\s)|[^\w:]|$)` + var scrubberPatterns = []*regexp.Regexp{ - /* IPv6 */ - regexp.MustCompile(`[([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})?]`), - /* IPv4 */ - regexp.MustCompile(`\b\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}\b`), + regexp.MustCompile(fullAddrPattern), }
+var addressRegexp = regexp.MustCompile(addressPattern) + // An io.Writer that can be used as the output for a logger that first // sanitizes logs and then writes to the provided io.Writer type logScrubber struct { @@ -73,7 +79,11 @@ type logScrubber struct { func scrub(b []byte) []byte { scrubbedBytes := b for _, pattern := range scrubberPatterns { - scrubbedBytes = pattern.ReplaceAll(scrubbedBytes, []byte("[scrubbed]")) + // this is a workaround since go does not yet support look ahead or look + // behind for regular expressions. + scrubbedBytes = pattern.ReplaceAllFunc(scrubbedBytes, func(b []byte) []byte { + return addressRegexp.ReplaceAll(b, []byte("[scrubbed]")) + }) } return scrubbedBytes } diff --git a/server/server_test.go b/server/server_test.go index 846d95e..fe895d2 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -54,7 +54,7 @@ func TestClientAddr(t *testing.T) { func TestLogScrubberSplit(t *testing.T) { input := []byte("test\nhttp2: panic serving [2620:101:f000:780:9097:75b1:519f:dbb8]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n")
- expected := "test\nhttp2: panic serving [scrubbed]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n" + expected := "test\nhttp2: panic serving [scrubbed]: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n"
var buff bytes.Buffer scrubber := &logScrubber{output: &buff} @@ -93,17 +93,18 @@ func TestLogScrubberSplit(t *testing.T) {
}
-func TestLogScrubberFormats(t *testing.T) { +//Test the log scrubber on known problematic log messages +func TestLogScrubberMessages(t *testing.T) { for _, test := range []struct { input, expected string }{ { - "http: TLS handshake error from 129.97.208.23:38310:", - "http: TLS handshake error from [scrubbed]:38310:\n", + "http: TLS handshake error from 129.97.208.23:38310: ", + "http: TLS handshake error from [scrubbed]: \n", }, { "http2: panic serving [2620:101:f000:780:9097:75b1:519f:dbb8]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack", - "http2: panic serving [scrubbed]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n", + "http2: panic serving [scrubbed]: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n", }, { //Make sure it doesn't scrub fingerprint @@ -111,32 +112,9 @@ func TestLogScrubberFormats(t *testing.T) { "a=fingerprint:sha-256 33:B6:FA:F6:94:CA:74:61:45:4A:D2:1F:2C:2F:75:8A:D9:EB:23:34:B2:30:E9:1B:2A:A6:A9:E0:44:72:CC:74\n", }, { - "[1::]:58344", - "[scrubbed]:58344\n", - }, - { - "[1:2:3:4:5:6::8]", - "[scrubbed]\n", - }, - { - "[1::7:8]", - "[scrubbed]\n", - }, - { - "[::4:5:6:7:8]", - "[scrubbed]\n", - }, - { - "[::255.255.255.255]", - "[scrubbed]\n", - }, - { - "[::ffff:0:255.255.255.255]", - "[scrubbed]\n", - }, - { - "[2001:db8:3:4::192.0.2.33]", - "[scrubbed]\n", + //try with enclosing parens + "(1:2:3:4:c:d:e:f) {1:2:3:4:c:d:e:f}", + "([scrubbed]) {[scrubbed]}\n", }, } { var buff bytes.Buffer @@ -149,3 +127,58 @@ func TestLogScrubberFormats(t *testing.T) { }
} + +func TestLogScrubberGoodFormats(t *testing.T) { + for _, addr := range []string{ + // IPv4 + "1.2.3.4", + "255.255.255.255", + // IPv4 with port + "1.2.3.4:55", + "255.255.255.255:65535", + // IPv6 + "1:2:3:4:c:d:e:f", + "1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF", + // IPv6 with brackets + "[1:2:3:4:c:d:e:f]", + "[1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF]", + // IPv6 with brackets and port + "[1:2:3:4:c:d:e:f]:55", + "[1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF]:65535", + // compressed IPv6 + "::d:e:f", + "1:2:3::", + "1:2:3::d:e:f", + "1111:2222:3333::DDDD:EEEE:FFFF", + // compressed IPv6 with brackets + "[::d:e:f]", + "[1:2:3::]", + "[1:2:3::d:e:f]", + "[1111:2222:3333::DDDD:EEEE:FFFF]", + "[1:2:3:4:5:6::8]", + "[1::7:8]", + // compressed IPv6 with brackets and port + "[1::]:58344", + "[::d:e:f]:55", + "[1:2:3::]:55", + "[1:2:3::d:e:f]:55", + "[1111:2222:3333::DDDD:EEEE:FFFF]:65535", + // IPv4-compatible and IPv4-mapped + "::255.255.255.255", + "::ffff:255.255.255.255", + "[::255.255.255.255]", + "[::ffff:255.255.255.255]", + "[::255.255.255.255]:65535", + "[::ffff:255.255.255.255]:65535", + "[::ffff:0:255.255.255.255]", + "[2001:db8:3:4::192.0.2.33]", + } { + var buff bytes.Buffer + log.SetFlags(0) //remove all extra log output for test comparisons + log.SetOutput(&logScrubber{output: &buff}) + log.Print(addr) + if buff.String() != "[scrubbed]\n" { + t.Errorf("%q: Got %q, expected %q", addr, buff.String(), "[scrubbed]\n") + } + } +}
tor-commits@lists.torproject.org