[tor-commits] [snowflake/master] Simplified log scrubber

cohosh at torproject.org cohosh at torproject.org
Tue Apr 9 21:14:13 UTC 2019


commit 5bc881702815942057f5c8d265fbef7d4917c82e
Author: Cecylia Bocovich <cohosh at torproject.org>
Date:   Thu Mar 21 10:11:11 2019 -0400

    Simplified log scrubber
    
    The IPv6 regex didn't need to be that precise. Also added more tests
    for edge cases.
---
 server/server.go      |  3 ++-
 server/server_test.go | 15 +++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/server/server.go b/server/server.go
index ad1cd2a..74cffd6 100644
--- a/server/server.go
+++ b/server/server.go
@@ -64,7 +64,8 @@ type logScrubber struct {
 func (ls *logScrubber) Write(b []byte) (n int, err error) {
 	//First scrub the input of IP addresses
 	reIPv4 := regexp.MustCompile(`\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b`)
-	reIPv6 := regexp.MustCompile(`(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))`)
+        //Note that an embedded IPv4 address is scrubbed by the IPv4 regex above
+        reIPv6 := regexp.MustCompile(`([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?`)
 	scrubbedBytes := reIPv4.ReplaceAll(b, []byte("X.X.X.X"))
 	scrubbedBytes = reIPv6.ReplaceAll(scrubbedBytes,
 		[]byte("X:X:X:X:X:X:X:X"))
diff --git a/server/server_test.go b/server/server_test.go
index c3514ed..537360a 100644
--- a/server/server_test.go
+++ b/server/server_test.go
@@ -59,6 +59,7 @@ func TestLogScrubber(t *testing.T) {
 
 	log.Printf("%s", "http: TLS handshake error from 129.97.208.23:38310:")
 
+        //Example IPv4 address that ended up in log
 	if bytes.Compare(buff.Bytes(), []byte("http: TLS handshake error from X.X.X.X:38310:\n")) != 0 {
 		t.Errorf("log scrubber didn't scrub IPv4 address. Output: %s", string(buff.Bytes()))
 	}
@@ -66,9 +67,23 @@ func TestLogScrubber(t *testing.T) {
 
 	log.Printf("%s", "http2: panic serving [2620:101:f000:780:9097:75b1:519f:dbb8]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack")
 
+        //Example IPv6 address that ended up in log
 	if bytes.Compare(buff.Bytes(), []byte("http2: panic serving [X:X:X:X:X:X:X:X]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n")) != 0 {
 		t.Errorf("log scrubber didn't scrub IPv6 address. Output: %s", string(buff.Bytes()))
 	}
 	buff.Reset()
 
+        //Testing IPv6 edge cases
+	log.Printf("%s", "[1::]:58344")
+	log.Printf("%s", "[1:2:3:4:5:6::8]:58344")
+	log.Printf("%s", "[1::7:8]:58344")
+	log.Printf("%s", "[::4:5:6:7:8]:58344")
+	log.Printf("%s", "[::255.255.255.255]:58344")
+	log.Printf("%s", "[::ffff:0:255.255.255.255]:58344")
+	log.Printf("%s", "[2001:db8:3:4::192.0.2.33]:58344")
+
+	if bytes.Compare(buff.Bytes(), []byte("[X:X:X:X:X:X:X:X]:58344\n[X:X:X:X:X:X:X:X]:58344\n[X:X:X:X:X:X:X:X]:58344\n[X:X:X:X:X:X:X:X]:58344\n[X:X:X:X:X:X:X:XX.X.X.X]:58344\n[X:X:X:X:X:X:X:XX.X.X.X]:58344\n[X:X:X:X:X:X:X:XX.X.X.X]:58344\n")) != 0 {
+		t.Errorf("log scrubber didn't scrub IPv6 address. Output: %s", string(buff.Bytes()))
+	}
+	buff.Reset()
 }





More information about the tor-commits mailing list