[tor-commits] [snowflake/main] Use tpo geoip library

meskio at torproject.org meskio at torproject.org
Mon Oct 4 10:26:43 UTC 2021


commit 4396d505a3b872fda43ca6cf43264d0f25cd8e9f
Author: meskio <meskio at torproject.org>
Date:   Thu Sep 30 12:10:59 2021 +0200

    Use tpo geoip library
    
    Now the geoip implementation has been moved to its own library to be
    shared between projects.
---
 broker/geoip.go                 | 240 ----------------------------------------
 broker/metrics.go               |  38 ++-----
 broker/snowflake-broker_test.go |  96 +---------------
 go.mod                          |   1 +
 go.sum                          |   2 +
 5 files changed, 13 insertions(+), 364 deletions(-)

diff --git a/broker/geoip.go b/broker/geoip.go
deleted file mode 100644
index 708cdad..0000000
--- a/broker/geoip.go
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
-This code is for loading database data that maps ip addresses to countries
-for collecting and presenting statistics on snowflake use that might alert us
-to censorship events.
-
-The functions here are heavily based off of how tor maintains and searches their
-geoip database
-
-The tables used for geoip data must be structured as follows:
-
-Recognized line format for IPv4 is:
-    INTIPLOW,INTIPHIGH,CC
-        where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as big-endian 4-byte unsigned
-        integers, and CC is a country code.
-
-Note that the IPv4 line format
-    "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
-is not currently supported.
-
-Recognized line format for IPv6 is:
-    IPV6LOW,IPV6HIGH,CC
-        where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code.
-
-It also recognizes, and skips over, blank lines and lines that start
-with '#' (comments).
-
-*/
-package main
-
-import (
-	"bufio"
-	"bytes"
-	"crypto/sha1"
-	"encoding/hex"
-	"fmt"
-	"io"
-	"log"
-	"net"
-	"os"
-	"sort"
-	"strconv"
-	"strings"
-	"sync"
-)
-
-type GeoIPTable interface {
-	parseEntry(string) (*GeoIPEntry, error)
-	Len() int
-	Append(GeoIPEntry)
-	ElementAt(int) GeoIPEntry
-	Lock()
-	Unlock()
-}
-
-type GeoIPEntry struct {
-	ipLow   net.IP
-	ipHigh  net.IP
-	country string
-}
-
-type GeoIPv4Table struct {
-	table []GeoIPEntry
-
-	lock sync.Mutex // synchronization for geoip table accesses and reloads
-}
-
-type GeoIPv6Table struct {
-	table []GeoIPEntry
-
-	lock sync.Mutex // synchronization for geoip table accesses and reloads
-}
-
-func (table *GeoIPv4Table) Len() int { return len(table.table) }
-func (table *GeoIPv6Table) Len() int { return len(table.table) }
-
-func (table *GeoIPv4Table) Append(entry GeoIPEntry) {
-	(*table).table = append(table.table, entry)
-}
-func (table *GeoIPv6Table) Append(entry GeoIPEntry) {
-	(*table).table = append(table.table, entry)
-}
-
-func (table *GeoIPv4Table) ElementAt(i int) GeoIPEntry { return table.table[i] }
-func (table *GeoIPv6Table) ElementAt(i int) GeoIPEntry { return table.table[i] }
-
-func (table *GeoIPv4Table) Lock() { (*table).lock.Lock() }
-func (table *GeoIPv6Table) Lock() { (*table).lock.Lock() }
-
-func (table *GeoIPv4Table) Unlock() { (*table).lock.Unlock() }
-func (table *GeoIPv6Table) Unlock() { (*table).lock.Unlock() }
-
-// Convert a geoip IP address represented as a big-endian unsigned integer to net.IP
-func geoipStringToIP(ipStr string) (net.IP, error) {
-	ip, err := strconv.ParseUint(ipStr, 10, 32)
-	if err != nil {
-		return net.IPv4(0, 0, 0, 0), fmt.Errorf("error parsing IP %s", ipStr)
-	}
-	var bytes [4]byte
-	bytes[0] = byte(ip & 0xFF)
-	bytes[1] = byte((ip >> 8) & 0xFF)
-	bytes[2] = byte((ip >> 16) & 0xFF)
-	bytes[3] = byte((ip >> 24) & 0xFF)
-
-	return net.IPv4(bytes[3], bytes[2], bytes[1], bytes[0]), nil
-}
-
-//Parses a line in the provided geoip file that corresponds
-//to an address range and a two character country code
-func (table *GeoIPv4Table) parseEntry(candidate string) (*GeoIPEntry, error) {
-
-	if candidate[0] == '#' {
-		return nil, nil
-	}
-
-	parsedCandidate := strings.Split(candidate, ",")
-
-	if len(parsedCandidate) != 3 {
-		return nil, fmt.Errorf("provided geoip file is incorrectly formatted. Could not parse line:\n%s", parsedCandidate)
-	}
-
-	low, err := geoipStringToIP(parsedCandidate[0])
-	if err != nil {
-		return nil, err
-	}
-	high, err := geoipStringToIP(parsedCandidate[1])
-	if err != nil {
-		return nil, err
-	}
-
-	geoipEntry := &GeoIPEntry{
-		ipLow:   low,
-		ipHigh:  high,
-		country: parsedCandidate[2],
-	}
-
-	return geoipEntry, nil
-}
-
-//Parses a line in the provided geoip file that corresponds
-//to an address range and a two character country code
-func (table *GeoIPv6Table) parseEntry(candidate string) (*GeoIPEntry, error) {
-
-	if candidate[0] == '#' {
-		return nil, nil
-	}
-
-	parsedCandidate := strings.Split(candidate, ",")
-
-	if len(parsedCandidate) != 3 {
-		return nil, fmt.Errorf("")
-	}
-
-	low := net.ParseIP(parsedCandidate[0])
-	if low == nil {
-		return nil, fmt.Errorf("")
-	}
-	high := net.ParseIP(parsedCandidate[1])
-	if high == nil {
-		return nil, fmt.Errorf("")
-	}
-
-	geoipEntry := &GeoIPEntry{
-		ipLow:   low,
-		ipHigh:  high,
-		country: parsedCandidate[2],
-	}
-
-	return geoipEntry, nil
-}
-
-//Loads provided geoip file into our tables
-//Entries are stored in a table
-func GeoIPLoadFile(table GeoIPTable, pathname string) error {
-	//open file
-	geoipFile, err := os.Open(pathname)
-	if err != nil {
-		return err
-	}
-	defer geoipFile.Close()
-
-	hash := sha1.New()
-
-	table.Lock()
-	defer table.Unlock()
-
-	hashedFile := io.TeeReader(geoipFile, hash)
-
-	//read in strings and call parse function
-	scanner := bufio.NewScanner(hashedFile)
-	for scanner.Scan() {
-		entry, err := table.parseEntry(scanner.Text())
-		if err != nil {
-			return fmt.Errorf("provided geoip file is incorrectly formatted. Line is: %+q", scanner.Text())
-		}
-
-		if entry != nil {
-			table.Append(*entry)
-		}
-
-	}
-	if err := scanner.Err(); err != nil {
-		return err
-	}
-
-	sha1Hash := hex.EncodeToString(hash.Sum(nil))
-	log.Println("Using geoip file ", pathname, " with checksum", sha1Hash)
-	log.Println("Loaded ", table.Len(), " entries into table")
-
-	return nil
-}
-
-//Returns the country location of an IPv4 or IPv6 address, and a boolean value
-//that indicates whether the IP address was present in the geoip database
-func GetCountryByAddr(table GeoIPTable, ip net.IP) (string, bool) {
-
-	table.Lock()
-	defer table.Unlock()
-
-	//look IP up in database
-	index := sort.Search(table.Len(), func(i int) bool {
-		entry := table.ElementAt(i)
-		return (bytes.Compare(ip.To16(), entry.ipHigh.To16()) <= 0)
-	})
-
-	if index == table.Len() {
-		return "", false
-	}
-
-	// check to see if addr is in the range specified by the returned index
-	// search on IPs in invalid ranges (e.g., 127.0.0.0/8) will return the
-	//country code of the next highest range
-	entry := table.ElementAt(index)
-	if !(bytes.Compare(ip.To16(), entry.ipLow.To16()) >= 0 &&
-		bytes.Compare(ip.To16(), entry.ipHigh.To16()) <= 0) {
-		return "", false
-	}
-
-	return table.ElementAt(index).country, true
-
-}
diff --git a/broker/metrics.go b/broker/metrics.go
index e8a6b0c..8229e0f 100644
--- a/broker/metrics.go
+++ b/broker/metrics.go
@@ -15,6 +15,7 @@ import (
 	"time"
 
 	"github.com/prometheus/client_golang/prometheus"
+	"gitlab.torproject.org/tpo/anti-censorship/geoip"
 )
 
 const (
@@ -38,8 +39,7 @@ type CountryStats struct {
 // Implements Observable
 type Metrics struct {
 	logger  *log.Logger
-	tablev4 *GeoIPv4Table
-	tablev6 *GeoIPv6Table
+	geoipdb *geoip.Geoip
 
 	countryStats                  CountryStats
 	clientRoundtripEstimate       time.Duration
@@ -115,19 +115,10 @@ func (m *Metrics) UpdateCountryStats(addr string, proxyType string, natType stri
 	}
 
 	ip := net.ParseIP(addr)
-	if ip.To4() != nil {
-		//This is an IPv4 address
-		if m.tablev4 == nil {
-			return
-		}
-		country, ok = GetCountryByAddr(m.tablev4, ip)
-	} else {
-		if m.tablev6 == nil {
-			return
-		}
-		country, ok = GetCountryByAddr(m.tablev6, ip)
+	if m.geoipdb == nil {
+		return
 	}
-
+	country, ok = m.geoipdb.GetCountryByAddr(ip)
 	if !ok {
 		country = "??"
 	}
@@ -164,23 +155,10 @@ func (m *Metrics) UpdateCountryStats(addr string, proxyType string, natType stri
 func (m *Metrics) LoadGeoipDatabases(geoipDB string, geoip6DB string) error {
 
 	// Load geoip databases
+	var err error
 	log.Println("Loading geoip databases")
-	tablev4 := new(GeoIPv4Table)
-	err := GeoIPLoadFile(tablev4, geoipDB)
-	if err != nil {
-		m.tablev4 = nil
-		return err
-	}
-	m.tablev4 = tablev4
-
-	tablev6 := new(GeoIPv6Table)
-	err = GeoIPLoadFile(tablev6, geoip6DB)
-	if err != nil {
-		m.tablev6 = nil
-		return err
-	}
-	m.tablev6 = tablev6
-	return nil
+	m.geoipdb, err = geoip.New(geoipDB, geoip6DB)
+	return err
 }
 
 func NewMetrics(metricsLogger *log.Logger) (*Metrics, error) {
diff --git a/broker/snowflake-broker_test.go b/broker/snowflake-broker_test.go
index 233cfea..25a947c 100644
--- a/broker/snowflake-broker_test.go
+++ b/broker/snowflake-broker_test.go
@@ -6,7 +6,6 @@ import (
 	"io"
 	"io/ioutil"
 	"log"
-	"net"
 	"net/http"
 	"net/http/httptest"
 	"os"
@@ -473,106 +472,15 @@ func TestSnowflakeHeap(t *testing.T) {
 	})
 }
 
-func TestGeoip(t *testing.T) {
+func TestInvalidGeoipFile(t *testing.T) {
 	Convey("Geoip", t, func() {
-		tv4 := new(GeoIPv4Table)
-		err := GeoIPLoadFile(tv4, "test_geoip")
-		So(err, ShouldEqual, nil)
-		tv6 := new(GeoIPv6Table)
-		err = GeoIPLoadFile(tv6, "test_geoip6")
-		So(err, ShouldEqual, nil)
-
-		Convey("IPv4 Country Mapping Tests", func() {
-			for _, test := range []struct {
-				addr, cc string
-				ok       bool
-			}{
-				{
-					"129.97.208.23", //uwaterloo
-					"CA",
-					true,
-				},
-				{
-					"127.0.0.1",
-					"",
-					false,
-				},
-				{
-					"255.255.255.255",
-					"",
-					false,
-				},
-				{
-					"0.0.0.0",
-					"",
-					false,
-				},
-				{
-					"223.252.127.255", //test high end of range
-					"JP",
-					true,
-				},
-				{
-					"223.252.127.255", //test low end of range
-					"JP",
-					true,
-				},
-			} {
-				country, ok := GetCountryByAddr(tv4, net.ParseIP(test.addr))
-				So(country, ShouldEqual, test.cc)
-				So(ok, ShouldResemble, test.ok)
-			}
-		})
-
-		Convey("IPv6 Country Mapping Tests", func() {
-			for _, test := range []struct {
-				addr, cc string
-				ok       bool
-			}{
-				{
-					"2620:101:f000:0:250:56ff:fe80:168e", //uwaterloo
-					"CA",
-					true,
-				},
-				{
-					"fd00:0:0:0:0:0:0:1",
-					"",
-					false,
-				},
-				{
-					"0:0:0:0:0:0:0:0",
-					"",
-					false,
-				},
-				{
-					"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
-					"",
-					false,
-				},
-				{
-					"2a07:2e47:ffff:ffff:ffff:ffff:ffff:ffff", //test high end of range
-					"FR",
-					true,
-				},
-				{
-					"2a07:2e40::", //test low end of range
-					"FR",
-					true,
-				},
-			} {
-				country, ok := GetCountryByAddr(tv6, net.ParseIP(test.addr))
-				So(country, ShouldEqual, test.cc)
-				So(ok, ShouldResemble, test.ok)
-			}
-		})
-
 		// Make sure things behave properly if geoip file fails to load
 		ctx := NewBrokerContext(NullLogger())
 		if err := ctx.metrics.LoadGeoipDatabases("invalid_filename", "invalid_filename6"); err != nil {
 			log.Printf("loading geo ip databases returned error: %v", err)
 		}
 		ctx.metrics.UpdateCountryStats("127.0.0.1", "", NATUnrestricted)
-		So(ctx.metrics.tablev4, ShouldEqual, nil)
+		So(ctx.metrics.geoipdb, ShouldEqual, nil)
 
 	})
 }
diff --git a/go.mod b/go.mod
index 36585aa..9d6b6ac 100644
--- a/go.mod
+++ b/go.mod
@@ -16,6 +16,7 @@ require (
 	github.com/smartystreets/goconvey v1.6.4
 	github.com/xtaci/kcp-go/v5 v5.6.1
 	github.com/xtaci/smux v1.5.15
+	gitlab.torproject.org/tpo/anti-censorship/geoip v0.0.0-20210928150955-7ce4b3d98d01
 	golang.org/x/crypto v0.0.0-20210317152858-513c2a44f670
 	golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4
 	golang.org/x/sys v0.0.0-20210317225723-c4fcb01b228e // indirect
diff --git a/go.sum b/go.sum
index f0b3927..34bc936 100644
--- a/go.sum
+++ b/go.sum
@@ -358,6 +358,8 @@ github.com/xtaci/smux v1.5.15 h1:6hMiXswcleXj5oNfcJc+DXS8Vj36XX2LaX98udog6Kc=
 github.com/xtaci/smux v1.5.15/go.mod h1:OMlQbT5vcgl2gb49mFkYo6SMf+zP3rcjcwQz7ZU7IGY=
 github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+gitlab.torproject.org/tpo/anti-censorship/geoip v0.0.0-20210928150955-7ce4b3d98d01 h1:4949mHh9Vj2/okk48yG8nhP6TosFWOUfSfSr502sKGE=
+gitlab.torproject.org/tpo/anti-censorship/geoip v0.0.0-20210928150955-7ce4b3d98d01/go.mod h1:K3LOI4H8fa6j+7E10ViHeGEQV10304FG4j94ypmKLjY=
 go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
 go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg=
 go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk=



More information about the tor-commits mailing list