commit 4396d505a3b872fda43ca6cf43264d0f25cd8e9f Author: meskio meskio@torproject.org Date: Thu Sep 30 12:10:59 2021 +0200
Use tpo geoip library
Now the geoip implementation has been moved to its own library to be shared between projects. --- broker/geoip.go | 240 ---------------------------------------- broker/metrics.go | 38 ++----- broker/snowflake-broker_test.go | 96 +--------------- go.mod | 1 + go.sum | 2 + 5 files changed, 13 insertions(+), 364 deletions(-)
diff --git a/broker/geoip.go b/broker/geoip.go deleted file mode 100644 index 708cdad..0000000 --- a/broker/geoip.go +++ /dev/null @@ -1,240 +0,0 @@ -/* -This code is for loading database data that maps ip addresses to countries -for collecting and presenting statistics on snowflake use that might alert us -to censorship events. - -The functions here are heavily based off of how tor maintains and searches their -geoip database - -The tables used for geoip data must be structured as follows: - -Recognized line format for IPv4 is: - INTIPLOW,INTIPHIGH,CC - where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as big-endian 4-byte unsigned - integers, and CC is a country code. - -Note that the IPv4 line format - "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME" -is not currently supported. - -Recognized line format for IPv6 is: - IPV6LOW,IPV6HIGH,CC - where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code. - -It also recognizes, and skips over, blank lines and lines that start -with '#' (comments). 
- -*/ -package main - -import ( - "bufio" - "bytes" - "crypto/sha1" - "encoding/hex" - "fmt" - "io" - "log" - "net" - "os" - "sort" - "strconv" - "strings" - "sync" -) - -type GeoIPTable interface { - parseEntry(string) (*GeoIPEntry, error) - Len() int - Append(GeoIPEntry) - ElementAt(int) GeoIPEntry - Lock() - Unlock() -} - -type GeoIPEntry struct { - ipLow net.IP - ipHigh net.IP - country string -} - -type GeoIPv4Table struct { - table []GeoIPEntry - - lock sync.Mutex // synchronization for geoip table accesses and reloads -} - -type GeoIPv6Table struct { - table []GeoIPEntry - - lock sync.Mutex // synchronization for geoip table accesses and reloads -} - -func (table *GeoIPv4Table) Len() int { return len(table.table) } -func (table *GeoIPv6Table) Len() int { return len(table.table) } - -func (table *GeoIPv4Table) Append(entry GeoIPEntry) { - (*table).table = append(table.table, entry) -} -func (table *GeoIPv6Table) Append(entry GeoIPEntry) { - (*table).table = append(table.table, entry) -} - -func (table *GeoIPv4Table) ElementAt(i int) GeoIPEntry { return table.table[i] } -func (table *GeoIPv6Table) ElementAt(i int) GeoIPEntry { return table.table[i] } - -func (table *GeoIPv4Table) Lock() { (*table).lock.Lock() } -func (table *GeoIPv6Table) Lock() { (*table).lock.Lock() } - -func (table *GeoIPv4Table) Unlock() { (*table).lock.Unlock() } -func (table *GeoIPv6Table) Unlock() { (*table).lock.Unlock() } - -// Convert a geoip IP address represented as a big-endian unsigned integer to net.IP -func geoipStringToIP(ipStr string) (net.IP, error) { - ip, err := strconv.ParseUint(ipStr, 10, 32) - if err != nil { - return net.IPv4(0, 0, 0, 0), fmt.Errorf("error parsing IP %s", ipStr) - } - var bytes [4]byte - bytes[0] = byte(ip & 0xFF) - bytes[1] = byte((ip >> 8) & 0xFF) - bytes[2] = byte((ip >> 16) & 0xFF) - bytes[3] = byte((ip >> 24) & 0xFF) - - return net.IPv4(bytes[3], bytes[2], bytes[1], bytes[0]), nil -} - -//Parses a line in the provided geoip file that corresponds 
-//to an address range and a two character country code -func (table *GeoIPv4Table) parseEntry(candidate string) (*GeoIPEntry, error) { - - if candidate[0] == '#' { - return nil, nil - } - - parsedCandidate := strings.Split(candidate, ",") - - if len(parsedCandidate) != 3 { - return nil, fmt.Errorf("provided geoip file is incorrectly formatted. Could not parse line:\n%s", parsedCandidate) - } - - low, err := geoipStringToIP(parsedCandidate[0]) - if err != nil { - return nil, err - } - high, err := geoipStringToIP(parsedCandidate[1]) - if err != nil { - return nil, err - } - - geoipEntry := &GeoIPEntry{ - ipLow: low, - ipHigh: high, - country: parsedCandidate[2], - } - - return geoipEntry, nil -} - -//Parses a line in the provided geoip file that corresponds -//to an address range and a two character country code -func (table *GeoIPv6Table) parseEntry(candidate string) (*GeoIPEntry, error) { - - if candidate[0] == '#' { - return nil, nil - } - - parsedCandidate := strings.Split(candidate, ",") - - if len(parsedCandidate) != 3 { - return nil, fmt.Errorf("") - } - - low := net.ParseIP(parsedCandidate[0]) - if low == nil { - return nil, fmt.Errorf("") - } - high := net.ParseIP(parsedCandidate[1]) - if high == nil { - return nil, fmt.Errorf("") - } - - geoipEntry := &GeoIPEntry{ - ipLow: low, - ipHigh: high, - country: parsedCandidate[2], - } - - return geoipEntry, nil -} - -//Loads provided geoip file into our tables -//Entries are stored in a table -func GeoIPLoadFile(table GeoIPTable, pathname string) error { - //open file - geoipFile, err := os.Open(pathname) - if err != nil { - return err - } - defer geoipFile.Close() - - hash := sha1.New() - - table.Lock() - defer table.Unlock() - - hashedFile := io.TeeReader(geoipFile, hash) - - //read in strings and call parse function - scanner := bufio.NewScanner(hashedFile) - for scanner.Scan() { - entry, err := table.parseEntry(scanner.Text()) - if err != nil { - return fmt.Errorf("provided geoip file is incorrectly 
formatted. Line is: %+q", scanner.Text()) - } - - if entry != nil { - table.Append(*entry) - } - - } - if err := scanner.Err(); err != nil { - return err - } - - sha1Hash := hex.EncodeToString(hash.Sum(nil)) - log.Println("Using geoip file ", pathname, " with checksum", sha1Hash) - log.Println("Loaded ", table.Len(), " entries into table") - - return nil -} - -//Returns the country location of an IPv4 or IPv6 address, and a boolean value -//that indicates whether the IP address was present in the geoip database -func GetCountryByAddr(table GeoIPTable, ip net.IP) (string, bool) { - - table.Lock() - defer table.Unlock() - - //look IP up in database - index := sort.Search(table.Len(), func(i int) bool { - entry := table.ElementAt(i) - return (bytes.Compare(ip.To16(), entry.ipHigh.To16()) <= 0) - }) - - if index == table.Len() { - return "", false - } - - // check to see if addr is in the range specified by the returned index - // search on IPs in invalid ranges (e.g., 127.0.0.0/8) will return the - //country code of the next highest range - entry := table.ElementAt(index) - if !(bytes.Compare(ip.To16(), entry.ipLow.To16()) >= 0 && - bytes.Compare(ip.To16(), entry.ipHigh.To16()) <= 0) { - return "", false - } - - return table.ElementAt(index).country, true - -} diff --git a/broker/metrics.go b/broker/metrics.go index e8a6b0c..8229e0f 100644 --- a/broker/metrics.go +++ b/broker/metrics.go @@ -15,6 +15,7 @@ import ( "time"
"github.com/prometheus/client_golang/prometheus" + "gitlab.torproject.org/tpo/anti-censorship/geoip" )
const ( @@ -38,8 +39,7 @@ type CountryStats struct { // Implements Observable type Metrics struct { logger *log.Logger - tablev4 *GeoIPv4Table - tablev6 *GeoIPv6Table + geoipdb *geoip.Geoip
countryStats CountryStats clientRoundtripEstimate time.Duration @@ -115,19 +115,10 @@ func (m *Metrics) UpdateCountryStats(addr string, proxyType string, natType stri }
ip := net.ParseIP(addr) - if ip.To4() != nil { - //This is an IPv4 address - if m.tablev4 == nil { - return - } - country, ok = GetCountryByAddr(m.tablev4, ip) - } else { - if m.tablev6 == nil { - return - } - country, ok = GetCountryByAddr(m.tablev6, ip) + if m.geoipdb == nil { + return } - + country, ok = m.geoipdb.GetCountryByAddr(ip) if !ok { country = "??" } @@ -164,23 +155,10 @@ func (m *Metrics) UpdateCountryStats(addr string, proxyType string, natType stri func (m *Metrics) LoadGeoipDatabases(geoipDB string, geoip6DB string) error {
// Load geoip databases + var err error log.Println("Loading geoip databases") - tablev4 := new(GeoIPv4Table) - err := GeoIPLoadFile(tablev4, geoipDB) - if err != nil { - m.tablev4 = nil - return err - } - m.tablev4 = tablev4 - - tablev6 := new(GeoIPv6Table) - err = GeoIPLoadFile(tablev6, geoip6DB) - if err != nil { - m.tablev6 = nil - return err - } - m.tablev6 = tablev6 - return nil + m.geoipdb, err = geoip.New(geoipDB, geoip6DB) + return err }
func NewMetrics(metricsLogger *log.Logger) (*Metrics, error) { diff --git a/broker/snowflake-broker_test.go b/broker/snowflake-broker_test.go index 233cfea..25a947c 100644 --- a/broker/snowflake-broker_test.go +++ b/broker/snowflake-broker_test.go @@ -6,7 +6,6 @@ import ( "io" "io/ioutil" "log" - "net" "net/http" "net/http/httptest" "os" @@ -473,106 +472,15 @@ func TestSnowflakeHeap(t *testing.T) { }) }
-func TestGeoip(t *testing.T) { +func TestInvalidGeoipFile(t *testing.T) { Convey("Geoip", t, func() { - tv4 := new(GeoIPv4Table) - err := GeoIPLoadFile(tv4, "test_geoip") - So(err, ShouldEqual, nil) - tv6 := new(GeoIPv6Table) - err = GeoIPLoadFile(tv6, "test_geoip6") - So(err, ShouldEqual, nil) - - Convey("IPv4 Country Mapping Tests", func() { - for _, test := range []struct { - addr, cc string - ok bool - }{ - { - "129.97.208.23", //uwaterloo - "CA", - true, - }, - { - "127.0.0.1", - "", - false, - }, - { - "255.255.255.255", - "", - false, - }, - { - "0.0.0.0", - "", - false, - }, - { - "223.252.127.255", //test high end of range - "JP", - true, - }, - { - "223.252.127.255", //test low end of range - "JP", - true, - }, - } { - country, ok := GetCountryByAddr(tv4, net.ParseIP(test.addr)) - So(country, ShouldEqual, test.cc) - So(ok, ShouldResemble, test.ok) - } - }) - - Convey("IPv6 Country Mapping Tests", func() { - for _, test := range []struct { - addr, cc string - ok bool - }{ - { - "2620:101:f000:0:250:56ff:fe80:168e", //uwaterloo - "CA", - true, - }, - { - "fd00:0:0:0:0:0:0:1", - "", - false, - }, - { - "0:0:0:0:0:0:0:0", - "", - false, - }, - { - "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", - "", - false, - }, - { - "2a07:2e47:ffff:ffff:ffff:ffff:ffff:ffff", //test high end of range - "FR", - true, - }, - { - "2a07:2e40::", //test low end of range - "FR", - true, - }, - } { - country, ok := GetCountryByAddr(tv6, net.ParseIP(test.addr)) - So(country, ShouldEqual, test.cc) - So(ok, ShouldResemble, test.ok) - } - }) - // Make sure things behave properly if geoip file fails to load ctx := NewBrokerContext(NullLogger()) if err := ctx.metrics.LoadGeoipDatabases("invalid_filename", "invalid_filename6"); err != nil { log.Printf("loading geo ip databases returned error: %v", err) } ctx.metrics.UpdateCountryStats("127.0.0.1", "", NATUnrestricted) - So(ctx.metrics.tablev4, ShouldEqual, nil) + So(ctx.metrics.geoipdb, ShouldEqual, nil)
}) } diff --git a/go.mod b/go.mod index 36585aa..9d6b6ac 100644 --- a/go.mod +++ b/go.mod @@ -16,6 +16,7 @@ require ( github.com/smartystreets/goconvey v1.6.4 github.com/xtaci/kcp-go/v5 v5.6.1 github.com/xtaci/smux v1.5.15 + gitlab.torproject.org/tpo/anti-censorship/geoip v0.0.0-20210928150955-7ce4b3d98d01 golang.org/x/crypto v0.0.0-20210317152858-513c2a44f670 golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4 golang.org/x/sys v0.0.0-20210317225723-c4fcb01b228e // indirect diff --git a/go.sum b/go.sum index f0b3927..34bc936 100644 --- a/go.sum +++ b/go.sum @@ -358,6 +358,8 @@ github.com/xtaci/smux v1.5.15 h1:6hMiXswcleXj5oNfcJc+DXS8Vj36XX2LaX98udog6Kc= github.com/xtaci/smux v1.5.15/go.mod h1:OMlQbT5vcgl2gb49mFkYo6SMf+zP3rcjcwQz7ZU7IGY= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +gitlab.torproject.org/tpo/anti-censorship/geoip v0.0.0-20210928150955-7ce4b3d98d01 h1:4949mHh9Vj2/okk48yG8nhP6TosFWOUfSfSr502sKGE= +gitlab.torproject.org/tpo/anti-censorship/geoip v0.0.0-20210928150955-7ce4b3d98d01/go.mod h1:K3LOI4H8fa6j+7E10ViHeGEQV10304FG4j94ypmKLjY= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk=
tor-commits@lists.torproject.org