[tor-commits] [tor/master] geoip script: add options to output AS numbers.

nickm at torproject.org nickm at torproject.org
Wed Apr 14 14:30:28 UTC 2021


commit e71154428eed922e538e6e61830dd1affe7eb188
Author: Nick Mathewson <nickm at torproject.org>
Date:   Wed Apr 14 10:28:44 2021 -0400

    geoip script: add options to output AS numbers.
    
    The --include-asn option includes AS numbers in the geoip mapping.
    
    The --output-asn option makes the program generate a file that maps
    AS numbers to AS names.
    
    Additionally, the script now outputs entries with a "??" country code
    for networks that are listed but have no known country.
---
 scripts/maint/geoip/geoip-db-tool/src/db.rs   |  38 +++++----
 scripts/maint/geoip/geoip-db-tool/src/main.rs | 106 ++++++++++++++++++++++----
 2 files changed, 115 insertions(+), 29 deletions(-)

diff --git a/scripts/maint/geoip/geoip-db-tool/src/db.rs b/scripts/maint/geoip/geoip-db-tool/src/db.rs
index eaadd4c612..316182d823 100644
--- a/scripts/maint/geoip/geoip-db-tool/src/db.rs
+++ b/scripts/maint/geoip/geoip-db-tool/src/db.rs
@@ -3,7 +3,7 @@ use std::collections::HashMap;
 use std::convert::TryInto;
 use std::iter::Peekable;
 
-use super::NetBlock;
+use super::{AsBlock, NetBlock};
 
 pub struct BlockReader<I>
 where
@@ -12,9 +12,10 @@ where
     iter: Peekable<I>,
 }
 
-enum AnyBlock {
-    NotNet,
+pub enum AnyBlock {
     NetBlock(NetBlock),
+    AsBlock(AsBlock),
+    OtherBlock,
 }
 
 impl<I> BlockReader<I>
@@ -74,17 +75,31 @@ where
             return None;
         }
 
+        if let Some(name) = kv.remove("name") {
+            // This is an AS block.
+            let asn = kv.get("aut-num").unwrap(); // XXXX handle error better
+            assert!(asn.starts_with("AS"));
+            let asn = asn[2..].parse().unwrap();
+            return Some(Ok(AnyBlock::AsBlock(AsBlock { name, asn })));
+        }
+
         let net = if let Some(net) = kv.get("net") {
             net.parse().unwrap() //XXXX handle the error better.
         } else {
-            return Some(Ok(AnyBlock::NotNet));
+            return Some(Ok(AnyBlock::OtherBlock));
+        };
+
+        let asn = if let Some(asn) = kv.get("aut-num") {
+            asn.parse().ok()
+        } else {
+            None
         };
 
         let cc = if let Some(country) = kv.get("country") {
             assert!(country.as_bytes().len() == 2);
             country.as_bytes()[0..2].try_into().unwrap()
         } else {
-            return Some(Ok(AnyBlock::NotNet));
+            *b"??"
         };
 
         fn is_true(v: Option<&String>) -> bool {
@@ -100,6 +115,7 @@ where
 
         Some(Ok(AnyBlock::NetBlock(NetBlock {
             net,
+            asn,
             cc,
             is_anon_proxy,
             is_anycast,
@@ -112,15 +128,11 @@ impl<I> Iterator for BlockReader<I>
 where
     I: Iterator<Item = std::io::Result<String>>,
 {
-    type Item = NetBlock;
+    type Item = AnyBlock;
     fn next(&mut self) -> Option<Self::Item> {
-        loop {
-            match self.get_block() {
-                None => return None,
-                Some(Err(_)) => return None,
-                Some(Ok(AnyBlock::NotNet)) => continue,
-                Some(Ok(AnyBlock::NetBlock(n))) => return Some(n),
-            }
+        match self.get_block() {
+            Some(Ok(b)) => Some(b),
+            _ => None,
         }
     }
 }
diff --git a/scripts/maint/geoip/geoip-db-tool/src/main.rs b/scripts/maint/geoip/geoip-db-tool/src/main.rs
index 38d70f7e1b..9a22598a35 100644
--- a/scripts/maint/geoip/geoip-db-tool/src/main.rs
+++ b/scripts/maint/geoip/geoip-db-tool/src/main.rs
@@ -9,7 +9,8 @@ use rangemap::RangeInclusiveMap;
 use std::fs::File;
 use std::io::{BufRead, BufReader, BufWriter, Write};
 use std::net::{IpAddr, Ipv6Addr};
-use std::path::{Path, PathBuf};
+use std::num::NonZeroU32;
+use std::path::PathBuf;
 
 fn default_ipv4_path() -> PathBuf {
     "./geoip".into()
@@ -32,6 +33,14 @@ struct Args {
     /// where to find the dump file
     #[argh(option, short = 'i')]
     input: PathBuf,
+
+    /// whether to include AS information in our output
+    #[argh(switch)]
+    include_asn: bool,
+
+    /// where to store the AS map.
+    #[argh(option)]
+    output_asn: Option<PathBuf>,
 }
 
 /// Represents a network block from running `location dump`.
@@ -39,11 +48,19 @@ struct Args {
 pub struct NetBlock {
     pub net: IpNetwork,
     pub cc: [u8; 2],
+    pub asn: Option<NonZeroU32>,
     pub is_anon_proxy: bool,
     pub is_anycast: bool,
     pub is_satellite: bool,
 }
 
+/// Represents an AS definition from running `location dump`.
+#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
+pub struct AsBlock {
+    pub asn: NonZeroU32,
+    pub name: String,
+}
+
 impl PartialEq for NetBlock {
     fn eq(&self, other: &Self) -> bool {
         self.net == other.net
@@ -69,6 +86,40 @@ impl PartialOrd for NetBlock {
 
 impl Eq for NetBlock {}
 
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+struct NetDefn {
+    cc: [u8; 2],
+    asn: Option<NonZeroU32>,
+}
+
+impl NetBlock {
+    fn into_defn(self, include_asn: bool) -> NetDefn {
+        if include_asn {
+            NetDefn {
+                cc: self.cc,
+                asn: self.asn,
+            }
+        } else {
+            NetDefn {
+                cc: self.cc,
+                asn: None,
+            }
+        }
+    }
+}
+
+impl NetDefn {
+    fn cc(&self) -> &str {
+        std::str::from_utf8(&self.cc).unwrap()
+    }
+    fn asn(&self) -> u32 {
+        match self.asn {
+            Some(v) => v.into(),
+            None => 0,
+        }
+    }
+}
+
 const PROLOGUE: &str = "\
 # This file has been converted from the IPFire Location database
 # using Tor's geoip-db-tool.  For more information on the data, see
@@ -82,16 +133,26 @@ const PROLOGUE: &str = "\
 ///
 /// This code tries to be "efficient enough"; most of the logic is handled by
 /// using the rangemap crate.
-fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<()> {
+fn convert(args: Args) -> std::io::Result<()> {
+    let input = args.input.as_path();
+    let output_v4 = args.output_ipv4.as_path();
+    let output_v6 = args.output_ipv6.as_path();
+    let include_asn = args.include_asn;
+
     let f = File::open(input)?;
     let f = BufReader::new(f);
     let mut blocks = Vec::new();
+    let mut networks = Vec::new();
 
     let mut reader = db::BlockReader::new(f.lines());
     let hdr = reader.extract_header();
     // Read blocks, and then sort them by specificity and address.
     for nb in reader {
-        blocks.push(nb);
+        match nb {
+            db::AnyBlock::AsBlock(a) => networks.push(a),
+            db::AnyBlock::NetBlock(n) => blocks.push(n),
+            _ => {}
+        }
     }
     blocks.sort();
 
@@ -104,8 +165,8 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
     //
     // We use u32 and u128 as the index types for these RangeInclusiveMaps,
     // so that we don't need to implement a step function for IpAddr.
-    let mut v4map: RangeInclusiveMap<u32, [u8; 2], _> = RangeInclusiveMap::new();
-    let mut v6map: RangeInclusiveMap<u128, [u8; 2], _> = RangeInclusiveMap::new();
+    let mut v4map: RangeInclusiveMap<u32, NetDefn, _> = RangeInclusiveMap::new();
+    let mut v6map: RangeInclusiveMap<u128, NetDefn, _> = RangeInclusiveMap::new();
 
     let mut n = 0usize;
     let num_blocks = blocks.len();
@@ -118,10 +179,10 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
         let end = nb.net.broadcast();
         match (start, end) {
             (IpAddr::V4(a), IpAddr::V4(b)) => {
-                v4map.insert(a.into()..=b.into(), nb.cc);
+                v4map.insert(a.into()..=b.into(), nb.into_defn(include_asn));
             }
             (IpAddr::V6(a), IpAddr::V6(b)) => {
-                v6map.insert(a.into()..=b.into(), nb.cc);
+                v6map.insert(a.into()..=b.into(), nb.into_defn(include_asn));
             }
             (_, _) => panic!("network started and ended in different families!?"),
         }
@@ -133,33 +194,46 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
 
     v4.write_all(PROLOGUE.as_bytes())?;
     v4.write_all(hdr.as_bytes())?;
-    for (r, cc) in v4map.iter() {
+    for (r, defn) in v4map.iter() {
         let a: u32 = *r.start();
         let b: u32 = *r.end();
-        writeln!(&mut v4, "{},{},{}", a, b, std::str::from_utf8(cc).unwrap())?;
+        if include_asn {
+            writeln!(&mut v4, "{},{},{},{}", a, b, defn.cc(), defn.asn())?;
+        } else {
+            writeln!(&mut v4, "{},{},{}", a, b, defn.cc())?;
+        }
     }
 
     v6.write_all(PROLOGUE.as_bytes())?;
     v6.write_all(hdr.as_bytes())?;
-    for (r, cc) in v6map.iter() {
+    for (r, defn) in v6map.iter() {
         let a: Ipv6Addr = (*r.start()).into();
         let b: Ipv6Addr = (*r.end()).into();
-        writeln!(&mut v6, "{},{},{}", a, b, std::str::from_utf8(cc).unwrap())?;
+        if include_asn {
+            writeln!(&mut v6, "{},{},{},{}", a, b, defn.cc(), defn.asn())?;
+        } else {
+            writeln!(&mut v6, "{},{},{}", a, b, defn.cc())?;
+        }
     }
 
     // The documentation says you should always flush a BufWriter.
     v4.flush()?;
     v6.flush()?;
 
+    if let Some(output_asn) = args.output_asn {
+        networks.sort();
+        let mut asn = BufWriter::new(File::create(output_asn)?);
+        for net in networks {
+            writeln!(&mut asn, "{},{}", net.asn, net.name)?;
+        }
+        asn.flush()?;
+    }
+
     Ok(())
 }
 
 fn main() -> std::io::Result<()> {
     let args: Args = argh::from_env();
 
-    convert(
-        args.input.as_path(),
-        args.output_ipv4.as_path(),
-        args.output_ipv6.as_path(),
-    )
+    convert(args)
 }



More information about the tor-commits mailing list