commit e71154428eed922e538e6e61830dd1affe7eb188 Author: Nick Mathewson <nickm@torproject.org> Date: Wed Apr 14 10:28:44 2021 -0400
geoip script: add options to output AS numbers.
The --include-asn option includes AS numbers in the geoip mapping.
The --output-asn option makes the program generate a number-to-name mapping file.
Additionally, the script now outputs ?? country-code entries for networks that are listed but have no known country. --- scripts/maint/geoip/geoip-db-tool/src/db.rs | 38 +++++---- scripts/maint/geoip/geoip-db-tool/src/main.rs | 106 ++++++++++++++++++++++---- 2 files changed, 115 insertions(+), 29 deletions(-)
diff --git a/scripts/maint/geoip/geoip-db-tool/src/db.rs b/scripts/maint/geoip/geoip-db-tool/src/db.rs index eaadd4c612..316182d823 100644 --- a/scripts/maint/geoip/geoip-db-tool/src/db.rs +++ b/scripts/maint/geoip/geoip-db-tool/src/db.rs @@ -3,7 +3,7 @@ use std::collections::HashMap; use std::convert::TryInto; use std::iter::Peekable;
-use super::NetBlock; +use super::{AsBlock, NetBlock};
pub struct BlockReader<I> where @@ -12,9 +12,10 @@ where iter: Peekable<I>, }
-enum AnyBlock { - NotNet, +pub enum AnyBlock { NetBlock(NetBlock), + AsBlock(AsBlock), + OtherBlock, }
impl<I> BlockReader<I> @@ -74,17 +75,31 @@ where return None; }
+ if let Some(name) = kv.remove("name") { + // This is an AS block. + let asn = kv.get("aut-num").unwrap(); // XXXX handle error better + assert!(asn.starts_with("AS")); + let asn = asn[2..].parse().unwrap(); + return Some(Ok(AnyBlock::AsBlock(AsBlock { name, asn }))); + } + let net = if let Some(net) = kv.get("net") { net.parse().unwrap() //XXXX handle the error better. } else { - return Some(Ok(AnyBlock::NotNet)); + return Some(Ok(AnyBlock::OtherBlock)); + }; + + let asn = if let Some(asn) = kv.get("aut-num") { + asn.parse().ok() + } else { + None };
let cc = if let Some(country) = kv.get("country") { assert!(country.as_bytes().len() == 2); country.as_bytes()[0..2].try_into().unwrap() } else { - return Some(Ok(AnyBlock::NotNet)); + *b"??" };
fn is_true(v: Option<&String>) -> bool { @@ -100,6 +115,7 @@ where
Some(Ok(AnyBlock::NetBlock(NetBlock { net, + asn, cc, is_anon_proxy, is_anycast, @@ -112,15 +128,11 @@ impl<I> Iterator for BlockReader<I> where I: Iterator<Item = std::io::Result<String>>, { - type Item = NetBlock; + type Item = AnyBlock; fn next(&mut self) -> Option<Self::Item> { - loop { - match self.get_block() { - None => return None, - Some(Err(_)) => return None, - Some(Ok(AnyBlock::NotNet)) => continue, - Some(Ok(AnyBlock::NetBlock(n))) => return Some(n), - } + match self.get_block() { + Some(Ok(b)) => Some(b), + _ => None, } } } diff --git a/scripts/maint/geoip/geoip-db-tool/src/main.rs b/scripts/maint/geoip/geoip-db-tool/src/main.rs index 38d70f7e1b..9a22598a35 100644 --- a/scripts/maint/geoip/geoip-db-tool/src/main.rs +++ b/scripts/maint/geoip/geoip-db-tool/src/main.rs @@ -9,7 +9,8 @@ use rangemap::RangeInclusiveMap; use std::fs::File; use std::io::{BufRead, BufReader, BufWriter, Write}; use std::net::{IpAddr, Ipv6Addr}; -use std::path::{Path, PathBuf}; +use std::num::NonZeroU32; +use std::path::PathBuf;
fn default_ipv4_path() -> PathBuf { "./geoip".into() @@ -32,6 +33,14 @@ struct Args { /// where to find the dump file #[argh(option, short = 'i')] input: PathBuf, + + /// whether to include AS information in our output + #[argh(switch)] + include_asn: bool, + + /// where to store the AS map. + #[argh(option)] + output_asn: Option<PathBuf>, }
/// Represents a network block from running `location dump`. @@ -39,11 +48,19 @@ struct Args { pub struct NetBlock { pub net: IpNetwork, pub cc: [u8; 2], + pub asn: Option<NonZeroU32>, pub is_anon_proxy: bool, pub is_anycast: bool, pub is_satellite: bool, }
+/// Represents an AS definition from running `location dump`. +#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub struct AsBlock { + pub asn: NonZeroU32, + pub name: String, +} + impl PartialEq for NetBlock { fn eq(&self, other: &Self) -> bool { self.net == other.net @@ -69,6 +86,40 @@ impl PartialOrd for NetBlock {
impl Eq for NetBlock {}
+#[derive(Copy, Clone, Eq, PartialEq, Debug)] +struct NetDefn { + cc: [u8; 2], + asn: Option<NonZeroU32>, +} + +impl NetBlock { + fn into_defn(self, include_asn: bool) -> NetDefn { + if include_asn { + NetDefn { + cc: self.cc, + asn: self.asn, + } + } else { + NetDefn { + cc: self.cc, + asn: None, + } + } + } +} + +impl NetDefn { + fn cc(&self) -> &str { + std::str::from_utf8(&self.cc).unwrap() + } + fn asn(&self) -> u32 { + match self.asn { + Some(v) => v.into(), + None => 0, + } + } +} + const PROLOGUE: &str = "\ # This file has been converted from the IPFire Location database # using Tor's geoip-db-tool. For more information on the data, see @@ -82,16 +133,26 @@ const PROLOGUE: &str = "\ /// /// This code tries to be "efficient enough"; most of the logic is handled by /// using the rangemap crate. -fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<()> { +fn convert(args: Args) -> std::io::Result<()> { + let input = args.input.as_path(); + let output_v4 = args.output_ipv4.as_path(); + let output_v6 = args.output_ipv6.as_path(); + let include_asn = args.include_asn; + let f = File::open(input)?; let f = BufReader::new(f); let mut blocks = Vec::new(); + let mut networks = Vec::new();
let mut reader = db::BlockReader::new(f.lines()); let hdr = reader.extract_header(); // Read blocks, and then sort them by specificity and address. for nb in reader { - blocks.push(nb); + match nb { + db::AnyBlock::AsBlock(a) => networks.push(a), + db::AnyBlock::NetBlock(n) => blocks.push(n), + _ => {} + } } blocks.sort();
@@ -104,8 +165,8 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result< // // We use u32 and u128 as the index types for these RangeInclusiveMaps, // so that we don't need to implement a step function for IpAddr. - let mut v4map: RangeInclusiveMap<u32, [u8; 2], _> = RangeInclusiveMap::new(); - let mut v6map: RangeInclusiveMap<u128, [u8; 2], _> = RangeInclusiveMap::new(); + let mut v4map: RangeInclusiveMap<u32, NetDefn, _> = RangeInclusiveMap::new(); + let mut v6map: RangeInclusiveMap<u128, NetDefn, _> = RangeInclusiveMap::new();
let mut n = 0usize; let num_blocks = blocks.len(); @@ -118,10 +179,10 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result< let end = nb.net.broadcast(); match (start, end) { (IpAddr::V4(a), IpAddr::V4(b)) => { - v4map.insert(a.into()..=b.into(), nb.cc); + v4map.insert(a.into()..=b.into(), nb.into_defn(include_asn)); } (IpAddr::V6(a), IpAddr::V6(b)) => { - v6map.insert(a.into()..=b.into(), nb.cc); + v6map.insert(a.into()..=b.into(), nb.into_defn(include_asn)); } (_, _) => panic!("network started and ended in different families!?"), } @@ -133,33 +194,46 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
v4.write_all(PROLOGUE.as_bytes())?; v4.write_all(hdr.as_bytes())?; - for (r, cc) in v4map.iter() { + for (r, defn) in v4map.iter() { let a: u32 = *r.start(); let b: u32 = *r.end(); - writeln!(&mut v4, "{},{},{}", a, b, std::str::from_utf8(cc).unwrap())?; + if include_asn { + writeln!(&mut v4, "{},{},{},{}", a, b, defn.cc(), defn.asn())?; + } else { + writeln!(&mut v4, "{},{},{}", a, b, defn.cc())?; + } }
v6.write_all(PROLOGUE.as_bytes())?; v6.write_all(hdr.as_bytes())?; - for (r, cc) in v6map.iter() { + for (r, defn) in v6map.iter() { let a: Ipv6Addr = (*r.start()).into(); let b: Ipv6Addr = (*r.end()).into(); - writeln!(&mut v6, "{},{},{}", a, b, std::str::from_utf8(cc).unwrap())?; + if include_asn { + writeln!(&mut v6, "{},{},{},{}", a, b, defn.cc(), defn.asn())?; + } else { + writeln!(&mut v6, "{},{},{}", a, b, defn.cc())?; + } }
// The documentation says you should always flush a BufWriter. v4.flush()?; v6.flush()?;
+ if let Some(output_asn) = args.output_asn { + networks.sort(); + let mut asn = BufWriter::new(File::create(output_asn)?); + for net in networks { + writeln!(&mut asn, "{},{}", net.asn, net.name)?; + } + asn.flush()?; + } + Ok(()) }
fn main() -> std::io::Result<()> { let args: Args = argh::from_env();
- convert( - args.input.as_path(), - args.output_ipv4.as_path(), - args.output_ipv6.as_path(), - ) + convert(args) }