geoip script: add options to output AS numbers.

The --include-asn option includes AS numbers in the geoip mapping.

The --output-asn option makes the program write a file that maps AS
numbers to AS names.

Additionally, the script now outputs entries with the country code
"??" for networks that are listed but have no known country.
This commit is contained in:
Nick Mathewson 2021-04-14 10:28:44 -04:00
parent 91569c4dad
commit e71154428e
2 changed files with 115 additions and 29 deletions

View File

@ -3,7 +3,7 @@ use std::collections::HashMap;
use std::convert::TryInto;
use std::iter::Peekable;
use super::NetBlock;
use super::{AsBlock, NetBlock};
pub struct BlockReader<I>
where
@ -12,9 +12,10 @@ where
iter: Peekable<I>,
}
enum AnyBlock {
NotNet,
pub enum AnyBlock {
NetBlock(NetBlock),
AsBlock(AsBlock),
OtherBlock,
}
impl<I> BlockReader<I>
@ -74,17 +75,31 @@ where
return None;
}
if let Some(name) = kv.remove("name") {
// This is an AS block.
let asn = kv.get("aut-num").unwrap(); // XXXX handle error better
assert!(asn.starts_with("AS"));
let asn = asn[2..].parse().unwrap();
return Some(Ok(AnyBlock::AsBlock(AsBlock { name, asn })));
}
let net = if let Some(net) = kv.get("net") {
net.parse().unwrap() //XXXX handle the error better.
} else {
return Some(Ok(AnyBlock::NotNet));
return Some(Ok(AnyBlock::OtherBlock));
};
let asn = if let Some(asn) = kv.get("aut-num") {
asn.parse().ok()
} else {
None
};
let cc = if let Some(country) = kv.get("country") {
assert!(country.as_bytes().len() == 2);
country.as_bytes()[0..2].try_into().unwrap()
} else {
return Some(Ok(AnyBlock::NotNet));
*b"??"
};
fn is_true(v: Option<&String>) -> bool {
@ -100,6 +115,7 @@ where
Some(Ok(AnyBlock::NetBlock(NetBlock {
net,
asn,
cc,
is_anon_proxy,
is_anycast,
@ -112,15 +128,11 @@ impl<I> Iterator for BlockReader<I>
where
I: Iterator<Item = std::io::Result<String>>,
{
type Item = NetBlock;
type Item = AnyBlock;
fn next(&mut self) -> Option<Self::Item> {
loop {
match self.get_block() {
None => return None,
Some(Err(_)) => return None,
Some(Ok(AnyBlock::NotNet)) => continue,
Some(Ok(AnyBlock::NetBlock(n))) => return Some(n),
}
match self.get_block() {
Some(Ok(b)) => Some(b),
_ => None,
}
}
}

View File

@ -9,7 +9,8 @@ use rangemap::RangeInclusiveMap;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::net::{IpAddr, Ipv6Addr};
use std::path::{Path, PathBuf};
use std::num::NonZeroU32;
use std::path::PathBuf;
fn default_ipv4_path() -> PathBuf {
"./geoip".into()
@ -32,6 +33,14 @@ struct Args {
/// where to find the dump file
#[argh(option, short = 'i')]
input: PathBuf,
/// whether to include AS information in our output
#[argh(switch)]
include_asn: bool,
/// where to store the AS map.
#[argh(option)]
output_asn: Option<PathBuf>,
}
/// Represents a network block from running `location dump`.
@ -39,11 +48,19 @@ struct Args {
pub struct NetBlock {
/// The network that this block describes.
pub net: IpNetwork,
/// Two-byte country code for the network; `??` when no country is listed.
pub cc: [u8; 2],
/// AS number attached to the network, if one was present and parseable.
pub asn: Option<NonZeroU32>,
// NOTE(review): the three flags below presumably mirror same-named
// attributes in the dump; the code that sets them is not visible here.
pub is_anon_proxy: bool,
pub is_anycast: bool,
pub is_satellite: bool,
}
/// Represents an AS definition from running `location dump`.
///
/// The derived ordering compares `asn` first and `name` second, so sorting
/// a list of these yields entries in ascending AS-number order.
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub struct AsBlock {
/// The AS number; it can never be zero because it is a `NonZeroU32`.
pub asn: NonZeroU32,
/// The name listed for this AS.
pub name: String,
}
impl PartialEq for NetBlock {
fn eq(&self, other: &Self) -> bool {
self.net == other.net
@ -69,6 +86,40 @@ impl PartialOrd for NetBlock {
impl Eq for NetBlock {}
/// The per-range value kept in the IPv4 and IPv6 range maps: a country code
/// plus an optional AS number.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
struct NetDefn {
/// Two-byte country code for the range.
cc: [u8; 2],
/// AS number for the range; `None` when the network had none or when AS
/// output was not requested.
asn: Option<NonZeroU32>,
}
impl NetBlock {
    /// Consume this block and reduce it to the [`NetDefn`] recorded for its
    /// address range.
    ///
    /// The country code is always carried over. The AS number is carried
    /// over only when `include_asn` is true; otherwise it is dropped.
    fn into_defn(self, include_asn: bool) -> NetDefn {
        let asn = if include_asn { self.asn } else { None };
        NetDefn { cc: self.cc, asn }
    }
}
impl NetDefn {
    /// Return the country code as a string slice.
    ///
    /// # Panics
    ///
    /// Panics if the two stored bytes are not valid UTF-8.
    fn cc(&self) -> &str {
        let bytes: &[u8] = &self.cc;
        std::str::from_utf8(bytes).unwrap()
    }

    /// Return the AS number as a plain integer, with 0 standing for "none".
    fn asn(&self) -> u32 {
        self.asn.map_or(0, u32::from)
    }
}
const PROLOGUE: &str = "\
# This file has been converted from the IPFire Location database
# using Tor's geoip-db-tool. For more information on the data, see
@ -82,16 +133,26 @@ const PROLOGUE: &str = "\
///
/// This code tries to be "efficient enough"; most of the logic is handled by
/// using the rangemap crate.
fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<()> {
fn convert(args: Args) -> std::io::Result<()> {
let input = args.input.as_path();
let output_v4 = args.output_ipv4.as_path();
let output_v6 = args.output_ipv6.as_path();
let include_asn = args.include_asn;
let f = File::open(input)?;
let f = BufReader::new(f);
let mut blocks = Vec::new();
let mut networks = Vec::new();
let mut reader = db::BlockReader::new(f.lines());
let hdr = reader.extract_header();
// Read blocks, and then sort them by specificity and address.
for nb in reader {
blocks.push(nb);
match nb {
db::AnyBlock::AsBlock(a) => networks.push(a),
db::AnyBlock::NetBlock(n) => blocks.push(n),
_ => {}
}
}
blocks.sort();
@ -104,8 +165,8 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
//
// We use u32 and u128 as the index types for these RangeInclusiveMaps,
// so that we don't need to implement a step function for IpAddr.
let mut v4map: RangeInclusiveMap<u32, [u8; 2], _> = RangeInclusiveMap::new();
let mut v6map: RangeInclusiveMap<u128, [u8; 2], _> = RangeInclusiveMap::new();
let mut v4map: RangeInclusiveMap<u32, NetDefn, _> = RangeInclusiveMap::new();
let mut v6map: RangeInclusiveMap<u128, NetDefn, _> = RangeInclusiveMap::new();
let mut n = 0usize;
let num_blocks = blocks.len();
@ -118,10 +179,10 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
let end = nb.net.broadcast();
match (start, end) {
(IpAddr::V4(a), IpAddr::V4(b)) => {
v4map.insert(a.into()..=b.into(), nb.cc);
v4map.insert(a.into()..=b.into(), nb.into_defn(include_asn));
}
(IpAddr::V6(a), IpAddr::V6(b)) => {
v6map.insert(a.into()..=b.into(), nb.cc);
v6map.insert(a.into()..=b.into(), nb.into_defn(include_asn));
}
(_, _) => panic!("network started and ended in different families!?"),
}
@ -133,33 +194,46 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
v4.write_all(PROLOGUE.as_bytes())?;
v4.write_all(hdr.as_bytes())?;
for (r, cc) in v4map.iter() {
for (r, defn) in v4map.iter() {
let a: u32 = *r.start();
let b: u32 = *r.end();
writeln!(&mut v4, "{},{},{}", a, b, std::str::from_utf8(cc).unwrap())?;
if include_asn {
writeln!(&mut v4, "{},{},{},{}", a, b, defn.cc(), defn.asn())?;
} else {
writeln!(&mut v4, "{},{},{}", a, b, defn.cc())?;
}
}
v6.write_all(PROLOGUE.as_bytes())?;
v6.write_all(hdr.as_bytes())?;
for (r, cc) in v6map.iter() {
for (r, defn) in v6map.iter() {
let a: Ipv6Addr = (*r.start()).into();
let b: Ipv6Addr = (*r.end()).into();
writeln!(&mut v6, "{},{},{}", a, b, std::str::from_utf8(cc).unwrap())?;
if include_asn {
writeln!(&mut v6, "{},{},{},{}", a, b, defn.cc(), defn.asn())?;
} else {
writeln!(&mut v6, "{},{},{}", a, b, defn.cc())?;
}
}
// The documentation says you should always flush a BufWriter.
v4.flush()?;
v6.flush()?;
if let Some(output_asn) = args.output_asn {
networks.sort();
let mut asn = BufWriter::new(File::create(output_asn)?);
for net in networks {
writeln!(&mut asn, "{},{}", net.asn, net.name)?;
}
asn.flush()?;
}
Ok(())
}
fn main() -> std::io::Result<()> {
let args: Args = argh::from_env();
convert(
args.input.as_path(),
args.output_ipv4.as_path(),
args.output_ipv6.as_path(),
)
convert(args)
}