mirror of
https://gitlab.torproject.org/tpo/core/tor.git
synced 2024-11-10 13:13:44 +01:00
geoip script: add options to output AS numbers.
The --include-asn option includes AS numbers in the geoip mapping. The --output-asn option makes the program generate an AS-number-to-name mapping file. Additionally, the script now outputs "??" country-code entries for networks that are listed but have no known country.
This commit is contained in:
parent
91569c4dad
commit
e71154428e
@ -3,7 +3,7 @@ use std::collections::HashMap;
|
||||
use std::convert::TryInto;
|
||||
use std::iter::Peekable;
|
||||
|
||||
use super::NetBlock;
|
||||
use super::{AsBlock, NetBlock};
|
||||
|
||||
pub struct BlockReader<I>
|
||||
where
|
||||
@ -12,9 +12,10 @@ where
|
||||
iter: Peekable<I>,
|
||||
}
|
||||
|
||||
enum AnyBlock {
|
||||
NotNet,
|
||||
pub enum AnyBlock {
|
||||
NetBlock(NetBlock),
|
||||
AsBlock(AsBlock),
|
||||
OtherBlock,
|
||||
}
|
||||
|
||||
impl<I> BlockReader<I>
|
||||
@ -74,17 +75,31 @@ where
|
||||
return None;
|
||||
}
|
||||
|
||||
if let Some(name) = kv.remove("name") {
|
||||
// This is an AS block.
|
||||
let asn = kv.get("aut-num").unwrap(); // XXXX handle error better
|
||||
assert!(asn.starts_with("AS"));
|
||||
let asn = asn[2..].parse().unwrap();
|
||||
return Some(Ok(AnyBlock::AsBlock(AsBlock { name, asn })));
|
||||
}
|
||||
|
||||
let net = if let Some(net) = kv.get("net") {
|
||||
net.parse().unwrap() //XXXX handle the error better.
|
||||
} else {
|
||||
return Some(Ok(AnyBlock::NotNet));
|
||||
return Some(Ok(AnyBlock::OtherBlock));
|
||||
};
|
||||
|
||||
let asn = if let Some(asn) = kv.get("aut-num") {
|
||||
asn.parse().ok()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let cc = if let Some(country) = kv.get("country") {
|
||||
assert!(country.as_bytes().len() == 2);
|
||||
country.as_bytes()[0..2].try_into().unwrap()
|
||||
} else {
|
||||
return Some(Ok(AnyBlock::NotNet));
|
||||
*b"??"
|
||||
};
|
||||
|
||||
fn is_true(v: Option<&String>) -> bool {
|
||||
@ -100,6 +115,7 @@ where
|
||||
|
||||
Some(Ok(AnyBlock::NetBlock(NetBlock {
|
||||
net,
|
||||
asn,
|
||||
cc,
|
||||
is_anon_proxy,
|
||||
is_anycast,
|
||||
@ -112,15 +128,11 @@ impl<I> Iterator for BlockReader<I>
|
||||
where
|
||||
I: Iterator<Item = std::io::Result<String>>,
|
||||
{
|
||||
type Item = NetBlock;
|
||||
type Item = AnyBlock;
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
match self.get_block() {
|
||||
None => return None,
|
||||
Some(Err(_)) => return None,
|
||||
Some(Ok(AnyBlock::NotNet)) => continue,
|
||||
Some(Ok(AnyBlock::NetBlock(n))) => return Some(n),
|
||||
}
|
||||
match self.get_block() {
|
||||
Some(Ok(b)) => Some(b),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -9,7 +9,8 @@ use rangemap::RangeInclusiveMap;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, BufWriter, Write};
|
||||
use std::net::{IpAddr, Ipv6Addr};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::num::NonZeroU32;
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn default_ipv4_path() -> PathBuf {
|
||||
"./geoip".into()
|
||||
@ -32,6 +33,14 @@ struct Args {
|
||||
/// where to find the dump file
|
||||
#[argh(option, short = 'i')]
|
||||
input: PathBuf,
|
||||
|
||||
/// whether to include AS information in our output
|
||||
#[argh(switch)]
|
||||
include_asn: bool,
|
||||
|
||||
/// where to store the AS map.
|
||||
#[argh(option)]
|
||||
output_asn: Option<PathBuf>,
|
||||
}
|
||||
|
||||
/// Represents a network block from running `location dump`.
|
||||
@ -39,11 +48,19 @@ struct Args {
|
||||
pub struct NetBlock {
|
||||
/// The network this block describes, parsed from the dump's `net` key.
pub net: IpNetwork,
|
||||
/// Two-byte country code taken from the `country` key; the reader
/// substitutes `??` when the block lists no country.
pub cc: [u8; 2],
|
||||
/// AS number parsed from the `aut-num` key, or `None` when the key is
/// absent or its value does not parse.
pub asn: Option<NonZeroU32>,
|
||||
// NOTE(review): the three flags below are filled in by the block reader;
// the code that derives them is not visible in this view, so their exact
// source keys should be confirmed there.
pub is_anon_proxy: bool,
|
||||
pub is_anycast: bool,
|
||||
pub is_satellite: bool,
|
||||
}
|
||||
|
||||
/// Represents an AS definition from running `location dump`.
///
/// The derived ordering compares `asn` first and `name` second (field
/// declaration order), so sorting a list of these orders it by AS number.
|
||||
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
|
||||
pub struct AsBlock {
|
||||
/// The AS number: the `aut-num` value with its leading `AS` removed.
pub asn: NonZeroU32,
|
||||
/// The value of the block's `name` key.
pub name: String,
|
||||
}
|
||||
|
||||
impl PartialEq for NetBlock {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.net == other.net
|
||||
@ -69,6 +86,40 @@ impl PartialOrd for NetBlock {
|
||||
|
||||
impl Eq for NetBlock {}
|
||||
|
||||
/// The value stored for each address range in the IPv4 and IPv6 range maps:
/// a country code plus an optional AS number.
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
|
||||
struct NetDefn {
|
||||
/// Two-byte country code copied from the originating `NetBlock`.
cc: [u8; 2],
|
||||
/// AS number for the range; `None` when the block had none or when AS
/// numbers were not requested.
asn: Option<NonZeroU32>,
|
||||
}
|
||||
|
||||
impl NetBlock {
|
||||
fn into_defn(self, include_asn: bool) -> NetDefn {
|
||||
if include_asn {
|
||||
NetDefn {
|
||||
cc: self.cc,
|
||||
asn: self.asn,
|
||||
}
|
||||
} else {
|
||||
NetDefn {
|
||||
cc: self.cc,
|
||||
asn: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl NetDefn {
|
||||
fn cc(&self) -> &str {
|
||||
std::str::from_utf8(&self.cc).unwrap()
|
||||
}
|
||||
fn asn(&self) -> u32 {
|
||||
match self.asn {
|
||||
Some(v) => v.into(),
|
||||
None => 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const PROLOGUE: &str = "\
|
||||
# This file has been converted from the IPFire Location database
|
||||
# using Tor's geoip-db-tool. For more information on the data, see
|
||||
@ -82,16 +133,26 @@ const PROLOGUE: &str = "\
|
||||
///
|
||||
/// This code tries to be "efficient enough"; most of the logic is handled by
|
||||
/// using the rangemap crate.
|
||||
fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<()> {
|
||||
fn convert(args: Args) -> std::io::Result<()> {
|
||||
let input = args.input.as_path();
|
||||
let output_v4 = args.output_ipv4.as_path();
|
||||
let output_v6 = args.output_ipv6.as_path();
|
||||
let include_asn = args.include_asn;
|
||||
|
||||
let f = File::open(input)?;
|
||||
let f = BufReader::new(f);
|
||||
let mut blocks = Vec::new();
|
||||
let mut networks = Vec::new();
|
||||
|
||||
let mut reader = db::BlockReader::new(f.lines());
|
||||
let hdr = reader.extract_header();
|
||||
// Read blocks, and then sort them by specificity and address.
|
||||
for nb in reader {
|
||||
blocks.push(nb);
|
||||
match nb {
|
||||
db::AnyBlock::AsBlock(a) => networks.push(a),
|
||||
db::AnyBlock::NetBlock(n) => blocks.push(n),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
blocks.sort();
|
||||
|
||||
@ -104,8 +165,8 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
|
||||
//
|
||||
// We use u32 and u128 as the index types for these RangeInclusiveMaps,
|
||||
// so that we don't need to implement a step function for IpAddr.
|
||||
let mut v4map: RangeInclusiveMap<u32, [u8; 2], _> = RangeInclusiveMap::new();
|
||||
let mut v6map: RangeInclusiveMap<u128, [u8; 2], _> = RangeInclusiveMap::new();
|
||||
let mut v4map: RangeInclusiveMap<u32, NetDefn, _> = RangeInclusiveMap::new();
|
||||
let mut v6map: RangeInclusiveMap<u128, NetDefn, _> = RangeInclusiveMap::new();
|
||||
|
||||
let mut n = 0usize;
|
||||
let num_blocks = blocks.len();
|
||||
@ -118,10 +179,10 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
|
||||
let end = nb.net.broadcast();
|
||||
match (start, end) {
|
||||
(IpAddr::V4(a), IpAddr::V4(b)) => {
|
||||
v4map.insert(a.into()..=b.into(), nb.cc);
|
||||
v4map.insert(a.into()..=b.into(), nb.into_defn(include_asn));
|
||||
}
|
||||
(IpAddr::V6(a), IpAddr::V6(b)) => {
|
||||
v6map.insert(a.into()..=b.into(), nb.cc);
|
||||
v6map.insert(a.into()..=b.into(), nb.into_defn(include_asn));
|
||||
}
|
||||
(_, _) => panic!("network started and ended in different families!?"),
|
||||
}
|
||||
@ -133,33 +194,46 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
|
||||
|
||||
v4.write_all(PROLOGUE.as_bytes())?;
|
||||
v4.write_all(hdr.as_bytes())?;
|
||||
for (r, cc) in v4map.iter() {
|
||||
for (r, defn) in v4map.iter() {
|
||||
let a: u32 = *r.start();
|
||||
let b: u32 = *r.end();
|
||||
writeln!(&mut v4, "{},{},{}", a, b, std::str::from_utf8(cc).unwrap())?;
|
||||
if include_asn {
|
||||
writeln!(&mut v4, "{},{},{},{}", a, b, defn.cc(), defn.asn())?;
|
||||
} else {
|
||||
writeln!(&mut v4, "{},{},{}", a, b, defn.cc())?;
|
||||
}
|
||||
}
|
||||
|
||||
v6.write_all(PROLOGUE.as_bytes())?;
|
||||
v6.write_all(hdr.as_bytes())?;
|
||||
for (r, cc) in v6map.iter() {
|
||||
for (r, defn) in v6map.iter() {
|
||||
let a: Ipv6Addr = (*r.start()).into();
|
||||
let b: Ipv6Addr = (*r.end()).into();
|
||||
writeln!(&mut v6, "{},{},{}", a, b, std::str::from_utf8(cc).unwrap())?;
|
||||
if include_asn {
|
||||
writeln!(&mut v6, "{},{},{},{}", a, b, defn.cc(), defn.asn())?;
|
||||
} else {
|
||||
writeln!(&mut v6, "{},{},{}", a, b, defn.cc())?;
|
||||
}
|
||||
}
|
||||
|
||||
// The documentation says you should always flush a BufWriter.
|
||||
v4.flush()?;
|
||||
v6.flush()?;
|
||||
|
||||
if let Some(output_asn) = args.output_asn {
|
||||
networks.sort();
|
||||
let mut asn = BufWriter::new(File::create(output_asn)?);
|
||||
for net in networks {
|
||||
writeln!(&mut asn, "{},{}", net.asn, net.name)?;
|
||||
}
|
||||
asn.flush()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn main() -> std::io::Result<()> {
|
||||
let args: Args = argh::from_env();
|
||||
|
||||
convert(
|
||||
args.input.as_path(),
|
||||
args.output_ipv4.as_path(),
|
||||
args.output_ipv6.as_path(),
|
||||
)
|
||||
convert(args)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user