From 820159cac540b4777fb639de663d72f04690e2a2 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 17 Dec 2007 22:44:11 +0000 Subject: [PATCH] r15530@tombo: nickm | 2007-12-17 16:54:03 -0500 First wodge of geoip code so bridges can figure out which countries are blocking them. svn:r12845 --- doc/TODO | 28 +++-- src/or/Makefile.am | 4 +- src/or/config.c | 8 ++ src/or/connection_or.c | 9 +- src/or/geoip.c | 275 +++++++++++++++++++++++++++++++++++++++++ src/or/main.c | 1 + src/or/or.h | 19 +++ src/or/rephist.c | 1 + 8 files changed, 332 insertions(+), 13 deletions(-) create mode 100644 src/or/geoip.c diff --git a/doc/TODO b/doc/TODO index 753e2cbd7e..da351c8f18 100644 --- a/doc/TODO +++ b/doc/TODO @@ -28,20 +28,32 @@ Features blocking 0.2.0.x: - mirror tor downloads on (via) tor dir caches R . spec d deploy - - geoip caching and publishing for bridges + . geoip caching and publishing for bridges R . spec -? - deploy + - Implement + . Code to load a geoip file from disk + o Truncated format + - Full format. + o Actually invoke + o Code to store a GEOIP file in memory. + o Code to remember client IPs. + . Code to generate history lines + - Make history lines match spec. + - Controller interface + - Track consecutive time up, not time since last-forgotten IP. + - Add log lines. + - Tests d let Vidalia use the geoip data too rather than doing its own anonymized queries - - bridge address disbursal strategies + o bridge address disbursal strategies o get the cached-descriptors* to bridges@moria - - parse out bridge addresses from cached-descriptors* + o parse out bridge addresses from cached-descriptors* (or parse them out before Tonga sends them) (or get Tonga's Tor to write them out better in the first place) -N * answer by IP/timestamp - - run a little web server on moria? -N d answer by answering email to bridges@torproject - - keep track of which addresses have been answered already + o answer by IP/timestamp + o run a little web server on moria? 
+ o answer by answering email to bridges@torproject + o keep track of which addresses have been answered already R - bridge communities - spec - deploy diff --git a/src/or/Makefile.am b/src/or/Makefile.am index bcc588f9bb..9efe220636 100644 --- a/src/or/Makefile.am +++ b/src/or/Makefile.am @@ -16,7 +16,7 @@ tor_SOURCES = buffers.c circuitbuild.c circuitlist.c \ circuituse.c command.c config.c \ connection.c connection_edge.c connection_or.c control.c \ cpuworker.c directory.c dirserv.c dirvote.c \ - dns.c dnsserv.c hibernate.c main.c $(tor_platform_source) \ + dns.c dnsserv.c geoip.c hibernate.c main.c $(tor_platform_source) \ networkstatus.c \ onion.c policies.c relay.c rendcommon.c rendclient.c rendmid.c \ rendservice.c rephist.c router.c routerlist.c routerparse.c \ @@ -38,7 +38,7 @@ test_SOURCES = buffers.c circuitbuild.c circuitlist.c \ circuituse.c command.c config.c \ connection.c connection_edge.c connection_or.c control.c \ cpuworker.c directory.c dirserv.c dirvote.c \ - dns.c dnsserv.c hibernate.c main.c $(tor_platform_source) \ + dns.c dnsserv.c geoip.c hibernate.c main.c $(tor_platform_source) \ networkstatus.c \ onion.c policies.c relay.c rendcommon.c rendclient.c rendmid.c \ rendservice.c rephist.c router.c routerlist.c routerparse.c \ diff --git a/src/or/config.c b/src/or/config.c index 12be45826c..1ae02c7067 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -150,6 +150,7 @@ static config_var_t _option_vars[] = { V(BandwidthRate, MEMUNIT, "5 MB"), V(BridgeAuthoritativeDir, BOOL, "0"), VAR("Bridge", LINELIST, Bridges, NULL), + V(BridgeRecordUsageByCountry, BOOL, "1"), V(BridgeRelay, BOOL, "0"), V(CircuitBuildTimeout, INTERVAL, "1 minute"), V(CircuitIdleTimeout, INTERVAL, "1 hour"), @@ -191,6 +192,7 @@ static config_var_t _option_vars[] = { V(FetchServerDescriptors, BOOL, "1"), V(FetchHidServDescriptors, BOOL, "1"), V(FetchUselessDescriptors, BOOL, "0"), + V(GEOIPFile, STRING, NULL), V(Group, STRING, NULL), V(HardwareAccel, BOOL, "0"), 
V(HashedControlPassword, LINELIST, NULL), @@ -1214,6 +1216,12 @@ options_act(or_options_t *old_options) init_keys(); } + /* Maybe load geoip file */ + if (options->GEOIPFile && + ((!old_options || !opt_streq(old_options->GEOIPFile, options->GEOIPFile)) + || !geoip_is_loaded())) { + geoip_load_file(options->GEOIPFile); + } /* Check if we need to parse and add the EntryNodes config option. */ if (options->EntryNodes && (!old_options || diff --git a/src/or/connection_or.c b/src/or/connection_or.c index c4f61577d5..dd49af93c1 100644 --- a/src/or/connection_or.c +++ b/src/or/connection_or.c @@ -893,17 +893,20 @@ int connection_or_set_state_open(or_connection_t *conn) { int started_here = connection_or_nonopen_was_started_here(conn); + time_t now = time(NULL); conn->_base.state = OR_CONN_STATE_OPEN; control_event_or_conn_status(conn, OR_CONN_EVENT_CONNECTED, 0); if (started_here) { - rep_hist_note_connect_succeeded(conn->identity_digest, time(NULL)); - if (entry_guard_register_connect_status(conn->identity_digest, 1, - time(NULL)) < 0) { + rep_hist_note_connect_succeeded(conn->identity_digest, now); + if (entry_guard_register_connect_status(conn->identity_digest, + 1, now) < 0) { /* pending circs get closed in circuit_about_to_close_connection() */ return -1; } router_set_status(conn->identity_digest, 1); + } else { + geoip_note_client_seen(TO_CONN(conn)->addr, now); } if (conn->handshake_state) { or_handshake_state_free(conn->handshake_state); diff --git a/src/or/geoip.c b/src/or/geoip.c new file mode 100644 index 0000000000..eeeb453f2f --- /dev/null +++ b/src/or/geoip.c @@ -0,0 +1,275 @@ +/* Copyright (c) 2007, The Tor Project, Inc. 
*/ +/* See LICENSE for licensing information */ +/* $Id: /tor/trunk/src/or/networkstatus.c 15493 2007-12-16T18:33:25.055570Z nickm $ */ +const char geoip_c_id[] = + "$Id: /tor/trunk/src/or/networkstatus.c 15493 2007-12-16T18:33:25.055570Z nickm $"; + +#define GEOIP_PRIVATE +#include "or.h" +#include "ht.h" + +/** DOCDOC this whole file */ + +typedef struct geoip_entry_t { + uint32_t ip_low; + uint32_t ip_high; + int country; +} geoip_entry_t; + +static smartlist_t *geoip_countries = NULL; +static strmap_t *country_idxplus1_by_lc_code = NULL; +static smartlist_t *geoip_entries = NULL; +/** Map the address range low..high (inclusive) to country. */ +void +geoip_add_entry(uint32_t low, uint32_t high, const char *country) +{ + uintptr_t idx; + geoip_entry_t *ent; + void *_idxplus1 = strmap_get_lc(country_idxplus1_by_lc_code, country); + + if (!_idxplus1) { /* first time we see this country code */ + char *c = tor_strdup(country); + tor_strlower(c); + smartlist_add(geoip_countries, c); + idx = smartlist_len(geoip_countries) - 1; /* slot of the code just appended */ + strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1)); /* idx plus one, so NULL means absent */ + } else { + idx = ((uintptr_t)_idxplus1)-1; + } + ent = tor_malloc_zero(sizeof(geoip_entry_t)); + ent->ip_low = low; + ent->ip_high = high; + ent->country = idx; + smartlist_add(geoip_entries, ent); +} + +static int +_geoip_compare_entries(const void **_a, const void **_b) +{ + const geoip_entry_t *a = *_a, *b = *_b; + if (a->ip_low < b->ip_low) + return -1; + else if (a->ip_low > b->ip_low) + return 1; + else + return 0; +} + +static int +_geoip_compare_key_to_entry(const void *_key, const void **_member) +{ + const uint32_t addr = *(uint32_t *)_key; + const geoip_entry_t *entry = *_member; + if (addr < entry->ip_low) + return -1; + else if (addr > entry->ip_high) + return 1; + else + return 0; +} + +int +geoip_load_file(const char *filename) +{ + FILE *f; + geoip_free_all(); + if (!(f = fopen(filename, "r"))) { + log_warn(LD_GENERAL, "Failed to open GEOIP file %s.", filename); + return -1; + } + geoip_countries = smartlist_create(); + geoip_entries = smartlist_create(); +
country_idxplus1_by_lc_code = strmap_new(); + while (!feof(f)) { + unsigned int low, high; + char b[3]; + if (fscanf(f, "%u,%u,%2s", &low, &high, b) == 3) + geoip_add_entry(low, high, b); + else if (fscanf(f, "%*[^\n]") == EOF) break; /* drop the bad line; stop at EOF or error */ + } + /*XXXX020 abort and return -1 if */ + fclose(f); + + smartlist_sort(geoip_entries, _geoip_compare_entries); + return 0; +} + +int +geoip_get_country_by_ip(uint32_t ipaddr) +{ + geoip_entry_t *ent; + if (!geoip_entries) + return -1; + ent = smartlist_bsearch(geoip_entries, &ipaddr, _geoip_compare_key_to_entry); + return ent ? ent->country : -1; +} +/** Return how many countries are known, or 0 if the list is unallocated. */ +int +geoip_get_n_countries(void) +{ + return geoip_countries ? smartlist_len(geoip_countries) : 0; +} + +const char * +geoip_get_country_name(int num) +{ + if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) + return smartlist_get(geoip_countries, num); + else + return "??"; +} + +int +geoip_is_loaded(void) +{ + return geoip_countries != NULL && geoip_entries != NULL; +} + +/** Hash table entry recording when the address ipaddr was last seen. */ +typedef struct clientmap_entry_t { + HT_ENTRY(clientmap_entry_t) node; + uint32_t ipaddr; + time_t last_seen; +} clientmap_entry_t; + +static HT_HEAD(clientmap, clientmap_entry_t) client_history = + HT_INITIALIZER(); +static time_t client_history_starts = 0; + +static INLINE unsigned +clientmap_entry_hash(const clientmap_entry_t *a) +{ + return ht_improve_hash((unsigned) a->ipaddr); +} +static INLINE int +clientmap_entries_eq(const clientmap_entry_t *a, const clientmap_entry_t *b) +{ + return a->ipaddr == b->ipaddr; +} + +HT_PROTOTYPE(clientmap, clientmap_entry_t, node, clientmap_entry_hash, + clientmap_entries_eq); +HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash, + clientmap_entries_eq, 0.6, malloc, realloc, free); + +/** Note addr as seen at time now, if we are a bridge recording usage. */ +void +geoip_note_client_seen(uint32_t addr, time_t now) +{ + or_options_t *options = get_options(); + clientmap_entry_t lookup, *ent; + if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry)) + return; + lookup.ipaddr = addr; + ent = HT_FIND(clientmap, &client_history,
&lookup); + if (ent) { + ent->last_seen = now; + } else { + ent = tor_malloc_zero(sizeof(clientmap_entry_t)); + ent->ipaddr = addr; + ent->last_seen = now; + HT_INSERT(clientmap, &client_history, ent); + } + if (!client_history_starts) + client_history_starts = now; +} + +static int +_remove_old_client_helper(struct clientmap_entry_t *ent, void *_cutoff) +{ + time_t cutoff = *(time_t*)_cutoff; + if (ent->last_seen < cutoff) { + tor_free(ent); + return 1; + } else { + return 0; + } +} + +void +geoip_remove_old_clients(time_t cutoff) +{ + clientmap_HT_FOREACH_FN(&client_history, + _remove_old_client_helper, + &cutoff); + if (client_history_starts < cutoff) + client_history_starts = cutoff; +} + +#define MIN_IPS_TO_NOTE_COUNTRY 8 +#define MIN_IPS_TO_NOTE_ANYTHING 16 +#define IP_GRANULARITY 8 + +char * +geoip_get_client_history(time_t now) +{ + char *result = NULL; + if (!geoip_is_loaded()) + return NULL; + if (client_history_starts < (now - 12*60*60)) { + char buf[32]; + smartlist_t *chunks = NULL; + int n_countries = geoip_get_n_countries(); + int i; + clientmap_entry_t **ent; + unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries); + unsigned total = 0; + HT_FOREACH(ent, clientmap, &client_history) { + int country = geoip_get_country_by_ip((*ent)->ipaddr); + if (country < 0) + continue; + tor_assert(0 <= country && country < n_countries); + ++counts[country]; + ++total; + } + if (total < MIN_IPS_TO_NOTE_ANYTHING) + goto done; + chunks = smartlist_create(); + for (i = 0; i < n_countries; ++i) { + unsigned c = counts[i]; + const char *countrycode; + if (c >= MIN_IPS_TO_NOTE_COUNTRY) { + c -= c % IP_GRANULARITY; + countrycode = geoip_get_country_name(i); + tor_snprintf(buf, sizeof(buf), "%s=%u", countrycode, c); + smartlist_add(chunks, tor_strdup(buf)); + } + } + result = smartlist_join_strings(chunks, ",", 0, NULL); + done: + tor_free(counts); + if (chunks) { + SMARTLIST_FOREACH(chunks, char *, c, tor_free(c)); + smartlist_free(chunks); + } + } + return 
result; +} + + +void +geoip_free_all(void) +{ + if (geoip_countries) { + SMARTLIST_FOREACH(geoip_countries, char *, cp, tor_free(cp)); + smartlist_free(geoip_countries); + } + if (country_idxplus1_by_lc_code) + strmap_free(country_idxplus1_by_lc_code, NULL); + if (geoip_entries) { + SMARTLIST_FOREACH(geoip_entries, geoip_entry_t *, ent, tor_free(ent)); + smartlist_free(geoip_entries); + } + { + clientmap_entry_t **ent, **next, *this; + for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) { + this = *ent; + next = HT_NEXT_RMV(clientmap, &client_history, ent); + tor_free(this); + } + HT_CLEAR(clientmap, &client_history); + } + geoip_countries = NULL; + country_idxplus1_by_lc_code = NULL; + geoip_entries = NULL; +} diff --git a/src/or/main.c b/src/or/main.c index 1bbd6cd111..2322a5cd68 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -1803,6 +1803,7 @@ tor_free_all(int postfork) if (!postfork) { evdns_shutdown(1); } + geoip_free_all(); dirvote_free_all(); routerlist_free_all(); networkstatus_free_all(); diff --git a/src/or/or.h b/src/or/or.h index c9b3a6d836..da9a6f3ac4 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2315,6 +2315,10 @@ typedef struct { /** DOCDOC here and in tor.1 */ int LearnAuthorityAddrFromCerts; + /** DOCDOC here and in tor.1 */ + int BridgeRecordUsageByCountry; + char *GEOIPFile; + } or_options_t; /** Persistent state for an onion router, as saved to disk. 
*/ @@ -3192,6 +3196,21 @@ void dnsserv_resolved(edge_connection_t *conn, void dnsserv_reject_request(edge_connection_t *conn); void dnsserv_launch_request(const char *name, int is_reverse); +/********************************* geoip.c **************************/ + +#ifdef GEOIP_PRIVATE +void geoip_add_entry(uint32_t low, uint32_t high, const char *country); +#endif +int geoip_load_file(const char *filename); +int geoip_get_country_by_ip(uint32_t ipaddr); +int geoip_get_n_countries(void); +const char *geoip_get_country_name(int num); +int geoip_is_loaded(void); +void geoip_note_client_seen(uint32_t addr, time_t now); +void geoip_remove_old_clients(time_t cutoff); +char *geoip_get_client_history(time_t now); +void geoip_free_all(void); + /********************************* hibernate.c **********************/ int accounting_parse_options(or_options_t *options, int validate_only); diff --git a/src/or/rephist.c b/src/or/rephist.c index 680ef01590..e181033180 100644 --- a/src/or/rephist.c +++ b/src/or/rephist.c @@ -13,6 +13,7 @@ const char rephist_c_id[] = **/ #include "or.h" +#include "ht.h" static void bw_arrays_init(void); static void predicted_ports_init(void);