Merge branch 'split_stats'

This commit is contained in:
Nick Mathewson 2018-09-27 16:26:06 -04:00
commit 9e65e7a36f
49 changed files with 1158 additions and 902 deletions

2
.gitignore vendored
View File

@ -173,6 +173,8 @@ uptime-*.json
/src/lib/libtor-fdio-testing.a
/src/lib/libtor-fs.a
/src/lib/libtor-fs-testing.a
/src/lib/libtor-geoip.a
/src/lib/libtor-geoip-testing.a
/src/lib/libtor-intmath.a
/src/lib/libtor-intmath-testing.a
/src/lib/libtor-lock.a

View File

@ -40,6 +40,7 @@ endif
# "Common" libraries used to link tor's utility code.
TOR_UTIL_LIBS = \
src/lib/libtor-geoip.a \
src/lib/libtor-process.a \
src/lib/libtor-time.a \
src/lib/libtor-fs.a \
@ -68,6 +69,7 @@ TOR_UTIL_LIBS = \
# and tests)
if UNITTESTS_ENABLED
TOR_UTIL_TESTING_LIBS = \
src/lib/libtor-geoip-testing.a \
src/lib/libtor-process-testing.a \
src/lib/libtor-time-testing.a \
src/lib/libtor-fs-testing.a \

3
changes/ticket27892 Normal file
View File

@ -0,0 +1,3 @@
o Code simplification and refactoring:
- Split the non-statistics-related parts from the rephist.c and geoip.c
modules. Closes ticket 27892.

View File

@ -96,13 +96,16 @@
#include "feature/nodelist/networkstatus.h"
#include "feature/nodelist/nickname.h"
#include "feature/nodelist/nodelist.h"
#include "feature/nodelist/routerlist.h"
#include "feature/nodelist/routerset.h"
#include "feature/relay/dns.h"
#include "feature/relay/ext_orport.h"
#include "feature/relay/routermode.h"
#include "feature/rend/rendclient.h"
#include "feature/rend/rendservice.h"
#include "feature/stats/geoip.h"
#include "lib/geoip/geoip.h"
#include "feature/stats/geoip_stats.h"
#include "feature/stats/predict_ports.h"
#include "feature/stats/rephist.h"
#include "lib/compress/compress.h"
#include "lib/crypt_ops/crypto_init.h"
@ -8346,6 +8349,11 @@ config_load_geoip_file_(sa_family_t family,
const char *fname,
const char *default_fname)
{
const or_options_t *options = get_options();
const char *msg = "";
int severity = options_need_geoip_info(options, &msg) ? LOG_WARN : LOG_INFO;
int r;
#ifdef _WIN32
char *free_fname = NULL; /* Used to hold any temporary-allocated value */
/* XXXX Don't use this "<default>" junk; make our filename options
@ -8355,12 +8363,16 @@ config_load_geoip_file_(sa_family_t family,
tor_asprintf(&free_fname, "%s\\%s", conf_root, default_fname);
fname = free_fname;
}
geoip_load_file(family, fname);
r = geoip_load_file(family, fname, severity);
tor_free(free_fname);
#else /* !(defined(_WIN32)) */
(void)default_fname;
geoip_load_file(family, fname);
r = geoip_load_file(family, fname, severity);
#endif /* defined(_WIN32) */
if (r < 0 && severity == LOG_WARN) {
log_warn(LD_GENERAL, "%s", msg);
}
}
/** Load geoip files for IPv4 and IPv6 if <a>options</a> and
@ -8374,13 +8386,19 @@ config_maybe_load_geoip_files_(const or_options_t *options,
if (options->GeoIPFile &&
((!old_options || !opt_streq(old_options->GeoIPFile,
options->GeoIPFile))
|| !geoip_is_loaded(AF_INET)))
|| !geoip_is_loaded(AF_INET))) {
config_load_geoip_file_(AF_INET, options->GeoIPFile, "geoip");
/* Okay, now we need to maybe change our mind about what is in
* which country. We do this for IPv4 only since that's what we
* store in node->country. */
refresh_all_country_info();
}
if (options->GeoIPv6File &&
((!old_options || !opt_streq(old_options->GeoIPv6File,
options->GeoIPv6File))
|| !geoip_is_loaded(AF_INET6)))
|| !geoip_is_loaded(AF_INET6))) {
config_load_geoip_file_(AF_INET6, options->GeoIPv6File, "geoip6");
}
}
/** Initialize cookie authentication (used so far by the ControlPort

View File

@ -27,6 +27,7 @@
#include "core/or/command.h"
#include "core/or/connection_edge.h"
#include "core/or/connection_or.h"
#include "core/or/dos.h"
#include "core/or/policies.h"
#include "core/or/protover.h"
#include "core/or/relay.h"
@ -60,7 +61,9 @@
#include "feature/rend/rendcache.h"
#include "feature/rend/rendclient.h"
#include "feature/rend/rendservice.h"
#include "feature/stats/geoip.h"
#include "lib/geoip/geoip.h"
#include "feature/stats/geoip_stats.h"
#include "feature/stats/predict_ports.h"
#include "feature/stats/rephist.h"
#include "lib/compress/compress.h"
#include "lib/container/buffers.h"
@ -754,6 +757,7 @@ tor_free_all(int postfork)
evdns_shutdown(1);
}
geoip_free_all();
geoip_stats_free_all();
dirvote_free_all();
routerlist_free_all();
networkstatus_free_all();

View File

@ -60,6 +60,7 @@ LIBTOR_APP_A_SOURCES = \
src/feature/client/transports.c \
src/feature/control/control.c \
src/feature/control/fmt_serverstatus.c \
src/feature/control/getinfo_geoip.c \
src/feature/dirauth/keypin.c \
src/feature/dircache/conscache.c \
src/feature/dircache/consdiffmgr.c \
@ -116,8 +117,9 @@ LIBTOR_APP_A_SOURCES = \
src/feature/rend/rendcommon.c \
src/feature/rend/rendmid.c \
src/feature/rend/rendservice.c \
src/feature/stats/geoip.c \
src/feature/stats/rephist.c
src/feature/stats/geoip_stats.c \
src/feature/stats/rephist.c \
src/feature/stats/predict_ports.c
# These should eventually move into module_dirauth_sources, but for now
# the separation is only in the code location.
@ -253,6 +255,7 @@ noinst_HEADERS += \
src/feature/control/control.h \
src/feature/control/control_connection_st.h \
src/feature/control/fmt_serverstatus.h \
src/feature/control/getinfo_geoip.h \
src/feature/dirauth/authmode.h \
src/feature/dirauth/bwauth.h \
src/feature/dirauth/dircollate.h \
@ -346,8 +349,9 @@ noinst_HEADERS += \
src/feature/rend/rendcommon.h \
src/feature/rend/rendmid.h \
src/feature/rend/rendservice.h \
src/feature/stats/geoip.h \
src/feature/stats/rephist.h
src/feature/stats/geoip_stats.h \
src/feature/stats/rephist.h \
src/feature/stats/predict_ports.h
noinst_HEADERS += \
src/app/config/auth_dirs.inc \

View File

@ -102,7 +102,7 @@
#include "feature/relay/routermode.h"
#include "feature/rend/rendclient.h"
#include "feature/rend/rendcommon.h"
#include "feature/stats/geoip.h"
#include "lib/geoip/geoip.h"
#include "feature/stats/rephist.h"
#include "lib/crypt_ops/crypto_util.h"

View File

@ -92,7 +92,8 @@
#include "feature/relay/selftest.h"
#include "feature/rend/rendcache.h"
#include "feature/rend/rendservice.h"
#include "feature/stats/geoip.h"
#include "feature/stats/geoip_stats.h"
#include "feature/stats/predict_ports.h"
#include "feature/stats/rephist.h"
#include "lib/container/buffers.h"
#include "lib/crypt_ops/crypto_rand.h"

View File

@ -58,28 +58,29 @@
#define CHANNEL_PRIVATE_
#include "core/or/or.h"
#include "app/config/config.h"
#include "core/mainloop/mainloop.h"
#include "core/or/channel.h"
#include "core/or/channeltls.h"
#include "core/or/channelpadding.h"
#include "core/or/channeltls.h"
#include "core/or/circuitbuild.h"
#include "core/or/circuitlist.h"
#include "core/or/circuitstats.h"
#include "app/config/config.h"
#include "core/or/connection_or.h" /* For var_cell_free() */
#include "core/or/circuitmux.h"
#include "feature/client/entrynodes.h"
#include "feature/stats/geoip.h"
#include "core/mainloop/mainloop.h"
#include "feature/nodelist/nodelist.h"
#include "core/or/circuitstats.h"
#include "core/or/connection_or.h" /* For var_cell_free() */
#include "core/or/dos.h"
#include "core/or/relay.h"
#include "feature/stats/rephist.h"
#include "feature/relay/router.h"
#include "feature/nodelist/routerlist.h"
#include "core/or/scheduler.h"
#include "lib/time/compat_time.h"
#include "feature/client/entrynodes.h"
#include "feature/nodelist/networkstatus.h"
#include "feature/nodelist/nodelist.h"
#include "feature/nodelist/routerlist.h"
#include "feature/relay/router.h"
#include "feature/rend/rendservice.h"
#include "feature/stats/geoip_stats.h"
#include "feature/stats/rephist.h"
#include "lib/evloop/timers.h"
#include "lib/time/compat_time.h"
#include "core/or/cell_queue_st.h"

View File

@ -67,7 +67,7 @@
#include "feature/relay/routermode.h"
#include "feature/relay/selftest.h"
#include "feature/rend/rendcommon.h"
#include "feature/stats/rephist.h"
#include "feature/stats/predict_ports.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "core/or/cell_st.h"

View File

@ -85,6 +85,7 @@
#include "core/crypto/relay_crypto.h"
#include "feature/rend/rendclient.h"
#include "feature/rend/rendcommon.h"
#include "feature/stats/predict_ports.h"
#include "feature/stats/rephist.h"
#include "feature/nodelist/routerlist.h"
#include "feature/nodelist/routerset.h"

View File

@ -57,7 +57,7 @@
#include "feature/rend/rendclient.h"
#include "feature/rend/rendcommon.h"
#include "feature/rend/rendservice.h"
#include "feature/stats/rephist.h"
#include "feature/stats/predict_ports.h"
#include "lib/math/fp.h"
#include "lib/time/tvdiff.h"

View File

@ -95,6 +95,7 @@
#include "feature/rend/rendclient.h"
#include "feature/rend/rendcommon.h"
#include "feature/rend/rendservice.h"
#include "feature/stats/predict_ports.h"
#include "feature/stats/rephist.h"
#include "lib/container/buffers.h"
#include "lib/crypt_ops/crypto_util.h"

View File

@ -43,7 +43,7 @@
#include "lib/crypt_ops/crypto_util.h"
#include "feature/dirauth/reachability.h"
#include "feature/client/entrynodes.h"
#include "feature/stats/geoip.h"
#include "lib/geoip/geoip.h"
#include "core/mainloop/mainloop.h"
#include "trunnel/link_handshake.h"
#include "feature/nodelist/microdesc.h"

View File

@ -9,17 +9,17 @@
#define DOS_PRIVATE
#include "core/or/or.h"
#include "core/or/channel.h"
#include "app/config/config.h"
#include "core/mainloop/connection.h"
#include "core/or/connection_or.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "feature/stats/geoip.h"
#include "core/mainloop/mainloop.h"
#include "core/or/channel.h"
#include "core/or/connection_or.h"
#include "core/or/relay.h"
#include "feature/nodelist/networkstatus.h"
#include "feature/nodelist/nodelist.h"
#include "core/or/relay.h"
#include "feature/relay/routermode.h"
#include "feature/stats/geoip_stats.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "core/or/dos.h"

View File

@ -41,6 +41,7 @@
#include "lib/fs/mmap.h"
#include "lib/fs/path.h"
#include "lib/fs/userdb.h"
#include "lib/geoip/country.h"
#include "lib/intmath/addsub.h"
#include "lib/intmath/bits.h"
#include "lib/intmath/cmp.h"
@ -798,9 +799,6 @@ typedef struct download_status_t download_status_t;
typedef struct signed_descriptor_t signed_descriptor_t;
/** A signed integer representing a country code. */
typedef int16_t country_t;
/** Flags used to summarize the declared protocol versions of a relay,
* so we don't need to parse them again and again. */
typedef struct protover_summary_flags_t {

View File

@ -27,7 +27,7 @@
#include "feature/relay/router.h"
#include "feature/relay/routermode.h"
#include "feature/nodelist/routerparse.h"
#include "feature/stats/geoip.h"
#include "lib/geoip/geoip.h"
#include "ht.h"
#include "lib/encoding/confline.h"

View File

@ -65,7 +65,7 @@
#include "lib/crypt_ops/crypto_util.h"
#include "feature/dircommon/directory.h"
#include "feature/relay/dns.h"
#include "feature/stats/geoip.h"
#include "feature/stats/geoip_stats.h"
#include "feature/hs/hs_cache.h"
#include "core/mainloop/mainloop.h"
#include "feature/nodelist/networkstatus.h"

View File

@ -30,13 +30,13 @@
#include "feature/hs/hs_stats.h"
#include "feature/hs/hs_service.h"
#include "core/or/dos.h"
#include "feature/stats/geoip_stats.h"
#include "app/config/or_state_st.h"
#include "feature/nodelist/routerinfo_st.h"
#include "lib/tls/tortls.h"
static void log_accounting(const time_t now, const or_options_t *options);
#include "feature/stats/geoip.h"
/** Return the total number of circuits. */
STATIC int

View File

@ -60,6 +60,7 @@
#include "feature/client/entrynodes.h"
#include "feature/control/control.h"
#include "feature/control/fmt_serverstatus.h"
#include "feature/control/getinfo_geoip.h"
#include "feature/dircache/dirserv.h"
#include "feature/dirclient/dirclient.h"
#include "feature/dirclient/dlstatus.h"
@ -83,8 +84,8 @@
#include "feature/rend/rendclient.h"
#include "feature/rend/rendcommon.h"
#include "feature/rend/rendservice.h"
#include "feature/stats/geoip.h"
#include "feature/stats/rephist.h"
#include "feature/stats/geoip_stats.h"
#include "feature/stats/predict_ports.h"
#include "lib/container/buffers.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "lib/crypt_ops/crypto_util.h"

View File

@ -0,0 +1,45 @@
#include "core/or/or.h"
#include "core/mainloop/connection.h"
#include "feature/control/control.h"
#include "feature/control/getinfo_geoip.h"
#include "lib/geoip/geoip.h"
/** Controller GETINFO handler for questions under "ip-to-country/".
 *
 * A <b>question</b> without that prefix is ignored: 0 is returned and
 * *<b>answer</b> is left untouched.  For the suffixes "ipv4-available" and
 * "ipv6-available", *<b>answer</b> is set to "1" or "0" according to
 * geoip_is_loaded() for the matching family.  Any other suffix is parsed as
 * an address, and *<b>answer</b> is set to a tor_strdup() copy of the
 * country name that the GeoIP lookup yields for it.
 *
 * Return 0 on success.  On failure, point *<b>errmsg</b> at a static
 * string and return -1.  <b>control_conn</b> is not used. */
int
getinfo_helper_geoip(control_connection_t *control_conn,
                     const char *question, char **answer,
                     const char **errmsg)
{
  (void)control_conn;

  /* Not one of ours: succeed without producing an answer. */
  if (strcmpstart(question, "ip-to-country/"))
    return 0;

  const char *query = question + strlen("ip-to-country/");

  /* Availability probes: report whether a database is loaded. */
  const int asks_v4 = !strcmp(query, "ipv4-available");
  if (asks_v4 || !strcmp(query, "ipv6-available")) {
    const sa_family_t probe_family = asks_v4 ? AF_INET : AF_INET6;
    const int available = geoip_is_loaded(probe_family);
    tor_asprintf(answer, "%d", !! available);
    return 0;
  }

  /* Everything else must be an address literal. */
  tor_addr_t addr;
  sa_family_t family = tor_addr_parse(&addr, query);
  if (family != AF_INET && family != AF_INET6) {
    *errmsg = "Invalid address family";
    return -1;
  }
  if (!geoip_is_loaded(family)) {
    *errmsg = "GeoIP data not loaded";
    return -1;
  }

  const int c = (family == AF_INET)
    ? geoip_get_country_by_ipv4(tor_addr_to_ipv4h(&addr))
    : geoip_get_country_by_ipv6(tor_addr_to_in6(&addr));
  *answer = tor_strdup(geoip_get_country_name(c));
  return 0;
}

View File

@ -0,0 +1,14 @@
/* Copyright (c) 2001 Matej Pfajfar.
* Copyright (c) 2001-2004, Roger Dingledine.
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
* Copyright (c) 2007-2018, The Tor Project, Inc. */
/* See LICENSE for licensing information */
#ifndef TOR_GETINFO_GEOIP_H
#define TOR_GETINFO_GEOIP_H
/* Helper used to implement the GETINFO ip-to-country/... controller
 * command.  Returns 0 on success (with *answer possibly set), or -1 with
 * *errmsg pointing at a static message on failure. */
int getinfo_helper_geoip(control_connection_t *control_conn,
const char *question, char **answer,
const char **errmsg);
#endif /* !defined(TOR_GETINFO_GEOIP_H) */

View File

@ -25,7 +25,7 @@
#include "feature/nodelist/routerlist.h"
#include "feature/relay/routermode.h"
#include "feature/rend/rendcache.h"
#include "feature/stats/geoip.h"
#include "feature/stats/geoip_stats.h"
#include "feature/stats/rephist.h"
#include "lib/compress/compress.h"

View File

@ -16,7 +16,7 @@
#include "feature/nodelist/routerlist.h"
#include "feature/relay/router.h"
#include "feature/relay/routermode.h"
#include "feature/stats/rephist.h"
#include "feature/stats/predict_ports.h"
#include "feature/dircache/cached_dir_st.h"
#include "feature/dircommon/dir_connection_st.h"

View File

@ -43,7 +43,7 @@
#include "feature/rend/rendclient.h"
#include "feature/rend/rendcommon.h"
#include "feature/rend/rendservice.h"
#include "feature/stats/rephist.h"
#include "feature/stats/predict_ports.h"
#include "lib/compress/compress.h"
#include "lib/crypt_ops/crypto_format.h"

View File

@ -12,7 +12,7 @@
#include "feature/dirclient/dirclient.h"
#include "feature/dircommon/directory.h"
#include "feature/dircommon/fp_pair.h"
#include "feature/stats/geoip.h"
#include "feature/stats/geoip_stats.h"
#include "lib/compress/compress.h"
#include "feature/dircommon/dir_connection_st.h"

View File

@ -65,7 +65,7 @@
#include "feature/nodelist/routerset.h"
#include "feature/nodelist/torcert.h"
#include "feature/rend/rendservice.h"
#include "feature/stats/geoip.h"
#include "lib/geoip/geoip.h"
#include "lib/net/address.h"
#include <string.h>

View File

@ -34,7 +34,7 @@ n * Copyright (c) 2001-2004, Roger Dingledine.
#include "feature/nodelist/nodelist.h"
#include "feature/nodelist/routerparse.h"
#include "feature/nodelist/routerset.h"
#include "feature/stats/geoip.h"
#include "lib/geoip/geoip.h"
#include "core/or/addr_policy_st.h"
#include "core/or/extend_info_st.h"

View File

@ -36,7 +36,8 @@
#include "feature/relay/routerkeys.h"
#include "feature/relay/routermode.h"
#include "feature/relay/selftest.h"
#include "feature/stats/geoip.h"
#include "lib/geoip/geoip.h"
#include "feature/stats/geoip_stats.h"
#include "feature/stats/rephist.h"
#include "lib/crypt_ops/crypto_ed25519.h"
#include "lib/crypt_ops/crypto_format.h"

View File

@ -36,7 +36,7 @@
#include "feature/rend/rendclient.h"
#include "feature/rend/rendcommon.h"
#include "feature/rend/rendservice.h"
#include "feature/stats/rephist.h"
#include "feature/stats/predict_ports.h"
#include "lib/crypt_ops/crypto_dh.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "lib/crypt_ops/crypto_util.h"

View File

@ -27,55 +27,26 @@
* for each country.
*/
#define GEOIP_PRIVATE
#include "core/or/or.h"
#include "ht.h"
#include "lib/container/buffers.h"
#include "app/config/config.h"
#include "feature/control/control.h"
#include "feature/client/dnsserv.h"
#include "core/or/dos.h"
#include "feature/stats/geoip.h"
#include "lib/geoip/geoip.h"
#include "feature/stats/geoip_stats.h"
#include "feature/nodelist/routerlist.h"
#include "lib/container/order.h"
#include "lib/time/tvdiff.h"
static void init_geoip_countries(void);
/** An entry from the GeoIP IPv4 file: maps an IPv4 range to a country. */
typedef struct geoip_ipv4_entry_t {
uint32_t ip_low; /**< The lowest IP in the range, in host order */
uint32_t ip_high; /**< The highest IP in the range, in host order */
intptr_t country; /**< An index into geoip_countries */
} geoip_ipv4_entry_t;
/** An entry from the GeoIP IPv6 file: maps an IPv6 range to a country. */
typedef struct geoip_ipv6_entry_t {
struct in6_addr ip_low; /**< The lowest IP in the range, in host order */
struct in6_addr ip_high; /**< The highest IP in the range, in host order */
intptr_t country; /**< An index into geoip_countries */
} geoip_ipv6_entry_t;
/** A per-country record for GeoIP request history. */
typedef struct geoip_country_t {
char countrycode[3];
uint32_t n_v3_ns_requests;
} geoip_country_t;
/** A list of geoip_country_t */
static smartlist_t *geoip_countries = NULL;
/** A map from lowercased country codes to their position in geoip_countries.
* The index is encoded in the pointer, and 1 is added so that NULL can mean
* not found. */
static strmap_t *country_idxplus1_by_lc_code = NULL;
/** Lists of all known geoip_ipv4_entry_t and geoip_ipv6_entry_t, sorted
* by their respective ip_low. */
static smartlist_t *geoip_ipv4_entries = NULL, *geoip_ipv6_entries = NULL;
/** SHA1 digest of the GeoIP files to include in extra-info descriptors. */
static char geoip_digest[DIGEST_LEN];
static char geoip6_digest[DIGEST_LEN];
/** Number of entries in n_v3_ns_requests */
static size_t n_v3_ns_requests_len = 0;
/** Array, indexed by country index, of number of v3 networkstatus requests
* received from that country */
static uint32_t *n_v3_ns_requests;
/* Total size in bytes of the geoip client history cache. Used by the OOM
* handler. */
@ -109,194 +80,30 @@ geoip_decrement_client_history_cache_size(size_t bytes)
geoip_client_history_cache_size -= bytes;
}
/** Look <b>country</b> up in the map of lowercased country codes and return
 * the position of its entry in the GeoIP country list.  Return -1 when the
 * map has no entry for <b>country</b>. */
MOCK_IMPL(country_t,
geoip_get_country,(const char *country))
{
  /* The map stores (index + 1) inside the pointer value, so that a NULL
   * result can mean "not found". */
  void *encoded = strmap_get_lc(country_idxplus1_by_lc_code, country);
  if (encoded == NULL)
    return -1;

  const intptr_t position = ((uintptr_t)encoded) - 1;
  return (country_t)position;
}
/** Add an entry to a GeoIP table, mapping all IP addresses between <b>low</b>
* and <b>high</b>, inclusive, to the 2-letter country code <b>country</b>. */
/** Add 1 to the count of v3 ns requests received from <b>country</b>. */
static void
geoip_add_entry(const tor_addr_t *low, const tor_addr_t *high,
const char *country)
increment_v3_ns_request(country_t country)
{
intptr_t idx;
void *idxplus1_;
IF_BUG_ONCE(tor_addr_family(low) != tor_addr_family(high))
return;
IF_BUG_ONCE(tor_addr_compare(high, low, CMP_EXACT) < 0)
if (country < 0)
return;
idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country);
if (!idxplus1_) {
geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t));
strlcpy(c->countrycode, country, sizeof(c->countrycode));
tor_strlower(c->countrycode);
smartlist_add(geoip_countries, c);
idx = smartlist_len(geoip_countries) - 1;
strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
} else {
idx = ((uintptr_t)idxplus1_)-1;
}
{
geoip_country_t *c = smartlist_get(geoip_countries, (int)idx);
tor_assert(!strcasecmp(c->countrycode, country));
}
if (tor_addr_family(low) == AF_INET) {
geoip_ipv4_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv4_entry_t));
ent->ip_low = tor_addr_to_ipv4h(low);
ent->ip_high = tor_addr_to_ipv4h(high);
ent->country = idx;
smartlist_add(geoip_ipv4_entries, ent);
} else if (tor_addr_family(low) == AF_INET6) {
geoip_ipv6_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv6_entry_t));
ent->ip_low = *tor_addr_to_in6_assert(low);
ent->ip_high = *tor_addr_to_in6_assert(high);
ent->country = idx;
smartlist_add(geoip_ipv6_entries, ent);
}
}
/** Add an entry to the GeoIP table indicated by <b>family</b>,
 * parsing it from <b>line</b>. The format is as for geoip_load_file().
 *
 * The country list and the per-family entry list are created on demand if
 * they do not exist yet.
 *
 * Return 0 if <b>line</b> is a '#' comment (nothing is added) or if it was
 * parsed and handed to geoip_add_entry().  Return -1, after logging a
 * warning, if <b>family</b> is neither AF_INET nor AF_INET6 or if the line
 * cannot be parsed. */
STATIC int
geoip_parse_entry(const char *line, sa_family_t family)
{
tor_addr_t low_addr, high_addr;
/* Holds the two-letter country code scanned from an IPv4 line. */
char c[3];
char *country = NULL;
/* Lazily create the shared country list and the table for this family. */
if (!geoip_countries)
init_geoip_countries();
if (family == AF_INET) {
if (!geoip_ipv4_entries)
geoip_ipv4_entries = smartlist_new();
} else if (family == AF_INET6) {
if (!geoip_ipv6_entries)
geoip_ipv6_entries = smartlist_new();
} else {
log_warn(LD_GENERAL, "Unsupported family: %d", family);
return -1;
}
/* Skip leading whitespace; a line whose first remaining character is '#'
 * is a comment. */
while (TOR_ISSPACE(*line))
++line;
if (*line == '#')
return 0;
/* Scratch copy of the line for the IPv6 tokenizer, which writes into its
 * input; <b>country</b> points into this buffer on the IPv6 path. */
char buf[512];
if (family == AF_INET) {
unsigned int low, high;
/* Accept either the bare "LOW,HIGH,CC" form or the quoted form. */
if (tor_sscanf(line,"%u,%u,%2s", &low, &high, c) == 3 ||
tor_sscanf(line,"\"%u\",\"%u\",\"%2s\",", &low, &high, c) == 3) {
tor_addr_from_ipv4h(&low_addr, low);
tor_addr_from_ipv4h(&high_addr, high);
} else
goto fail;
country = c;
} else { /* AF_INET6 */
char *low_str, *high_str;
struct in6_addr low, high;
char *strtok_state;
strlcpy(buf, line, sizeof(buf));
/* Fields are comma-separated; the country code runs to end of line. */
low_str = tor_strtok_r(buf, ",", &strtok_state);
if (!low_str)
goto fail;
high_str = tor_strtok_r(NULL, ",", &strtok_state);
if (!high_str)
goto fail;
country = tor_strtok_r(NULL, "\n", &strtok_state);
if (!country)
goto fail;
/* Only exactly two-character country codes are accepted. */
if (strlen(country) != 2)
goto fail;
if (tor_inet_pton(AF_INET6, low_str, &low) <= 0)
goto fail;
tor_addr_from_in6(&low_addr, &low);
if (tor_inet_pton(AF_INET6, high_str, &high) <= 0)
goto fail;
tor_addr_from_in6(&high_addr, &high);
}
geoip_add_entry(&low_addr, &high_addr, country);
return 0;
/* Shared failure exit for every parse error above. */
fail:
log_warn(LD_GENERAL, "Unable to parse line from GEOIP %s file: %s",
family == AF_INET ? "IPv4" : "IPv6", escaped(line));
return -1;
}
/** Sorting helper: return -1, 1, or 0 based on comparison of two
* geoip_ipv4_entry_t */
static int
geoip_ipv4_compare_entries_(const void **_a, const void **_b)
{
const geoip_ipv4_entry_t *a = *_a, *b = *_b;
if (a->ip_low < b->ip_low)
return -1;
else if (a->ip_low > b->ip_low)
return 1;
if ((size_t)country >= n_v3_ns_requests_len) {
/* We need to reallocate the array. */
size_t new_len;
if (n_v3_ns_requests_len == 0)
new_len = 256;
else
return 0;
}
new_len = n_v3_ns_requests_len * 2;
if (new_len <= (size_t)country)
new_len = ((size_t)country)+1;
n_v3_ns_requests = tor_reallocarray(n_v3_ns_requests, new_len,
sizeof(uint32_t));
memset(n_v3_ns_requests + n_v3_ns_requests_len, 0,
sizeof(uint32_t)*(new_len - n_v3_ns_requests_len));
n_v3_ns_requests_len = new_len;
}
/** Binary-search callback for the IPv4 table.  <b>_key</b> points to a
 * host-order uint32_t address and <b>_member</b> to a geoip_ipv4_entry_t
 * pointer.  Return -1 if the address lies below the entry's range, 1 if it
 * lies above it, and 0 if it falls inside [ip_low, ip_high]. */
static int
geoip_ipv4_compare_key_to_entry_(const void *_key, const void **_member)
{
  const geoip_ipv4_entry_t *range = *_member;
  /* _key really is a pointer to uint32_t, so there is no alignment issue. */
  const uint32_t ip = *(uint32_t *)_key;

  if (ip < range->ip_low)
    return -1;
  if (ip > range->ip_high)
    return 1;
  return 0;
}
/** Sort callback for the IPv6 table: order two geoip_ipv6_entry_t by the
 * bytes of their ip_low address, returning whatever fast_memcmp() reports
 * for that comparison. */
static int
geoip_ipv6_compare_entries_(const void **_a, const void **_b)
{
  const geoip_ipv6_entry_t *lhs = *_a;
  const geoip_ipv6_entry_t *rhs = *_b;

  return fast_memcmp(lhs->ip_low.s6_addr, rhs->ip_low.s6_addr,
                     sizeof(struct in6_addr));
}
/** bsearch helper: return -1, 1, or 0 based on comparison of an IPv6
* (a pointer to a in6_addr) to a geoip_ipv6_entry_t */
static int
geoip_ipv6_compare_key_to_entry_(const void *_key, const void **_member)
{
const struct in6_addr *addr = (struct in6_addr *)_key;
const geoip_ipv6_entry_t *entry = *_member;
if (fast_memcmp(addr->s6_addr, entry->ip_low.s6_addr,
sizeof(struct in6_addr)) < 0)
return -1;
else if (fast_memcmp(addr->s6_addr, entry->ip_high.s6_addr,
sizeof(struct in6_addr)) > 0)
return 1;
else
return 0;
n_v3_ns_requests[country] += 1;
}
/** Return 1 if we should collect geoip stats on bridge users, and
@ -307,208 +114,6 @@ should_record_bridge_info(const or_options_t *options)
return options->BridgeRelay && options->BridgeRecordUsageByCountry;
}
/** Create a fresh country list and a fresh country-code map, each holding
 * only the "??" (unknown) country.  That country is element 0 of the list,
 * so requests that cannot be resolved to a country land there; the map
 * stores it as 1 because map values are index+1.
 */
static void
init_geoip_countries(void)
{
  /* Build the placeholder record for unresolved requests. */
  geoip_country_t *unknown = tor_malloc_zero(sizeof(geoip_country_t));
  strlcpy(unknown->countrycode, "??", sizeof(unknown->countrycode));

  /* It becomes the first (index 0) element of the new list. */
  geoip_countries = smartlist_new();
  smartlist_add(geoip_countries, unknown);

  country_idxplus1_by_lc_code = strmap_new();
  strmap_set_lc(country_idxplus1_by_lc_code, "??", (void*)(1));
}
/** Clear appropriate GeoIP database, based on <b>family</b>, and
 * reload it from the file <b>filename</b>. Return 0 on success, -1 on
 * failure.  The only failure reported is being unable to open the file;
 * the results of parsing individual lines are not checked.
 *
 * <b>family</b> must be AF_INET or AF_INET6 (asserted).
 *
 * Recognized line formats for IPv4 are:
 * INTIPLOW,INTIPHIGH,CC
 * and
 * "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
 * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
 * integers, and CC is a country code.
 *
 * Recognized line format for IPv6 is:
 * IPV6LOW,IPV6HIGH,CC
 * where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code.
 *
 * It also recognizes, and skips over, blank lines and lines that start
 * with '#' (comments).
 * NOTE(review): blank lines appear to reach the parse-failure (warning)
 * path in geoip_parse_entry() rather than being skipped silently -- confirm.
 *
 * Side effects: the table for <b>family</b> is sorted by ip_low, the digest
 * of the bytes read is stored for that family, and for AF_INET
 * refresh_all_country_info() is called.
 */
int
geoip_load_file(sa_family_t family, const char *filename)
{
FILE *f;
const char *msg = "";
const or_options_t *options = get_options();
/* Warn about a missing file only when the configuration needs GeoIP data;
 * otherwise log at info level. */
int severity = options_need_geoip_info(options, &msg) ? LOG_WARN : LOG_INFO;
crypto_digest_t *geoip_digest_env = NULL;
tor_assert(family == AF_INET || family == AF_INET6);
if (!(f = tor_fopen_cloexec(filename, "r"))) {
log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s. %s",
filename, msg);
return -1;
}
if (!geoip_countries)
init_geoip_countries();
/* Discard any previously loaded table for this family and start empty.
 * The country list itself is kept. */
if (family == AF_INET) {
if (geoip_ipv4_entries) {
SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, e,
tor_free(e));
smartlist_free(geoip_ipv4_entries);
}
geoip_ipv4_entries = smartlist_new();
} else { /* AF_INET6 */
if (geoip_ipv6_entries) {
SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, e,
tor_free(e));
smartlist_free(geoip_ipv6_entries);
}
geoip_ipv6_entries = smartlist_new();
}
geoip_digest_env = crypto_digest_new();
log_notice(LD_GENERAL, "Parsing GEOIP %s file %s.",
(family == AF_INET) ? "IPv4" : "IPv6", filename);
/* Read the file in chunks of at most sizeof(buf)-1 bytes; every chunk is
 * fed to the digest, including ones that do not parse. */
while (!feof(f)) {
char buf[512];
if (fgets(buf, (int)sizeof(buf), f) == NULL)
break;
crypto_digest_add_bytes(geoip_digest_env, buf, strlen(buf));
/* FFFF track full country name. */
/* The return value is deliberately not checked here: a line that fails
 * to parse does not abort the load. */
geoip_parse_entry(buf, family);
}
/*XXXX abort and return -1 if no entries/illformed?*/
fclose(f);
/* Sort list and remember file digests so that we can include it in
 * our extra-info descriptors. */
if (family == AF_INET) {
smartlist_sort(geoip_ipv4_entries, geoip_ipv4_compare_entries_);
/* Okay, now we need to maybe change our mind about what is in
 * which country. We do this for IPv4 only since that's what we
 * store in node->country. */
refresh_all_country_info();
crypto_digest_get_digest(geoip_digest_env, geoip_digest, DIGEST_LEN);
} else {
/* AF_INET6 */
smartlist_sort(geoip_ipv6_entries, geoip_ipv6_compare_entries_);
crypto_digest_get_digest(geoip_digest_env, geoip6_digest, DIGEST_LEN);
}
crypto_digest_free(geoip_digest_env);
return 0;
}
/** Given an IP address in host order, return a number representing the
* country to which that address belongs, -1 for "No geoip information
* available", or 0 for the 'unknown country'. The return value will always
* be less than geoip_get_n_countries(). To decode it, call
* geoip_get_country_name().
*/
STATIC int
geoip_get_country_by_ipv4(uint32_t ipaddr)
{
geoip_ipv4_entry_t *ent;
if (!geoip_ipv4_entries)
return -1;
ent = smartlist_bsearch(geoip_ipv4_entries, &ipaddr,
geoip_ipv4_compare_key_to_entry_);
return ent ? (int)ent->country : 0;
}
/** Given an IPv6 address, return a number representing the country to
* which that address belongs, -1 for "No geoip information available", or
* 0 for the 'unknown country'. The return value will always be less than
* geoip_get_n_countries(). To decode it, call geoip_get_country_name().
*/
STATIC int
geoip_get_country_by_ipv6(const struct in6_addr *addr)
{
geoip_ipv6_entry_t *ent;
if (!geoip_ipv6_entries)
return -1;
ent = smartlist_bsearch(geoip_ipv6_entries, addr,
geoip_ipv6_compare_key_to_entry_);
return ent ? (int)ent->country : 0;
}
/** Map <b>addr</b> to a country number by dispatching on its address
 * family to the IPv4 or IPv6 lookup.  Return -1 for any other family, or
 * whatever the per-family lookup returns: -1 for "no geoip information
 * available", 0 for the 'unknown country', or a country index that
 * geoip_get_country_name() can decode.
 */
MOCK_IMPL(int,
geoip_get_country_by_addr,(const tor_addr_t *addr))
{
  switch (tor_addr_family(addr)) {
    case AF_INET:
      return geoip_get_country_by_ipv4(tor_addr_to_ipv4h(addr));
    case AF_INET6:
      return geoip_get_country_by_ipv6(tor_addr_to_in6(addr));
    default:
      return -1;
  }
}
/** Return how many entries the GeoIP country list holds, creating the list
 * first (via init_geoip_countries()) if it does not exist yet. */
MOCK_IMPL(int,
geoip_get_n_countries,(void))
{
  if (geoip_countries == NULL)
    init_geoip_countries();

  const int n_countries = (int) smartlist_len(geoip_countries);
  return n_countries;
}
/** Return the country code stored for country number <b>num</b>.  Return
 * the literal "??" when the country list does not exist or <b>num</b> is
 * outside [0, list length). */
const char *
geoip_get_country_name(country_t num)
{
  if (!geoip_countries)
    return "??";
  if (num < 0 || num >= smartlist_len(geoip_countries))
    return "??";

  geoip_country_t *entry = smartlist_get(geoip_countries, num);
  return entry->countrycode;
}
/** Return true iff both the country list and the entry table for
 * <b>family</b> exist.  <b>family</b> must be AF_INET or AF_INET6
 * (asserted). */
MOCK_IMPL(int,
geoip_is_loaded,(sa_family_t family))
{
  tor_assert(family == AF_INET || family == AF_INET6);

  if (!geoip_countries)
    return 0;

  const smartlist_t *table =
    (family == AF_INET) ? geoip_ipv4_entries : geoip_ipv6_entries;
  return table != NULL;
}
/** Return the hex encoding of the stored digest for the GeoIP file of
 * <b>family</b>, which must be AF_INET or AF_INET6 (asserted).  The string
 * comes from hex_str(): the caller must not free it, and the next call to
 * hex_str() overwrites it. */
const char *
geoip_db_digest(sa_family_t family)
{
  tor_assert(family == AF_INET || family == AF_INET6);

  const char *raw_digest = (family == AF_INET) ? geoip_digest : geoip6_digest;
  return hex_str(raw_digest, DIGEST_LEN);
}
/** Largest allowable value for last_seen_in_minutes. (It's a 30-bit field,
* so it can hold up to (1u<<30)-1, or 0x3fffffffu.
*/
@ -660,10 +265,7 @@ geoip_note_client_seen(geoip_client_action_t action,
int country_idx = geoip_get_country_by_addr(addr);
if (country_idx < 0)
country_idx = 0; /** unresolved requests are stored at index 0. */
if (country_idx >= 0 && country_idx < smartlist_len(geoip_countries)) {
geoip_country_t *country = smartlist_get(geoip_countries, country_idx);
++country->n_v3_ns_requests;
}
increment_v3_ns_request(country_idx);
}
}
@ -1275,14 +877,14 @@ geoip_get_request_history(void)
char *result;
unsigned granularity = IP_GRANULARITY;
if (!geoip_countries)
return NULL;
entries = smartlist_new();
SMARTLIST_FOREACH_BEGIN(geoip_countries, geoip_country_t *, c) {
SMARTLIST_FOREACH_BEGIN(geoip_get_countries(), const geoip_country_t *, c) {
uint32_t tot = 0;
c_hist_t *ent;
tot = c->n_v3_ns_requests;
if ((size_t)c_sl_idx < n_v3_ns_requests_len)
tot = n_v3_ns_requests[c_sl_idx];
else
tot = 0;
if (!tot)
continue;
ent = tor_malloc_zero(sizeof(c_hist_t));
@ -1319,9 +921,8 @@ geoip_dirreq_stats_init(time_t now)
void
geoip_reset_dirreq_stats(time_t now)
{
SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
c->n_v3_ns_requests = 0;
});
memset(n_v3_ns_requests, 0,
n_v3_ns_requests_len * sizeof(uint32_t));
{
clientmap_entry_t **ent, **next, *this;
for (ent = HT_START(clientmap, &client_history); ent != NULL;
@ -1793,74 +1394,9 @@ geoip_entry_stats_write(time_t now)
return start_of_entry_stats_interval + WRITE_STATS_INTERVAL;
}
/** Helper used to implement the GETINFO ip-to-country/... controller
 * command.
 *
 * Questions outside the "ip-to-country/" namespace are ignored (return 0
 * with *<b>answer</b> untouched). "ipv4-available" and "ipv6-available" are
 * answered with "1" or "0". Any other suffix is parsed as an address and
 * answered with its country code. Return 0 on success; on failure return -1
 * and point *<b>errmsg</b> at a static description. */
int
getinfo_helper_geoip(control_connection_t *control_conn,
                     const char *question, char **answer,
                     const char **errmsg)
{
  (void)control_conn;

  if (strcmpstart(question, "ip-to-country/"))
    return 0; /* Not one of ours. */
  question += strlen("ip-to-country/");

  sa_family_t family;
  if (!strcmp(question, "ipv4-available") ||
      !strcmp(question, "ipv6-available")) {
    family = !strcmp(question, "ipv4-available") ? AF_INET : AF_INET6;
    const int available = geoip_is_loaded(family);
    tor_asprintf(answer, "%d", !! available);
    return 0;
  }

  tor_addr_t addr;
  family = tor_addr_parse(&addr, question);
  if (family != AF_INET && family != AF_INET6) {
    *errmsg = "Invalid address family";
    return -1;
  }
  if (!geoip_is_loaded(family)) {
    *errmsg = "GeoIP data not loaded";
    return -1;
  }

  int country_idx;
  if (family == AF_INET)
    country_idx = geoip_get_country_by_ipv4(tor_addr_to_ipv4h(&addr));
  else /* AF_INET6 */
    country_idx = geoip_get_country_by_ipv6(tor_addr_to_in6(&addr));
  *answer = tor_strdup(geoip_get_country_name(country_idx));
  return 0;
}
/** Release all storage held by the GeoIP databases and country list. */
STATIC void
clear_geoip_db(void)
{
if (geoip_countries) {
SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, tor_free(c));
smartlist_free(geoip_countries);
}
strmap_free(country_idxplus1_by_lc_code, NULL);
if (geoip_ipv4_entries) {
SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, ent,
tor_free(ent));
smartlist_free(geoip_ipv4_entries);
}
if (geoip_ipv6_entries) {
SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, ent,
tor_free(ent));
smartlist_free(geoip_ipv6_entries);
}
geoip_countries = NULL;
country_idxplus1_by_lc_code = NULL;
geoip_ipv4_entries = NULL;
geoip_ipv6_entries = NULL;
}
/** Release all storage held in this file. */
void
geoip_free_all(void)
geoip_stats_free_all(void)
{
{
clientmap_entry_t **ent, **next, *this;
@ -1881,9 +1417,6 @@ geoip_free_all(void)
HT_CLEAR(dirreqmap, &dirreq_map);
}
clear_geoip_db();
tor_free(bridge_stats_extrainfo);
memset(geoip_digest, 0, sizeof(geoip_digest));
memset(geoip6_digest, 0, sizeof(geoip6_digest));
tor_free(n_v3_ns_requests);
}

View File

@ -5,14 +5,13 @@
/* See LICENSE for licensing information */
/**
* \file geoip.h
* \brief Header file for geoip.c.
* \file geoip_stats.h
* \brief Header file for geoip_stats.c.
**/
#ifndef TOR_GEOIP_H
#define TOR_GEOIP_H
#ifndef TOR_GEOIP_STATS_H
#define TOR_GEOIP_STATS_H
#include "lib/testsupport/testsupport.h"
#include "core/or/dos.h"
/** Indicates an action that we might be noting geoip statistics on.
@ -73,13 +72,6 @@ typedef enum {
DIRREQ_CHANNEL_BUFFER_FLUSHED = 4
} dirreq_state_t;
#ifdef GEOIP_PRIVATE
STATIC int geoip_parse_entry(const char *line, sa_family_t family);
STATIC int geoip_get_country_by_ipv4(uint32_t ipaddr);
STATIC int geoip_get_country_by_ipv6(const struct in6_addr *addr);
STATIC void clear_geoip_db(void);
#endif /* defined(GEOIP_PRIVATE) */
/** Entry in a map from IP address to the last time we've seen an incoming
* connection from that IP address. Used by bridges only to track which
* countries have them blocked, or the DoS mitigation subsystem if enabled. */
@ -103,13 +95,6 @@ typedef struct clientmap_entry_t {
} clientmap_entry_t;
int should_record_bridge_info(const or_options_t *options);
int geoip_load_file(sa_family_t family, const char *filename);
MOCK_DECL(int, geoip_get_country_by_addr, (const tor_addr_t *addr));
MOCK_DECL(int, geoip_get_n_countries, (void));
const char *geoip_get_country_name(country_t num);
MOCK_DECL(int, geoip_is_loaded, (sa_family_t family));
const char *geoip_db_digest(sa_family_t family);
MOCK_DECL(country_t, geoip_get_country, (const char *countrycode));
void geoip_note_client_seen(geoip_client_action_t action,
const tor_addr_t *addr, const char *transport_name,
@ -126,10 +111,7 @@ char *geoip_get_transport_history(void);
int geoip_get_client_history(geoip_client_action_t action,
char **country_str, char **ipver_str);
char *geoip_get_request_history(void);
int getinfo_helper_geoip(control_connection_t *control_conn,
const char *question, char **answer,
const char **errmsg);
void geoip_free_all(void);
void geoip_stats_free_all(void);
void geoip_start_dirreq(uint64_t dirreq_id, size_t response_size,
dirreq_type_t type);
@ -154,4 +136,4 @@ const char *geoip_get_bridge_stats_extrainfo(time_t);
char *geoip_get_bridge_stats_controller(time_t);
char *format_client_stats_heartbeat(time_t now);
#endif /* !defined(TOR_GEOIP_H) */
#endif /* !defined(TOR_GEOIP_STATS_H) */

View File

@ -0,0 +1,311 @@
/* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
* Copyright (c) 2007-2018, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
* \file predict_ports.c
* \brief Remember what ports we've needed so we can have circuits ready.
*
* Predicted ports are used by clients to remember how long it's been
* since they opened an exit connection to each given target
* port. Clients use this information in order to try to keep circuits
* open to exit nodes that can connect to the ports that they care
* about. (The predicted ports mechanism also handles predicted circuit
* usage that _isn't_ port-specific, such as resolves, internal circuits,
* and so on.)
**/
#include "core/or/or.h"
#include "app/config/config.h"
#include "core/or/channelpadding.h"
#include "core/or/circuituse.h"
#include "feature/relay/routermode.h"
#include "feature/relay/selftest.h"
#include "feature/stats/predict_ports.h"
#include "lib/container/bitarray.h"
#include "lib/time/tvdiff.h"
/** Running total, in bytes, of the predicted_port_t entries currently held
* in predicted_ports_list: increased when an entry is added and decreased
* when one is expired, removed, or freed. */
static size_t predicted_ports_total_alloc = 0;
static void predicted_ports_alloc(void);
/** A single predicted port: used to remember which ports we've made
* connections to, so that we can try to keep making circuits that can handle
* those ports. */
typedef struct predicted_port_t {
/** The port we connected to */
uint16_t port;
/** The time at which we last used it */
time_t time;
} predicted_port_t;
/** A list of port numbers that have been used recently. */
static smartlist_t *predicted_ports_list=NULL;
/** How long do we keep predicting circuits? */
static time_t prediction_timeout=0;
/** When was the last time we added a prediction entry (HS or port) */
static time_t last_prediction_add_time=0;
/**
 * Return how many seconds remain, as of <b>now</b>, before we stop
 * predicting circuits. Returns 0 once more than prediction_timeout seconds
 * have passed since the last prediction was added.
 */
int
predicted_ports_prediction_time_remaining(time_t now)
{
  /* Protect against overflow of the return value. This can happen if the
   * clock jumps backwards in time. Update the last prediction time (aka
   * last active time) to prevent it. This update is preferable to using
   * monotonic time because it prevents clock jumps into the past from
   * simply causing very long idle timeouts while the monotonic time stands
   * still. */
  time_t idle_secs = time_diff(last_prediction_add_time, now);
  if (idle_secs == TIME_MAX) {
    last_prediction_add_time = now;
    idle_secs = 0;
  }

  /* Protect against underflow of the return value. This can happen for
   * very large periods of inactivity/system sleep. */
  if (idle_secs > prediction_timeout)
    return 0;

  const time_t remaining_secs = time_diff(idle_secs, prediction_timeout);
  if (BUG(remaining_secs == TIME_MAX))
    return INT_MAX;

  return (int)remaining_secs;
}
/** Record a new predicted-port entry for <b>port</b>, last used at
 * <b>now</b>, so that we keep some circuits open that can exit to it.
 * The caller is expected to have checked that the port is not already in
 * predicted_ports_list.
 */
static void
add_predicted_port(time_t now, uint16_t port)
{
  /* If nothing is being predicted right now, this entry starts a fresh
   * prediction window: re-randomize the predicted ports lifetime. */
  if (!any_predicted_circuits(now)) {
    prediction_timeout =
      (time_t)channelpadding_get_circuits_available_timeout();
  }

  last_prediction_add_time = now;

  log_info(LD_CIRC,
          "New port prediction added. Will continue predictive circ building "
          "for %d more seconds.",
          predicted_ports_prediction_time_remaining(now));

  predicted_port_t *entry = tor_malloc(sizeof(predicted_port_t));
  entry->port = port;
  entry->time = now;
  predicted_ports_total_alloc += sizeof(*entry);
  smartlist_add(predicted_ports_list, entry);
}
/** Remember that <b>port</b> has been asked for as of time <b>now</b>.
* This is used for predicting what sorts of streams we'll make in the
* future and making exit circuits to anticipate that.
*/
void
rep_hist_note_used_port(time_t now, uint16_t port)
{
tor_assert(predicted_ports_list);
if (!port) /* record nothing */
return;
SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) {
if (pp->port == port) {
pp->time = now;
last_prediction_add_time = now;
log_info(LD_CIRC,
"New port prediction added. Will continue predictive circ "
"building for %d more seconds.",
predicted_ports_prediction_time_remaining(now));
return;
}
} SMARTLIST_FOREACH_END(pp);
/* it's not there yet; we need to add it */
add_predicted_port(now, port);
}
/** Return a newly allocated pointer to a list of uint16_t * for ports that
* are likely to be asked for in the near future.
*/
smartlist_t *
rep_hist_get_predicted_ports(time_t now)
{
int predicted_circs_relevance_time;
smartlist_t *out = smartlist_new();
tor_assert(predicted_ports_list);
predicted_circs_relevance_time = (int)prediction_timeout;
/* clean out obsolete entries */
SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) {
if (pp->time + predicted_circs_relevance_time < now) {
log_debug(LD_CIRC, "Expiring predicted port %d", pp->port);
predicted_ports_total_alloc -= sizeof(predicted_port_t);
tor_free(pp);
SMARTLIST_DEL_CURRENT(predicted_ports_list, pp);
} else {
smartlist_add(out, tor_memdup(&pp->port, sizeof(uint16_t)));
}
} SMARTLIST_FOREACH_END(pp);
return out;
}
/**
 * Take a list of uint16_t *, and remove every port in the list from the
 * current list of predicted ports, freeing the removed entries.
 */
void
rep_hist_remove_predicted_ports(const smartlist_t *rmv_ports)
{
  /* Let's do this on O(N), not O(N^2): mark each port to remove in a bit
   * array, then make a single pass over the predicted list. The array needs
   * one bit for every possible port value, 0 through UINT16_MAX inclusive,
   * so that port 65535 has a bit of its own. */
  bitarray_t *remove_ports = bitarray_init_zero((unsigned)UINT16_MAX + 1);
  SMARTLIST_FOREACH(rmv_ports, const uint16_t *, p,
                    bitarray_set(remove_ports, *p));

  SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) {
    if (bitarray_is_set(remove_ports, pp->port)) {
      /* Do the accounting before the pointer is released. */
      predicted_ports_total_alloc -= sizeof(*pp);
      tor_free(pp);
      SMARTLIST_DEL_CURRENT(predicted_ports_list, pp);
    }
  } SMARTLIST_FOREACH_END(pp);

  bitarray_free(remove_ports);
}
/** Note that the user asked us to do a resolve at time <b>now</b>.
 * Rather than keeping track of timings and such of resolves, we fake it
 * for now by treating it the same way as a connection to port 80. This way
 * we will continue to have circuits lying around if the user only uses Tor
 * for resolves.
 */
void
rep_hist_note_used_resolve(time_t now)
{
  const uint16_t stand_in_port = 80;
  rep_hist_note_used_port(now, stand_in_port);
}
/** The last time at which we needed an internal circ. */
static time_t predicted_internal_time = 0;
/** The last time we needed an internal circ with good uptime. */
static time_t predicted_internal_uptime_time = 0;
/** The last time we needed an internal circ with good capacity. */
static time_t predicted_internal_capacity_time = 0;
/** Remember that we used an internal circ at time <b>now</b>. If
 * <b>need_uptime</b> or <b>need_capacity</b> is set, also record that an
 * internal circ with that property was needed at <b>now</b>. */
void
rep_hist_note_used_internal(time_t now, int need_uptime, int need_capacity)
{
  /* This check must come before predicted_internal_time is updated below,
   * since any_predicted_circuits() reads it. If nothing is being predicted,
   * re-randomize the predicted ports lifetime. */
  const int starting_fresh = !any_predicted_circuits(now);
  if (starting_fresh) {
    prediction_timeout =
      (time_t)channelpadding_get_circuits_available_timeout();
  }

  last_prediction_add_time = now;

  log_info(LD_CIRC,
          "New port prediction added. Will continue predictive circ building "
          "for %d more seconds.",
          predicted_ports_prediction_time_remaining(now));

  predicted_internal_time = now;
  if (need_uptime) {
    predicted_internal_uptime_time = now;
  }
  if (need_capacity) {
    predicted_internal_capacity_time = now;
  }
}
/** Return 1 if we've used an internal circ recently (within the last
 * prediction_timeout seconds before <b>now</b>); else return 0.
 *
 * When returning 1, *<b>need_capacity</b> is always set to 1, and
 * *<b>need_uptime</b> is set to 1 if an internal circ with good uptime was
 * also needed recently. Neither output is ever cleared here, so callers
 * should initialize them. */
int
rep_hist_get_predicted_internal(time_t now, int *need_uptime,
                                int *need_capacity)
{
  const int relevance_secs = (int)prediction_timeout;

  if (predicted_internal_time == 0) {
    /* First call: initialize all three timestamps to now. */
    predicted_internal_time = now;
    predicted_internal_uptime_time = now;
    predicted_internal_capacity_time = now;
  }

  const int too_long_ago = predicted_internal_time + relevance_secs < now;
  if (too_long_ago)
    return 0;

  if (predicted_internal_uptime_time + relevance_secs >= now)
    *need_uptime = 1;

  // Always predict that we need capacity.
  *need_capacity = 1;
  return 1;
}
/** Return nonzero if any ports have been used lately: that is, if the
 * predicted-ports list is non-empty, or if an internal circuit was needed
 * within the last prediction_timeout seconds before <b>now</b>. These are
 * pre-seeded if we just started up or if we're running a hidden service. */
int
any_predicted_circuits(time_t now)
{
  if (smartlist_len(predicted_ports_list) != 0)
    return 1;

  const int relevance_secs = (int)prediction_timeout;
  return predicted_internal_time + relevance_secs >= now;
}
/** Return 1 if we have no need for circuits currently, else return 0.
 * We still need circuits if anything is being predicted, if we are a
 * server that still has reachability testing to do, or if our DirPort is
 * not known to be reachable. */
int
rep_hist_circbuilding_dormant(time_t now)
{
  const or_options_t *opts = get_options();

  /* The checks short-circuit in this order, so the later ones only run
   * when the earlier ones found no need for circuits. */
  const int need_circuits =
    any_predicted_circuits(now) ||
    /* see if we'll still need to build testing circuits */
    (server_mode(opts) &&
     (!check_whether_orport_reachable(opts) ||
      !circuit_enough_testing_circs())) ||
    !check_whether_dirport_reachable(opts);

  return need_circuits ? 0 : 1;
}
/**
 * Allocate whatever memory and structs are needed for predicting
 * which ports will be used. Safe to call more than once: an existing
 * list is kept rather than leaked and replaced.
 *
 * This only creates the (empty) list; seeding it with an initial port so
 * that circuits get built on start-up is done by predicted_ports_init().
 */
static void
predicted_ports_alloc(void)
{
  if (!predicted_ports_list)
    predicted_ports_list = smartlist_new();
}
/** Set up the predicted-ports machinery: allocate its storage, then seed
 * the list with port 443 (stamped with the current time) to get us
 * started. */
void
predicted_ports_init(void)
{
  predicted_ports_alloc();

  const time_t startup_time = time(NULL);
  add_predicted_port(startup_time, 443); // Add a port to get us started
}
/** Free whatever memory is needed for predicting which ports will
 * be used. Does nothing if the list was never allocated (for example when
 * shutting down before predicted_ports_init() ran).
 */
void
predicted_ports_free_all(void)
{
  if (!predicted_ports_list)
    return;

  predicted_ports_total_alloc -=
    smartlist_len(predicted_ports_list)*sizeof(predicted_port_t);
  SMARTLIST_FOREACH(predicted_ports_list, predicted_port_t *,
                    pp, tor_free(pp));
  smartlist_free(predicted_ports_list);
}

View File

@ -0,0 +1,30 @@
/* Copyright (c) 2001 Matej Pfajfar.
* Copyright (c) 2001-2004, Roger Dingledine.
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
* Copyright (c) 2007-2018, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
* \file predict_ports.h
* \brief Header file for predict_ports.c.
**/
#ifndef TOR_PREDICT_PORTS_H
#define TOR_PREDICT_PORTS_H
/* Lifecycle: predicted_ports_init() allocates and seeds the list;
* predicted_ports_free_all() (declared below) releases it. */
void predicted_ports_init(void);
/* Recording and querying port predictions. */
void rep_hist_note_used_port(time_t now, uint16_t port);
smartlist_t *rep_hist_get_predicted_ports(time_t now);
void rep_hist_remove_predicted_ports(const smartlist_t *rmv_ports);
void rep_hist_note_used_resolve(time_t now);
/* Recording and querying predictions for internal circuits. */
void rep_hist_note_used_internal(time_t now, int need_uptime,
int need_capacity);
int rep_hist_get_predicted_internal(time_t now, int *need_uptime,
int *need_capacity);
int any_predicted_circuits(time_t now);
int rep_hist_circbuilding_dormant(time_t now);
int predicted_ports_prediction_time_remaining(time_t now);
void predicted_ports_free_all(void);
#endif /* !defined(TOR_PREDICT_PORTS_H) */

View File

@ -76,38 +76,29 @@
#define REPHIST_PRIVATE
#include "core/or/or.h"
#include "core/or/circuitlist.h"
#include "core/or/circuituse.h"
#include "app/config/config.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "app/config/statefile.h"
#include "core/or/circuitlist.h"
#include "core/or/connection_or.h"
#include "feature/dirauth/authmode.h"
#include "feature/nodelist/networkstatus.h"
#include "feature/nodelist/nodelist.h"
#include "feature/stats/rephist.h"
#include "feature/relay/routermode.h"
#include "feature/relay/selftest.h"
#include "feature/nodelist/routerlist.h"
#include "ht.h"
#include "core/or/channelpadding.h"
#include "core/or/connection_or.h"
#include "app/config/statefile.h"
#include "feature/dirauth/authmode.h"
#include "feature/stats/predict_ports.h"
#include "feature/stats/rephist.h"
#include "lib/container/order.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "lib/math/laplace.h"
#include "feature/nodelist/networkstatus_st.h"
#include "core/or/or_circuit_st.h"
#include "app/config/or_state_st.h"
#include "lib/container/bloomfilt.h"
#include "lib/container/order.h"
#include "lib/math/fp.h"
#include "lib/math/laplace.h"
#include "lib/time/tvdiff.h"
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
static void bw_arrays_init(void);
static void predicted_ports_alloc(void);
/** Total number of bytes currently allocated in fields used by rephist.c. */
uint64_t rephist_total_alloc=0;
@ -242,7 +233,6 @@ rep_hist_init(void)
{
history_map = digestmap_new();
bw_arrays_init();
predicted_ports_alloc();
}
/** We have just decided that this router with identity digest <b>id</b> is
@ -1537,287 +1527,6 @@ rep_hist_load_state(or_state_t *state, char **err)
return 0;
}
/*********************************************************************/
/** A single predicted port: used to remember which ports we've made
* connections to, so that we can try to keep making circuits that can handle
* those ports. */
typedef struct predicted_port_t {
/** The port we connected to */
uint16_t port;
/** The time at which we last used it */
time_t time;
} predicted_port_t;
/** A list of port numbers that have been used recently. */
static smartlist_t *predicted_ports_list=NULL;
/** How long do we keep predicting circuits? */
static time_t prediction_timeout=0;
/** When was the last time we added a prediction entry (HS or port) */
static time_t last_prediction_add_time=0;
/**
 * Return how many seconds remain, as of <b>now</b>, before we stop
 * predicting circuits. Returns 0 once more than prediction_timeout seconds
 * have passed since the last prediction was added.
 */
int
predicted_ports_prediction_time_remaining(time_t now)
{
  /* Protect against overflow of the return value. This can happen if the
   * clock jumps backwards in time. Update the last prediction time (aka
   * last active time) to prevent it. This update is preferable to using
   * monotonic time because it prevents clock jumps into the past from
   * simply causing very long idle timeouts while the monotonic time stands
   * still. */
  time_t idle_secs = time_diff(last_prediction_add_time, now);
  if (idle_secs == TIME_MAX) {
    last_prediction_add_time = now;
    idle_secs = 0;
  }

  /* Protect against underflow of the return value. This can happen for
   * very large periods of inactivity/system sleep. */
  if (idle_secs > prediction_timeout)
    return 0;

  const time_t remaining_secs = time_diff(idle_secs, prediction_timeout);
  if (BUG(remaining_secs == TIME_MAX))
    return INT_MAX;

  return (int)remaining_secs;
}
/** Record a new predicted-port entry for <b>port</b>, last used at
 * <b>now</b>, so that we keep some circuits open that can exit to it.
 * The caller is expected to have checked that the port is not already in
 * predicted_ports_list.
 */
static void
add_predicted_port(time_t now, uint16_t port)
{
  /* If nothing is being predicted right now, this entry starts a fresh
   * prediction window: re-randomize the predicted ports lifetime. */
  if (!any_predicted_circuits(now)) {
    prediction_timeout =
      (time_t)channelpadding_get_circuits_available_timeout();
  }

  last_prediction_add_time = now;

  log_info(LD_CIRC,
          "New port prediction added. Will continue predictive circ building "
          "for %d more seconds.",
          predicted_ports_prediction_time_remaining(now));

  predicted_port_t *entry = tor_malloc(sizeof(predicted_port_t));
  entry->port = port;
  entry->time = now;
  rephist_total_alloc += sizeof(*entry);
  smartlist_add(predicted_ports_list, entry);
}
/**
 * Allocate whatever memory and structs are needed for predicting
 * which ports will be used. Safe to call more than once: an existing
 * list is kept rather than leaked and replaced.
 *
 * This only creates the (empty) list; seeding it with an initial port so
 * that circuits get built on start-up is done by predicted_ports_init().
 */
static void
predicted_ports_alloc(void)
{
  if (!predicted_ports_list)
    predicted_ports_list = smartlist_new();
}
/** Seed the predicted-ports list with port 443, stamped with the current
 * time, to get us started. The list itself must already exist, since
 * add_predicted_port() appends to it. */
void
predicted_ports_init(void)
{
  const time_t startup_time = time(NULL);
  add_predicted_port(startup_time, 443); // Add a port to get us started
}
/** Free whatever memory is needed for predicting which ports will
 * be used. Does nothing if the list was never allocated.
 */
static void
predicted_ports_free_all(void)
{
  if (!predicted_ports_list)
    return;

  rephist_total_alloc -=
    smartlist_len(predicted_ports_list)*sizeof(predicted_port_t);
  SMARTLIST_FOREACH(predicted_ports_list, predicted_port_t *,
                    pp, tor_free(pp));
  smartlist_free(predicted_ports_list);
}
/** Remember that <b>port</b> has been asked for as of time <b>now</b>.
* This is used for predicting what sorts of streams we'll make in the
* future and making exit circuits to anticipate that.
*/
void
rep_hist_note_used_port(time_t now, uint16_t port)
{
tor_assert(predicted_ports_list);
if (!port) /* record nothing */
return;
SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) {
if (pp->port == port) {
pp->time = now;
last_prediction_add_time = now;
log_info(LD_CIRC,
"New port prediction added. Will continue predictive circ "
"building for %d more seconds.",
predicted_ports_prediction_time_remaining(now));
return;
}
} SMARTLIST_FOREACH_END(pp);
/* it's not there yet; we need to add it */
add_predicted_port(now, port);
}
/** Return a newly allocated pointer to a list of uint16_t * for ports that
* are likely to be asked for in the near future.
*/
smartlist_t *
rep_hist_get_predicted_ports(time_t now)
{
int predicted_circs_relevance_time;
smartlist_t *out = smartlist_new();
tor_assert(predicted_ports_list);
predicted_circs_relevance_time = (int)prediction_timeout;
/* clean out obsolete entries */
SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) {
if (pp->time + predicted_circs_relevance_time < now) {
log_debug(LD_CIRC, "Expiring predicted port %d", pp->port);
rephist_total_alloc -= sizeof(predicted_port_t);
tor_free(pp);
SMARTLIST_DEL_CURRENT(predicted_ports_list, pp);
} else {
smartlist_add(out, tor_memdup(&pp->port, sizeof(uint16_t)));
}
} SMARTLIST_FOREACH_END(pp);
return out;
}
/**
 * Take a list of uint16_t *, and remove every port in the list from the
 * current list of predicted ports, freeing the removed entries.
 */
void
rep_hist_remove_predicted_ports(const smartlist_t *rmv_ports)
{
  /* Let's do this on O(N), not O(N^2): mark each port to remove in a bit
   * array, then make a single pass over the predicted list. The array needs
   * one bit for every possible port value, 0 through UINT16_MAX inclusive,
   * so that port 65535 has a bit of its own. */
  bitarray_t *remove_ports = bitarray_init_zero((unsigned)UINT16_MAX + 1);
  SMARTLIST_FOREACH(rmv_ports, const uint16_t *, p,
                    bitarray_set(remove_ports, *p));

  SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) {
    if (bitarray_is_set(remove_ports, pp->port)) {
      /* Do the accounting before the pointer is released. */
      rephist_total_alloc -= sizeof(*pp);
      tor_free(pp);
      SMARTLIST_DEL_CURRENT(predicted_ports_list, pp);
    }
  } SMARTLIST_FOREACH_END(pp);

  bitarray_free(remove_ports);
}
/** Note that the user asked us to do a resolve at time <b>now</b>.
 * Rather than keeping track of timings and such of resolves, we fake it
 * for now by treating it the same way as a connection to port 80. This way
 * we will continue to have circuits lying around if the user only uses Tor
 * for resolves.
 */
void
rep_hist_note_used_resolve(time_t now)
{
  const uint16_t stand_in_port = 80;
  rep_hist_note_used_port(now, stand_in_port);
}
/** The last time at which we needed an internal circ. */
static time_t predicted_internal_time = 0;
/** The last time we needed an internal circ with good uptime. */
static time_t predicted_internal_uptime_time = 0;
/** The last time we needed an internal circ with good capacity. */
static time_t predicted_internal_capacity_time = 0;
/** Remember that we used an internal circ at time <b>now</b>. If
 * <b>need_uptime</b> or <b>need_capacity</b> is set, also record that an
 * internal circ with that property was needed at <b>now</b>. */
void
rep_hist_note_used_internal(time_t now, int need_uptime, int need_capacity)
{
  /* This check must come before predicted_internal_time is updated below,
   * since any_predicted_circuits() reads it. If nothing is being predicted,
   * re-randomize the predicted ports lifetime. */
  const int starting_fresh = !any_predicted_circuits(now);
  if (starting_fresh) {
    prediction_timeout =
      (time_t)channelpadding_get_circuits_available_timeout();
  }

  last_prediction_add_time = now;

  log_info(LD_CIRC,
          "New port prediction added. Will continue predictive circ building "
          "for %d more seconds.",
          predicted_ports_prediction_time_remaining(now));

  predicted_internal_time = now;
  if (need_uptime) {
    predicted_internal_uptime_time = now;
  }
  if (need_capacity) {
    predicted_internal_capacity_time = now;
  }
}
/** Return 1 if we've used an internal circ recently (within the last
 * prediction_timeout seconds before <b>now</b>); else return 0.
 *
 * When returning 1, *<b>need_capacity</b> is always set to 1, and
 * *<b>need_uptime</b> is set to 1 if an internal circ with good uptime was
 * also needed recently. Neither output is ever cleared here, so callers
 * should initialize them. */
int
rep_hist_get_predicted_internal(time_t now, int *need_uptime,
                                int *need_capacity)
{
  const int relevance_secs = (int)prediction_timeout;

  if (predicted_internal_time == 0) {
    /* First call: initialize all three timestamps to now. */
    predicted_internal_time = now;
    predicted_internal_uptime_time = now;
    predicted_internal_capacity_time = now;
  }

  const int too_long_ago = predicted_internal_time + relevance_secs < now;
  if (too_long_ago)
    return 0;

  if (predicted_internal_uptime_time + relevance_secs >= now)
    *need_uptime = 1;

  // Always predict that we need capacity.
  *need_capacity = 1;
  return 1;
}
/** Return nonzero if any ports have been used lately: that is, if the
 * predicted-ports list is non-empty, or if an internal circuit was needed
 * within the last prediction_timeout seconds before <b>now</b>. These are
 * pre-seeded if we just started up or if we're running a hidden service. */
int
any_predicted_circuits(time_t now)
{
  if (smartlist_len(predicted_ports_list) != 0)
    return 1;

  const int relevance_secs = (int)prediction_timeout;
  return predicted_internal_time + relevance_secs >= now;
}
/** Return 1 if we have no need for circuits currently, else return 0.
 * We still need circuits if anything is being predicted, if we are a
 * server that still has reachability testing to do, or if our DirPort is
 * not known to be reachable. */
int
rep_hist_circbuilding_dormant(time_t now)
{
  const or_options_t *opts = get_options();

  /* The checks short-circuit in this order, so the later ones only run
   * when the earlier ones found no need for circuits. */
  const int need_circuits =
    any_predicted_circuits(now) ||
    /* see if we'll still need to build testing circuits */
    (server_mode(opts) &&
     (!check_whether_orport_reachable(opts) ||
      !circuit_enough_testing_circs())) ||
    !check_whether_dirport_reachable(opts);

  return need_circuits ? 0 : 1;
}
/*** Exit port statistics ***/
/* Some constants */

View File

@ -41,20 +41,6 @@ double rep_hist_get_weighted_fractional_uptime(const char *id, time_t when);
long rep_hist_get_weighted_time_known(const char *id, time_t when);
int rep_hist_have_measured_enough_stability(void);
void predicted_ports_init(void);
void rep_hist_note_used_port(time_t now, uint16_t port);
smartlist_t *rep_hist_get_predicted_ports(time_t now);
void rep_hist_remove_predicted_ports(const smartlist_t *rmv_ports);
void rep_hist_note_used_resolve(time_t now);
void rep_hist_note_used_internal(time_t now, int need_uptime,
int need_capacity);
int rep_hist_get_predicted_internal(time_t now, int *need_uptime,
int *need_capacity);
int any_predicted_circuits(time_t now);
int rep_hist_circbuilding_dormant(time_t now);
int predicted_ports_prediction_time_remaining(time_t now);
void rep_hist_exit_stats_init(time_t now);
void rep_hist_reset_exit_stats(time_t now);
void rep_hist_exit_stats_term(void);

View File

@ -11,6 +11,7 @@ include src/lib/encoding/include.am
include src/lib/evloop/include.am
include src/lib/fdio/include.am
include src/lib/fs/include.am
include src/lib/geoip/include.am
include src/lib/include.libdonna.am
include src/lib/intmath/include.am
include src/lib/lock/include.am

View File

@ -0,0 +1,13 @@
orconfig.h
lib/cc/*.h
lib/container/*.h
lib/crypt_ops/*.h
lib/ctime/*.h
lib/encoding/*.h
lib/fs/*.h
lib/geoip/*.h
lib/log/*.h
lib/malloc/*.h
lib/net/*.h
lib/string/*.h
lib/testsupport/*.h

14
src/lib/geoip/country.h Normal file
View File

@ -0,0 +1,14 @@
/* Copyright (c) 2001 Matej Pfajfar.
* Copyright (c) 2001-2004, Roger Dingledine.
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
* Copyright (c) 2007-2018, The Tor Project, Inc. */
/* See LICENSE for licensing information */
#ifndef TOR_COUNTRY_H
#define TOR_COUNTRY_H
#include "lib/cc/torint.h"
/** A signed integer representing a country code. */
typedef int16_t country_t;
#endif

510
src/lib/geoip/geoip.c Normal file
View File

@ -0,0 +1,510 @@
/* Copyright (c) 2007-2018, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
* \file geoip.c
* \brief Functions related to maintaining an IP-to-country database.
*
* The functions in this module map groups of IPv4 and IPv6 addresses to
* country codes. The statistical functions that summarize client
* connections by country to entry guards, bridges, and directory servers,
* and that count answers to network status requests, are not part of this
* module; they live in feature/stats/geoip_stats.c.
*
* The geoip lookup tables are implemented as sorted lists of disjoint address
* ranges, each mapping to a singleton geoip_country_t. These country objects
* are also indexed by their names in a hashtable.
*
* The tables are populated from disk at startup by the geoip_load_file()
* function. For more information on the file format they read, see that
* function. See the scripts and the README file in src/config for more
* information about how those files are generated.
*
* Tor uses GeoIP information in order to implement user requests (such as
* ExcludeNodes {cc}), and to keep track of how much usage relays are getting
* for each country.
*/
#define GEOIP_PRIVATE
#include "lib/geoip/geoip.h"
#include "lib/container/map.h"
#include "lib/container/order.h"
#include "lib/container/smartlist.h"
#include "lib/crypt_ops/crypto_digest.h"
#include "lib/ctime/di_ops.h"
#include "lib/encoding/binascii.h"
#include "lib/fs/files.h"
#include "lib/log/escape.h"
#include "lib/malloc/malloc.h"
#include "lib/net/address.h" //????
#include "lib/net/inaddr.h"
#include "lib/string/compat_ctype.h"
#include "lib/string/compat_string.h"
#include "lib/string/scanf.h"
#include "lib/string/util_string.h"
#include <stdio.h>
#include <string.h>
static void init_geoip_countries(void);
/** An entry from the GeoIP IPv4 file: maps an IPv4 range to a country.
* Both ends of the range are inclusive. */
typedef struct geoip_ipv4_entry_t {
uint32_t ip_low; /**< The lowest IP in the range, in host order */
uint32_t ip_high; /**< The highest IP in the range, in host order */
intptr_t country; /**< An index into geoip_countries */
} geoip_ipv4_entry_t;
/** An entry from the GeoIP IPv6 file: maps an IPv6 range to a country.
* Both ends of the range are inclusive. The addresses are copied unchanged
* from the parsed tor_addr_t by geoip_add_entry(); no byte-order conversion
* is applied.
* NOTE(review): struct in6_addr is conventionally kept in network byte
* order -- confirm that the range comparisons in this file rely on that. */
typedef struct geoip_ipv6_entry_t {
struct in6_addr ip_low; /**< The lowest IP in the range */
struct in6_addr ip_high; /**< The highest IP in the range */
intptr_t country; /**< An index into geoip_countries */
} geoip_ipv6_entry_t;
/** A list of geoip_country_t */
static smartlist_t *geoip_countries = NULL;
/** A map from lowercased country codes to their position in geoip_countries.
* The index is encoded in the pointer, and 1 is added so that NULL can mean
* not found. */
static strmap_t *country_idxplus1_by_lc_code = NULL;
/** Lists of all known geoip_ipv4_entry_t and geoip_ipv6_entry_t, sorted
* by their respective ip_low. */
static smartlist_t *geoip_ipv4_entries = NULL, *geoip_ipv6_entries = NULL;
/** SHA1 digest of the GeoIP files to include in extra-info descriptors. */
static char geoip_digest[DIGEST_LEN];
static char geoip6_digest[DIGEST_LEN];
/** Return the list of geoip_country_t for all known countries, running
 * init_geoip_countries() first if the list has not been set up yet. */
const smartlist_t *
geoip_get_countries(void)
{
  if (!geoip_countries)
    init_geoip_countries();

  return geoip_countries;
}
/** Look up the 2-letter country code <b>country</b> (case-insensitively)
 * and return the index of its entry in the GeoIP country list, or -1 if
 * the code is not known. */
MOCK_IMPL(country_t,
geoip_get_country,(const char *country))
{
  void *encoded_idx = strmap_get_lc(country_idxplus1_by_lc_code, country);
  if (encoded_idx == NULL)
    return -1;

  /* The map stores index+1 in the pointer, so that NULL can mean "not
   * found"; undo the offset here. */
  const intptr_t idx = ((uintptr_t)encoded_idx)-1;
  return (country_t)idx;
}
/** Add an entry to a GeoIP table, mapping all IP addresses between <b>low</b>
* and <b>high</b>, inclusive, to the 2-letter country code <b>country</b>. */
static void
geoip_add_entry(const tor_addr_t *low, const tor_addr_t *high,
const char *country)
{
intptr_t idx;
void *idxplus1_;
IF_BUG_ONCE(tor_addr_family(low) != tor_addr_family(high))
return;
IF_BUG_ONCE(tor_addr_compare(high, low, CMP_EXACT) < 0)
return;
idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country);
if (!idxplus1_) {
geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t));
strlcpy(c->countrycode, country, sizeof(c->countrycode));
tor_strlower(c->countrycode);
smartlist_add(geoip_countries, c);
idx = smartlist_len(geoip_countries) - 1;
strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
} else {
idx = ((uintptr_t)idxplus1_)-1;
}
{
geoip_country_t *c = smartlist_get(geoip_countries, (int)idx);
tor_assert(!strcasecmp(c->countrycode, country));
}
if (tor_addr_family(low) == AF_INET) {
geoip_ipv4_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv4_entry_t));
ent->ip_low = tor_addr_to_ipv4h(low);
ent->ip_high = tor_addr_to_ipv4h(high);
ent->country = idx;
smartlist_add(geoip_ipv4_entries, ent);
} else if (tor_addr_family(low) == AF_INET6) {
geoip_ipv6_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv6_entry_t));
ent->ip_low = *tor_addr_to_in6_assert(low);
ent->ip_high = *tor_addr_to_in6_assert(high);
ent->country = idx;
smartlist_add(geoip_ipv6_entries, ent);
}
}
/** Add an entry to the GeoIP table indicated by <b>family</b>,
* parsing it from <b>line</b>. The format is as for geoip_load_file().
*
* Return 0 if the line was parsed and added, or if it is a comment line
* (its first non-space character is '#'). Return -1, after logging a
* warning, if <b>family</b> is neither AF_INET nor AF_INET6 or if the line
* cannot be parsed. */
STATIC int
geoip_parse_entry(const char *line, sa_family_t family)
{
tor_addr_t low_addr, high_addr;
char c[3];
char *country = NULL;
/* Lazily create the country list and the table for this family. */
if (!geoip_countries)
init_geoip_countries();
if (family == AF_INET) {
if (!geoip_ipv4_entries)
geoip_ipv4_entries = smartlist_new();
} else if (family == AF_INET6) {
if (!geoip_ipv6_entries)
geoip_ipv6_entries = smartlist_new();
} else {
log_warn(LD_GENERAL, "Unsupported family: %d", family);
return -1;
}
/* Skip leading whitespace; a line starting with '#' is a comment. */
while (TOR_ISSPACE(*line))
++line;
if (*line == '#')
return 0;
char buf[512];
if (family == AF_INET) {
/* IPv4 lines hold two unsigned integers and a 2-letter code, either
* bare (low,high,CC) or with each field double-quoted and followed by
* a trailing comma. */
unsigned int low, high;
if (tor_sscanf(line,"%u,%u,%2s", &low, &high, c) == 3 ||
tor_sscanf(line,"\"%u\",\"%u\",\"%2s\",", &low, &high, c) == 3) {
tor_addr_from_ipv4h(&low_addr, low);
tor_addr_from_ipv4h(&high_addr, high);
} else
goto fail;
country = c;
} else { /* AF_INET6 */
/* IPv6 lines hold two textual addresses and a country code, separated
* by commas. The line is tokenized in a local copy, since <b>line</b>
* itself is const. */
char *low_str, *high_str;
struct in6_addr low, high;
char *strtok_state;
strlcpy(buf, line, sizeof(buf));
low_str = tor_strtok_r(buf, ",", &strtok_state);
if (!low_str)
goto fail;
high_str = tor_strtok_r(NULL, ",", &strtok_state);
if (!high_str)
goto fail;
country = tor_strtok_r(NULL, "\n", &strtok_state);
if (!country)
goto fail;
/* The country code must be exactly two characters long. */
if (strlen(country) != 2)
goto fail;
if (tor_inet_pton(AF_INET6, low_str, &low) <= 0)
goto fail;
tor_addr_from_in6(&low_addr, &low);
if (tor_inet_pton(AF_INET6, high_str, &high) <= 0)
goto fail;
tor_addr_from_in6(&high_addr, &high);
}
geoip_add_entry(&low_addr, &high_addr, country);
return 0;
fail:
log_warn(LD_GENERAL, "Unable to parse line from GEOIP %s file: %s",
family == AF_INET ? "IPv4" : "IPv6", escaped(line));
return -1;
}
/** Sorting helper for lists of geoip_ipv4_entry_t: order two entries by
 * the lower bound of their ranges.  Return -1 if the first entry starts
 * lower, 1 if it starts higher, and 0 if both start at the same address. */
static int
geoip_ipv4_compare_entries_(const void **_a, const void **_b)
{
  const geoip_ipv4_entry_t *lhs = *_a;
  const geoip_ipv4_entry_t *rhs = *_b;

  if (lhs->ip_low == rhs->ip_low)
    return 0;
  return (lhs->ip_low < rhs->ip_low) ? -1 : 1;
}
/** bsearch helper: locate an IPv4 address relative to the range covered by
 * a geoip_ipv4_entry_t.  <b>_key</b> points to the address as a uint32_t in
 * host order.  Return -1 if the address lies below the range, 1 if it lies
 * above it, and 0 if it falls inside it (bounds included). */
static int
geoip_ipv4_compare_key_to_entry_(const void *_key, const void **_member)
{
  /* No alignment issue here, since _key really is a pointer to uint32_t */
  const uint32_t *keyp = _key;
  const uint32_t wanted = *keyp;
  const geoip_ipv4_entry_t *range = *_member;

  if (wanted < range->ip_low)
    return -1;
  if (wanted > range->ip_high)
    return 1;
  return 0;
}
/** Sorting helper for lists of geoip_ipv6_entry_t: order two entries by
 * the lower bound of their ranges, compared byte by byte.  Return the
 * result of fast_memcmp() on the two lower bounds: negative, zero, or
 * positive. */
static int
geoip_ipv6_compare_entries_(const void **_a, const void **_b)
{
  const geoip_ipv6_entry_t *lhs = *_a;
  const geoip_ipv6_entry_t *rhs = *_b;
  const int order = fast_memcmp(lhs->ip_low.s6_addr, rhs->ip_low.s6_addr,
                                sizeof(struct in6_addr));

  return order;
}
/** bsearch helper: locate an IPv6 address relative to the range covered by
 * a geoip_ipv6_entry_t.  <b>_key</b> points to the address as a struct
 * in6_addr.  Return -1 if the address lies below the range, 1 if it lies
 * above it, and 0 if it falls inside it (bounds included). */
static int
geoip_ipv6_compare_key_to_entry_(const void *_key, const void **_member)
{
  const struct in6_addr *wanted = _key;
  const geoip_ipv6_entry_t *range = *_member;
  const size_t addr_len = sizeof(struct in6_addr);

  if (fast_memcmp(wanted->s6_addr, range->ip_low.s6_addr, addr_len) < 0)
    return -1;
  if (fast_memcmp(wanted->s6_addr, range->ip_high.s6_addr, addr_len) > 0)
    return 1;
  return 0;
}
/** Create the list of GeoIP countries and the map from country code to
 * list position.  The only country present afterwards is the placeholder
 * "??" for the unknown country, at index 0.
 */
static void
init_geoip_countries(void)
{
  /* Placeholder for requests that could not be resolved to a country. */
  geoip_country_t *unknown = tor_malloc_zero(sizeof(geoip_country_t));
  strlcpy(unknown->countrycode, "??", sizeof(unknown->countrycode));

  /* It is the first element added, so it ends up at index 0. */
  geoip_countries = smartlist_new();
  smartlist_add(geoip_countries, unknown);

  /* The map stores (index + 1), hence the value 1 for index 0. */
  country_idxplus1_by_lc_code = strmap_new();
  strmap_set_lc(country_idxplus1_by_lc_code, "??", (void*)(1));
}
/** Clear appropriate GeoIP database, based on <b>family</b>, and
* reload it from the file <b>filename</b>. Return 0 on success, -1 on
* failure.
*
* Recognized line formats for IPv4 are:
* INTIPLOW,INTIPHIGH,CC
* and
* "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
* where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
* integers, and CC is a country code.
*
* Recognized line format for IPv6 is:
* IPV6LOW,IPV6HIGH,CC
* where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code.
*
* It also recognizes, and skips over, blank lines and lines that start
* with '#' (comments).
*/
int
geoip_load_file(sa_family_t family, const char *filename, int severity)
{
FILE *f;
crypto_digest_t *geoip_digest_env = NULL;
tor_assert(family == AF_INET || family == AF_INET6);
if (!(f = tor_fopen_cloexec(filename, "r"))) {
log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s.",
filename);
return -1;
}
if (!geoip_countries)
init_geoip_countries();
if (family == AF_INET) {
if (geoip_ipv4_entries) {
SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, e,
tor_free(e));
smartlist_free(geoip_ipv4_entries);
}
geoip_ipv4_entries = smartlist_new();
} else { /* AF_INET6 */
if (geoip_ipv6_entries) {
SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, e,
tor_free(e));
smartlist_free(geoip_ipv6_entries);
}
geoip_ipv6_entries = smartlist_new();
}
geoip_digest_env = crypto_digest_new();
log_notice(LD_GENERAL, "Parsing GEOIP %s file %s.",
(family == AF_INET) ? "IPv4" : "IPv6", filename);
while (!feof(f)) {
char buf[512];
if (fgets(buf, (int)sizeof(buf), f) == NULL)
break;
crypto_digest_add_bytes(geoip_digest_env, buf, strlen(buf));
/* FFFF track full country name. */
geoip_parse_entry(buf, family);
}
/*XXXX abort and return -1 if no entries/illformed?*/
fclose(f);
/* Sort list and remember file digests so that we can include it in
* our extra-info descriptors. */
if (family == AF_INET) {
smartlist_sort(geoip_ipv4_entries, geoip_ipv4_compare_entries_);
crypto_digest_get_digest(geoip_digest_env, geoip_digest, DIGEST_LEN);
} else {
/* AF_INET6 */
smartlist_sort(geoip_ipv6_entries, geoip_ipv6_compare_entries_);
crypto_digest_get_digest(geoip_digest_env, geoip6_digest, DIGEST_LEN);
}
crypto_digest_free(geoip_digest_env);
return 0;
}
/** Look up the country of the IPv4 address <b>ipaddr</b>, given in host
 * order.  Return -1 if no IPv4 GeoIP table exists, 0 (the 'unknown
 * country') if no range in the table contains the address, and otherwise
 * the number of the matching country.  The return value will always be
 * less than geoip_get_n_countries(). To decode it, call
 * geoip_get_country_name().
 */
int
geoip_get_country_by_ipv4(uint32_t ipaddr)
{
  if (geoip_ipv4_entries == NULL)
    return -1;

  const geoip_ipv4_entry_t *match =
    smartlist_bsearch(geoip_ipv4_entries, &ipaddr,
                      geoip_ipv4_compare_key_to_entry_);
  if (match == NULL)
    return 0;
  return (int)match->country;
}
/** Look up the country of the IPv6 address <b>addr</b>.  Return -1 if no
 * IPv6 GeoIP table exists, 0 (the 'unknown country') if no range in the
 * table contains the address, and otherwise the number of the matching
 * country.  The return value will always be less than
 * geoip_get_n_countries(). To decode it, call geoip_get_country_name().
 */
int
geoip_get_country_by_ipv6(const struct in6_addr *addr)
{
  if (geoip_ipv6_entries == NULL)
    return -1;

  const geoip_ipv6_entry_t *match =
    smartlist_bsearch(geoip_ipv6_entries, addr,
                      geoip_ipv6_compare_key_to_entry_);
  if (match == NULL)
    return 0;
  return (int)match->country;
}
/** Look up the country of <b>addr</b> by dispatching on its address family
 * to geoip_get_country_by_ipv4() or geoip_get_country_by_ipv6().  Return -1
 * for "No geoip information available" (which includes addresses that are
 * neither IPv4 nor IPv6), 0 for the 'unknown country', and otherwise the
 * number of the matching country.  The return value will always be less
 * than geoip_get_n_countries(). To decode it, call geoip_get_country_name().
 */
MOCK_IMPL(int,
geoip_get_country_by_addr,(const tor_addr_t *addr))
{
  switch (tor_addr_family(addr)) {
    case AF_INET:
      return geoip_get_country_by_ipv4(tor_addr_to_ipv4h(addr));
    case AF_INET6:
      return geoip_get_country_by_ipv6(tor_addr_to_in6(addr));
    default:
      return -1;
  }
}
/** Return how many countries the GeoIP country list currently holds.  The
 * list is created on demand if it does not exist yet. */
MOCK_IMPL(int,
geoip_get_n_countries,(void))
{
  if (geoip_countries == NULL)
    init_geoip_countries();

  const int n_countries = (int) smartlist_len(geoip_countries);
  return n_countries;
}
/** Return the two-letter country code stored for country number
 * <b>num</b>.  Return "??" if the country list does not exist or if
 * <b>num</b> is not a valid position in it. */
const char *
geoip_get_country_name(country_t num)
{
  if (!geoip_countries || num < 0 || num >= smartlist_len(geoip_countries))
    return "??";

  const geoip_country_t *record = smartlist_get(geoip_countries, num);
  return record->countrycode;
}
/** Return true iff both the country list and the GeoIP table for
 * <b>family</b> (AF_INET or AF_INET6) exist. */
MOCK_IMPL(int,
geoip_is_loaded,(sa_family_t family))
{
  tor_assert(family == AF_INET || family == AF_INET6);

  if (!geoip_countries)
    return 0;

  const smartlist_t *table =
    (family == AF_INET) ? geoip_ipv4_entries : geoip_ipv6_entries;
  return table != NULL;
}
/** Return the hex-encoded SHA1 digest of the GeoIP file loaded for
 * <b>family</b> (AF_INET or AF_INET6).  The result does not need to be
 * deallocated, but will be overwritten by the next call of hex_str(). */
const char *
geoip_db_digest(sa_family_t family)
{
  tor_assert(family == AF_INET || family == AF_INET6);

  const char *raw_digest = (family == AF_INET) ? geoip_digest : geoip6_digest;
  return hex_str(raw_digest, DIGEST_LEN);
}
/** Release all storage held by the GeoIP databases and country list. */
STATIC void
clear_geoip_db(void)
{
if (geoip_countries) {
SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, tor_free(c));
smartlist_free(geoip_countries);
}
strmap_free(country_idxplus1_by_lc_code, NULL);
if (geoip_ipv4_entries) {
SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, ent,
tor_free(ent));
smartlist_free(geoip_ipv4_entries);
}
if (geoip_ipv6_entries) {
SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, ent,
tor_free(ent));
smartlist_free(geoip_ipv6_entries);
}
geoip_countries = NULL;
country_idxplus1_by_lc_code = NULL;
geoip_ipv4_entries = NULL;
geoip_ipv6_entries = NULL;
}
/** Release all storage held in this file: free the GeoIP databases and the
 * country list, and zero the digests of the files they were loaded from. */
void
geoip_free_all(void)
{
  clear_geoip_db();

  /* The digests are plain arrays, so they are wiped rather than freed. */
  memset(geoip_digest, 0, sizeof(geoip_digest));
  memset(geoip6_digest, 0, sizeof(geoip6_digest));
}

50
src/lib/geoip/geoip.h Normal file
View File

@ -0,0 +1,50 @@
/* Copyright (c) 2001 Matej Pfajfar.
* Copyright (c) 2001-2004, Roger Dingledine.
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
* Copyright (c) 2007-2018, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
* \file geoip.h
* \brief Header file for geoip.c.
**/
#ifndef TOR_GEOIP_H
#define TOR_GEOIP_H
#include "orconfig.h"
#include "lib/net/nettypes.h"
#include "lib/testsupport/testsupport.h"
#include "lib/net/inaddr_st.h"
#include "lib/geoip/country.h"
#ifdef GEOIP_PRIVATE
/* Internals, visible only to files that define GEOIP_PRIVATE before
 * including this header (the geoip unit tests do so). */
/** Parse one line of a GeoIP file for <b>family</b> and add it to the
 * matching table.  Returns 0 or -1. */
STATIC int geoip_parse_entry(const char *line, sa_family_t family);
/** Free the country list, the country-code map and both GeoIP tables. */
STATIC void clear_geoip_db(void);
#endif /* defined(GEOIP_PRIVATE) */
/* Forward declarations: only pointers to these types are used below. */
struct in6_addr;
struct tor_addr_t;
/** Country number for an IPv4 address in host order: -1 if no IPv4 table
 * exists, 0 for the unknown country. */
int geoip_get_country_by_ipv4(uint32_t ipaddr);
/** Country number for an IPv6 address: -1 if no IPv6 table exists, 0 for
 * the unknown country. */
int geoip_get_country_by_ipv6(const struct in6_addr *addr);
/** A per-country GeoIP record. */
typedef struct geoip_country_t {
/* Two-letter country code plus the terminating NUL; "??" is used for the
 * unknown country. */
char countrycode[3];
} geoip_country_t;
struct smartlist_t;
/* NOTE(review): presumably returns the list of geoip_country_t records;
 * confirm against the definition in geoip.c. */
const struct smartlist_t *geoip_get_countries(void);
/** Replace the GeoIP table for <b>family</b> with the contents of
 * <b>filename</b>.  Returns 0 on success, -1 if the file cannot be opened;
 * that failure is logged at level <b>severity</b>. */
int geoip_load_file(sa_family_t family, const char *filename, int severity);
/** Country number for <b>addr</b>, dispatching on its address family; -1
 * if the family is neither IPv4 nor IPv6 or no table exists for it. */
MOCK_DECL(int, geoip_get_country_by_addr, (const struct tor_addr_t *addr));
/** Number of entries in the country list, unknown country included. */
MOCK_DECL(int, geoip_get_n_countries, (void));
/** Two-letter code for country number <b>num</b>, or "??" if <b>num</b> is
 * out of range. */
const char *geoip_get_country_name(country_t num);
/** True iff the country list and the table for <b>family</b> exist. */
MOCK_DECL(int, geoip_is_loaded, (sa_family_t family));
/** Hex-encoded digest of the GeoIP file loaded for <b>family</b>. */
const char *geoip_db_digest(sa_family_t family);
/** Country number for a country code (looked up case-insensitively), or
 * -1 if the code is not in the country list. */
MOCK_DECL(country_t, geoip_get_country, (const char *countrycode));
/** Free the GeoIP databases and zero the stored file digests. */
void geoip_free_all(void);
#endif /* !defined(TOR_GEOIP_H) */

17
src/lib/geoip/include.am Normal file
View File

@ -0,0 +1,17 @@
# Static library built from the GeoIP code (built, not installed).
noinst_LIBRARIES += src/lib/libtor-geoip.a
# The "-testing" variant is only built when unit tests are enabled.
if UNITTESTS_ENABLED
noinst_LIBRARIES += src/lib/libtor-geoip-testing.a
endif
# Sources of the library.
src_lib_libtor_geoip_a_SOURCES = \
src/lib/geoip/geoip.c
# The testing variant compiles the same sources, with the TEST_* flags
# added to the usual ones.
src_lib_libtor_geoip_testing_a_SOURCES = \
$(src_lib_libtor_geoip_a_SOURCES)
src_lib_libtor_geoip_testing_a_CPPFLAGS = $(AM_CPPFLAGS) $(TEST_CPPFLAGS)
src_lib_libtor_geoip_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS)
# Headers belonging to this library (distributed, not installed).
noinst_HEADERS += \
src/lib/geoip/geoip.h \
src/lib/geoip/country.h

View File

@ -29,7 +29,7 @@
#include "feature/client/entrynodes.h"
#include "feature/client/transports.h"
#include "feature/relay/ext_orport.h"
#include "feature/stats/geoip.h"
#include "lib/geoip/geoip.h"
#include "feature/hibernate/hibernate.h"
#include "core/mainloop/mainloop.h"
#include "feature/nodelist/networkstatus.h"

View File

@ -32,7 +32,8 @@
#include "feature/nodelist/routerparse.h"
#include "feature/nodelist/networkstatus.h"
#include "core/proto/proto_http.h"
#include "feature/stats/geoip.h"
#include "lib/geoip/geoip.h"
#include "feature/stats/geoip_stats.h"
#include "feature/dircache/dirserv.h"
#include "feature/dirauth/dirvote.h"
#include "test/log_test_helpers.h"

View File

@ -9,7 +9,7 @@
#include "core/or/dos.h"
#include "core/or/circuitlist.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "feature/stats/geoip.h"
#include "feature/stats/geoip_stats.h"
#include "core/or/channel.h"
#include "feature/nodelist/microdesc.h"
#include "feature/nodelist/networkstatus.h"
@ -500,4 +500,3 @@ struct testcase_t dos_tests[] = {
NULL, NULL },
END_OF_TESTCASES
};

View File

@ -10,7 +10,8 @@
#define GEOIP_PRIVATE
#include "core/or/or.h"
#include "app/config/config.h"
#include "feature/stats/geoip.h"
#include "lib/geoip/geoip.h"
#include "feature/stats/geoip_stats.h"
#include "test/test.h"
/* Record odd numbered fake-IPs using ipv6, even numbered fake-IPs
@ -403,7 +404,8 @@ test_geoip_load_file(void *arg)
/* A nonexistent filename should fail. */
tt_int_op(-1, OP_EQ,
geoip_load_file(AF_INET, "/you/did/not/put/a/file/here/I/hope"));
geoip_load_file(AF_INET, "/you/did/not/put/a/file/here/I/hope",
LOG_INFO));
/* We start out with only "Ningunpartia" in the database. */
tt_int_op(1, OP_EQ, geoip_get_n_countries());
@ -417,7 +419,7 @@ test_geoip_load_file(void *arg)
const char *fname = get_fname("geoip");
tt_int_op(0, OP_EQ, write_str_to_file(fname, GEOIP_CONTENT, 1));
int rv = geoip_load_file(AF_INET, fname);
int rv = geoip_load_file(AF_INET, fname, LOG_WARN);
if (rv != 0) {
TT_GRIPE(("Unable to load geoip from %s", escaped(fname)));
}
@ -467,7 +469,8 @@ test_geoip6_load_file(void *arg)
/* A nonexistent filename should fail. */
tt_int_op(-1, OP_EQ,
geoip_load_file(AF_INET6, "/you/did/not/put/a/file/here/I/hope"));
geoip_load_file(AF_INET6, "/you/did/not/put/a/file/here/I/hope",
LOG_INFO));
/* Any lookup attempt should say "-1" because we have no info */
tor_inet_pton(AF_INET6, "2001:4860:4860::8888", &iaddr6);
@ -493,7 +496,7 @@ test_geoip6_load_file(void *arg)
"2001:4878:205::,2001:4878:214:ffff:ffff:ffff:ffff:ffff,US\n";
tt_int_op(0, OP_EQ, write_str_to_file(fname6, CONTENT, 1));
tt_int_op(0, OP_EQ, geoip_load_file(AF_INET6, fname6));
tt_int_op(0, OP_EQ, geoip_load_file(AF_INET6, fname6, LOG_WARN));
/* Check that we loaded some countries; this will fail if there are ever
* fewer than 5 countries in our test data above. */
@ -545,11 +548,11 @@ test_geoip_load_2nd_file(void *arg)
tt_int_op(0, OP_EQ, write_str_to_file(fname_empty, "\n", 1));
/* Load 1st geoip file */
tt_int_op(0, OP_EQ, geoip_load_file(AF_INET, fname_geoip));
tt_int_op(0, OP_EQ, geoip_load_file(AF_INET, fname_geoip, LOG_WARN));
/* Load 2nd geoip (empty) file */
/* It has to be the same IP address family */
tt_int_op(0, OP_EQ, geoip_load_file(AF_INET, fname_empty));
tt_int_op(0, OP_EQ, geoip_load_file(AF_INET, fname_empty, LOG_WARN));
/* Check that there is no geoip information for 8.8.8.8, */
/* since loading the empty 2nd file should have deleted it. */

View File

@ -8,7 +8,7 @@
#include "app/config/confparse.h"
#include "app/config/config.h"
#include "test/test.h"
#include "feature/stats/geoip.h"
#include "lib/geoip/geoip.h"
#define ROUTERSET_PRIVATE
#include "feature/nodelist/routerset.h"

View File

@ -4,7 +4,7 @@
#define ROUTERSET_PRIVATE
#include "core/or/or.h"
#include "feature/stats/geoip.h"
#include "lib/geoip/geoip.h"
#include "feature/nodelist/routerset.h"
#include "feature/nodelist/routerparse.h"
#include "core/or/policies.h"

View File

@ -16,6 +16,7 @@
#include "lib/crypt_ops/crypto_dh.h"
#include "lib/crypt_ops/crypto_ed25519.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "feature/stats/predict_ports.h"
#include "feature/stats/rephist.h"
#include "lib/err/backtrace.h"
#include "test/test.h"