Merge remote-tracking branch 'teor/ticket24838'

This commit is contained in:
Nick Mathewson 2018-12-11 10:04:16 -05:00
commit 91be6a77e9
4 changed files with 248 additions and 101 deletions

6
changes/ticket24838 Normal file
View File

@ -0,0 +1,6 @@
o Minor features (fallback directory mirrors):
- Accept relays that are a fuzzy match to a fallback whitelist entry.
If a relay matches at least one fingerprint, IPv4 address, or IPv6
address in the fallback whitelist, it can become a fallback. This
reduces the work required to keep the list up to date.
Closes ticket 24838.

4
changes/ticket28768 Normal file
View File

@ -0,0 +1,4 @@
o Minor features (fallback directory mirrors):
- Accept fallbacks that deliver reasonably live consensuses.
(Consensuses that will become valid less than 24 hours in the future,
or that expired less than 24 hours ago.) Closes ticket 28768.

View File

@ -1,34 +1,23 @@
# updateFallbackDirs.py directory mirror whitelist
#
# Format:
# IPv4:DirPort orport=<ORPort> id=<ID> [ ipv6=<IPv6>:<IPv6 ORPort> ]
# or use:
# scripts/maint/generateFallbackDirLine.py fingerprint ...
# At least one of these keys must match for a directory mirror to be included
# in the fallback list:
# id
# ipv4
# ipv6
# The ports and nickname are ignored. Missing or extra ipv6 addresses
# are ignored.
#
# All attributes must match for the directory mirror to be included.
# If the fallback has an ipv6 key, the whitelist line must also have
# it, and vice versa, otherwise they don't match.
# (The blacklist overrides the whitelist.)
# To replace this list with the hard-coded fallback list (for testing), use
# a command similar to:
# cat src/app/config/fallback_dirs.inc | grep \" | grep -v weight | \
# tr -d '\n' | \
# sed 's/"" / /g' | sed 's/""/"/g' | tr \" '\n' | grep -v '^$' \
# > scripts/maint/fallback.whitelist
# The latest relay details from Onionoo are included in the generated list.
#
# When testing before a release, exclusions due to changed details will result
# in a warning, unless the IPv4 address or port change happened recently.
# Then it is only logged at info level, as part of the eligibility check.
# Exclusions due to stability also are only shown at info level.
# To check the hard-coded fallback list (for testing), use:
# $ updateFallbackDirs.py check_existing
#
# Add the number of selected, slow, and excluded relays, and compare that to
# the number of hard-coded relays. If it's less, use info-level logs to find
# out why each of the missing relays was excluded.
# If a relay operator wants their relay to be a FallbackDir,
# enter the following information here:
# <IPv4>:<DirPort> orport=<ORPort> id=<ID> [ ipv6=<IPv6>:<IPv6 ORPort> ]
# <IPv4>:<DirPort> orport=<ORPort> id=<ID> ( ipv6=[<IPv6>]:<IPv6 ORPort> )?
# or use:
# scripts/maint/generateFallbackDirLine.py fingerprint ...
# https://lists.torproject.org/pipermail/tor-relays/2015-December/008362.html
# https://trac.torproject.org/projects/tor/ticket/22321#comment:22

View File

@ -18,8 +18,8 @@
# Optionally uses ipaddress (python 3 builtin) or py2-ipaddress (package)
# for netblock analysis.
#
# Then read the logs to make sure the fallbacks aren't dominated by a single
# netblock or port.
# After running this script, read the logs to make sure the fallbacks aren't
# dominated by a single netblock or port.
# Script by weasel, April 2015
# Portions by gsathya & karsten, 2013
@ -39,8 +39,6 @@ import urllib
import urllib2
import hashlib
import dateutil.parser
# bson_lazy provides bson
#from bson import json_util
import copy
import re
@ -100,19 +98,29 @@ MUST_BE_RUNNING_NOW = (PERFORM_IPV4_DIRPORT_CHECKS
# Clients have been using microdesc consensuses by default for a while now
DOWNLOAD_MICRODESC_CONSENSUS = True
# If a relay delivers an expired consensus, if it expired less than this many
# seconds ago, we still allow the relay. This should never be less than -90,
# as all directory mirrors should have downloaded a consensus 90 minutes
# before it expires. It should never be more than 24 hours, because clients
# reject consensuses that are older than REASONABLY_LIVE_TIME.
# For the consensus expiry check to be accurate, the machine running this
# script needs an accurate clock.
# If a relay delivers an invalid consensus, if it will become valid less than
# this many seconds in the future, or expired less than this many seconds ago,
# accept the relay as a fallback. For the consensus expiry check to be
# accurate, the machine running this script needs an accurate clock.
#
# Relays on 0.3.0 and later return a 404 when they are about to serve an
# expired consensus. This makes them fail the download check.
# We use a tolerance of 0, so that 0.2.x series relays also fail the download
# check if they serve an expired consensus.
CONSENSUS_EXPIRY_TOLERANCE = 0
# Relays on 0.3.0 and later return a 404 when they are about to serve a
# consensus that expired more than 24 hours ago. 0.2.9 and earlier relays
# will serve consensuses that are very old.
#
# Relays on 0.3.5.6-rc? and later return a 404 when they are about to serve a
# consensus that will become valid more than 24 hours in the future. Older
# relays don't serve future consensuses.
#
# A 404 makes relays fail the download check. We use a tolerance of 24 hours,
# so that 0.2.9 relays also fail the download check if they serve a consensus
# that is not reasonably live.
#
# REASONABLY_LIVE_TIME should never be more than Tor's REASONABLY_LIVE_TIME,
# (24 hours), because clients reject consensuses that are older than that.
# Clients on 0.3.5.5-alpha? and earlier also won't select guards from
# consensuses that have expired, but can bootstrap if they already have guards
# in their state file.
REASONABLY_LIVE_TIME = 24*60*60
# Output fallback name, flags, bandwidth, and ContactInfo in a C comment?
OUTPUT_COMMENTS = True if OUTPUT_CANDIDATES else False
@ -912,61 +920,181 @@ class Candidate(object):
return False
return True
def is_in_whitelist(self, relaylist):
""" A fallback matches if each key in the whitelist line matches:
def id_matches(self, id, exact=False):
""" Does this fallback's id match id?
exact is ignored. """
return self._fpr == id
def ipv4_addr_matches(self, ipv4_addr, exact=False):
""" Does this fallback's IPv4 address match ipv4_addr?
exact is ignored. """
return self.dirip == ipv4_addr
def ipv4_dirport_matches(self, ipv4_dirport, exact=False):
""" Does this fallback's IPv4 dirport match ipv4_dirport?
If exact is False, always return True. """
if exact:
return self.dirport == int(ipv4_dirport)
else:
return True
def ipv4_and_dirport_matches(self, ipv4_addr, ipv4_dirport, exact=False):
""" Does this fallback's IPv4 address match ipv4_addr?
If exact is True, also check ipv4_dirport. """
ipv4_match = self.ipv4_addr_matches(ipv4_addr, exact=exact)
if exact:
return ipv4_match and self.ipv4_dirport_matches(ipv4_dirport,
exact=exact)
else:
return ipv4_match
def ipv4_orport_matches(self, ipv4_orport, exact=False):
""" Does this fallback's IPv4 orport match ipv4_orport?
If exact is False, always return True. """
if exact:
return self.orport == int(ipv4_orport)
else:
return True
def ipv4_and_orport_matches(self, ipv4_addr, ipv4_orport, exact=False):
""" Does this fallback's IPv4 address match ipv4_addr?
If exact is True, also check ipv4_orport. """
ipv4_match = self.ipv4_addr_matches(ipv4_addr, exact=exact)
if exact:
return ipv4_match and self.ipv4_orport_matches(ipv4_orport,
exact=exact)
else:
return ipv4_match
def ipv6_addr_matches(self, ipv6_addr, exact=False):
""" Does this fallback's IPv6 address match ipv6_addr?
Both addresses must be present to match.
exact is ignored. """
if self.has_ipv6() and ipv6_addr is not None:
# Check that we have a bracketed IPv6 address without a port
assert(ipv6_addr.startswith('[') and ipv6_addr.endswith(']'))
return self.ipv6addr == ipv6_addr
else:
return False
def ipv6_orport_matches(self, ipv6_orport, exact=False):
""" Does this fallback's IPv6 orport match ipv6_orport?
Both ports must be present to match.
If exact is False, always return True. """
if exact:
return (self.has_ipv6() and ipv6_orport is not None and
self.ipv6orport == int(ipv6_orport))
else:
return True
def ipv6_and_orport_matches(self, ipv6_addr, ipv6_orport, exact=False):
""" Does this fallback's IPv6 address match ipv6_addr?
If exact is True, also check ipv6_orport. """
ipv6_match = self.ipv6_addr_matches(ipv6_addr, exact=exact)
if exact:
return ipv6_match and self.ipv6_orport_matches(ipv6_orport,
exact=exact)
else:
return ipv6_match
def entry_matches_exact(self, entry):
""" Is entry an exact match for this fallback?
A fallback is an exact match for entry if each key in entry matches:
ipv4
dirport
orport
id
ipv6 address and port (if present)
ipv6 address and port (if present in the fallback or the whitelist)
If the fallback has an ipv6 key, the whitelist line must also have
it, and vice versa, otherwise they don't match. """
ipv6 = None
if self.has_ipv6():
ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport)
for entry in relaylist:
if entry['id'] != self._fpr:
# can't log here unless we match an IP and port, because every relay's
# fingerprint is compared to every entry's fingerprint
if entry['ipv4'] == self.dirip and int(entry['orport']) == self.orport:
logging.warning('%s excluded: has OR %s:%d changed fingerprint to ' +
'%s?', entry['id'], self.dirip, self.orport,
self._fpr)
if self.has_ipv6() and entry.has_key('ipv6') and entry['ipv6'] == ipv6:
logging.warning('%s excluded: has OR %s changed fingerprint to ' +
'%s?', entry['id'], ipv6, self._fpr)
continue
if entry['ipv4'] != self.dirip:
logging.warning('%s excluded: has it changed IPv4 from %s to %s?',
self._fpr, entry['ipv4'], self.dirip)
continue
if int(entry['dirport']) != self.dirport:
logging.warning('%s excluded: has it changed DirPort from %s:%d to ' +
'%s:%d?', self._fpr, self.dirip, int(entry['dirport']),
self.dirip, self.dirport)
continue
if int(entry['orport']) != self.orport:
logging.warning('%s excluded: has it changed ORPort from %s:%d to ' +
'%s:%d?', self._fpr, self.dirip, int(entry['orport']),
self.dirip, self.orport)
continue
if entry.has_key('ipv6') and self.has_ipv6():
# if both entry and fallback have an ipv6 address, compare them
if entry['ipv6'] != ipv6:
logging.warning('%s excluded: has it changed IPv6 ORPort from %s ' +
'to %s?', self._fpr, entry['ipv6'], ipv6)
continue
# if the fallback has an IPv6 address but the whitelist entry
# doesn't, or vice versa, the whitelist entry doesn't match
elif entry.has_key('ipv6') and not self.has_ipv6():
logging.warning('%s excluded: has it lost its former IPv6 address %s?',
self._fpr, entry['ipv6'])
continue
elif not entry.has_key('ipv6') and self.has_ipv6():
logging.warning('%s excluded: has it gained an IPv6 address %s?',
self._fpr, ipv6)
continue
it, otherwise they don't match.
Logs a warning-level message if the fallback would be an exact match,
but one of the id, ipv4, ipv4 orport, ipv4 dirport, or ipv6 orport
have changed. """
if not self.id_matches(entry['id'], exact=True):
# can't log here unless we match an IP and port, because every relay's
# fingerprint is compared to every entry's fingerprint
if self.ipv4_and_orport_matches(entry['ipv4'],
entry['orport'],
exact=True):
logging.warning('%s excluded: has OR %s:%d changed fingerprint to ' +
'%s?', entry['id'], self.dirip, self.orport,
self._fpr)
if self.ipv6_and_orport_matches(entry.get('ipv6_addr'),
entry.get('ipv6_orport'),
exact=True):
logging.warning('%s excluded: has OR %s changed fingerprint to ' +
'%s?', entry['id'], entry['ipv6'], self._fpr)
return False
if not self.ipv4_addr_matches(entry['ipv4'], exact=True):
logging.warning('%s excluded: has it changed IPv4 from %s to %s?',
self._fpr, entry['ipv4'], self.dirip)
return False
if not self.ipv4_dirport_matches(entry['dirport'], exact=True):
logging.warning('%s excluded: has it changed DirPort from %s:%d to ' +
'%s:%d?', self._fpr, self.dirip, int(entry['dirport']),
self.dirip, self.dirport)
return False
if not self.ipv4_orport_matches(entry['orport'], exact=True):
logging.warning('%s excluded: has it changed ORPort from %s:%d to ' +
'%s:%d?', self._fpr, self.dirip, int(entry['orport']),
self.dirip, self.orport)
return False
if entry.has_key('ipv6') and self.has_ipv6():
# if both entry and fallback have an ipv6 address, compare them
if not self.ipv6_and_orport_matches(entry['ipv6_addr'],
entry['ipv6_orport'],
exact=True):
logging.warning('%s excluded: has it changed IPv6 ORPort from %s ' +
'to %s:%d?', self._fpr, entry['ipv6'],
self.ipv6addr, self.ipv6orport)
return False
# if the fallback has an IPv6 address but the whitelist entry
# doesn't, or vice versa, the whitelist entry doesn't match
elif entry.has_key('ipv6') and not self.has_ipv6():
logging.warning('%s excluded: has it lost its former IPv6 address %s?',
self._fpr, entry['ipv6'])
return False
elif not entry.has_key('ipv6') and self.has_ipv6():
logging.warning('%s excluded: has it gained an IPv6 address %s:%d?',
self._fpr, self.ipv6addr, self.ipv6orport)
return False
return True
def entry_matches_fuzzy(self, entry):
""" Is entry a fuzzy match for this fallback?
A fallback is a fuzzy match for entry if at least one of these keys
in entry matches:
id
ipv4
ipv6 (if present in both the fallback and whitelist)
The ports and nickname are ignored. Missing or extra ipv6 addresses
are ignored.
Doesn't log any warning messages. """
if self.id_matches(entry['id'], exact=False):
return True
if self.ipv4_addr_matches(entry['ipv4'], exact=False):
return True
if entry.has_key('ipv6') and self.has_ipv6():
# if both entry and fallback have an ipv6 address, compare them
if self.ipv6_addr_matches(entry['ipv6_addr'], exact=False):
return True
return False
def is_in_whitelist(self, relaylist, exact=False):
""" If exact is True (existing fallback list), check if this fallback is
an exact match for any whitelist entry, using entry_matches_exact().
If exact is False (new fallback whitelist), check if this fallback is
a fuzzy match for any whitelist entry, using entry_matches_fuzzy(). """
for entry in relaylist:
if exact:
if self.entry_matches_exact(entry):
return True
else:
if self.entry_matches_fuzzy(entry):
return True
return False
def cw_to_bw_factor(self):
@ -1124,6 +1252,7 @@ class Candidate(object):
).run()[0]
end = datetime.datetime.utcnow()
time_since_expiry = (end - consensus.valid_until).total_seconds()
time_until_valid = (consensus.valid_after - end).total_seconds()
except Exception, stem_error:
end = datetime.datetime.utcnow()
log_excluded('Unable to retrieve a consensus from %s: %s', nickname,
@ -1141,8 +1270,17 @@ class Candidate(object):
download_failed = True
elif (time_since_expiry > 0):
status = 'outdated consensus, expired %ds ago'%(int(time_since_expiry))
if time_since_expiry <= CONSENSUS_EXPIRY_TOLERANCE:
status += ', tolerating up to %ds'%(CONSENSUS_EXPIRY_TOLERANCE)
if time_since_expiry <= REASONABLY_LIVE_TIME:
status += ', tolerating up to %ds'%(REASONABLY_LIVE_TIME)
level = logging.INFO
else:
status += ', invalid'
level = logging.WARNING
download_failed = True
elif (time_until_valid > 0):
status = 'future consensus, valid in %ds'%(int(time_until_valid))
if time_until_valid <= REASONABLY_LIVE_TIME:
status += ', tolerating up to %ds'%(REASONABLY_LIVE_TIME)
level = logging.INFO
else:
status += ', invalid'
@ -1400,7 +1538,7 @@ class CandidateList(dict):
each line's key/value pairs are placed in a dictonary,
(of string -> string key/value pairs),
and these dictionaries are placed in an array.
comments start with # and are ignored """
comments start with # and are ignored. """
file_data = file_obj['data']
file_name = file_obj['name']
relaylist = []
@ -1440,18 +1578,28 @@ class CandidateList(dict):
relay_entry['dirport'] = ipv4_maybe_dirport_split[1]
elif kvl == 2:
relay_entry[key_value_split[0]] = key_value_split[1]
# split ipv6 addresses and orports
if key_value_split[0] == 'ipv6':
ipv6_orport_split = key_value_split[1].rsplit(':', 1)
ipv6l = len(ipv6_orport_split)
if ipv6l != 2:
print '#error Bad %s IPv6 item: %s, format is [ipv6]:orport.'%(
file_name, item)
relay_entry['ipv6_addr'] = ipv6_orport_split[0]
relay_entry['ipv6_orport'] = ipv6_orport_split[1]
relaylist.append(relay_entry)
return relaylist
# apply the fallback whitelist
def apply_filter_lists(self, whitelist_obj):
def apply_filter_lists(self, whitelist_obj, exact=False):
""" Apply the fallback whitelist_obj to this fallback list,
passing exact to is_in_whitelist(). """
excluded_count = 0
logging.debug('Applying whitelist')
# parse the whitelist
whitelist = self.load_relaylist(whitelist_obj)
filtered_fallbacks = []
for f in self.fallbacks:
in_whitelist = f.is_in_whitelist(whitelist)
in_whitelist = f.is_in_whitelist(whitelist, exact=exact)
if in_whitelist:
# include
filtered_fallbacks.append(f)
@ -2064,14 +2212,14 @@ def process_existing():
logging.getLogger('stem').setLevel(logging.INFO)
whitelist = {'data': parse_fallback_file(FALLBACK_FILE_NAME),
'name': FALLBACK_FILE_NAME}
list_fallbacks(whitelist)
list_fallbacks(whitelist, exact=True)
def process_default():
logging.basicConfig(level=logging.WARNING)
logging.getLogger('stem').setLevel(logging.WARNING)
whitelist = {'data': read_from_file(WHITELIST_FILE_NAME, MAX_LIST_FILE_SIZE),
'name': WHITELIST_FILE_NAME}
list_fallbacks(whitelist)
list_fallbacks(whitelist, exact=False)
## Main Function
def main():
@ -2092,10 +2240,10 @@ def log_excluded(msg, *args):
else:
logging.info(msg, *args)
def list_fallbacks(whitelist):
def list_fallbacks(whitelist, exact=False):
""" Fetches required onionoo documents and evaluates the
fallback directory criteria for each of the relays """
fallback directory criteria for each of the relays,
passing exact to apply_filter_lists(). """
print "/* type=fallback */"
print ("/* version={} */"
.format(cleanse_c_multiline_comment(FALLBACK_FORMAT_VERSION)))
@ -2135,7 +2283,7 @@ def list_fallbacks(whitelist):
# warning that the details have changed from those in the whitelist.
# instead, there will be an info-level log during the eligibility check.
initial_count = len(candidates.fallbacks)
excluded_count = candidates.apply_filter_lists(whitelist)
excluded_count = candidates.apply_filter_lists(whitelist, exact=exact)
print candidates.summarise_filters(initial_count, excluded_count)
eligible_count = len(candidates.fallbacks)