From 430181fad28fb3275a3771c78453a579681dfa5e Mon Sep 17 00:00:00 2001 From: "teor (Tim Wilson-Brown)" Date: Fri, 18 Dec 2015 08:47:03 +1100 Subject: [PATCH] Allow fallback directory selection to use day-old data Allow cached or outdated Onionoo data to be used to choose fallback directories, as long as it's less than a day old. Modify last modified date checks in preparation for Onionoo change --- changes/bug17887 | 6 ++++ scripts/maint/updateFallbackDirs.py | 54 +++++++++++++++++++---------- 2 files changed, 41 insertions(+), 19 deletions(-) create mode 100644 changes/bug17887 diff --git a/changes/bug17887 b/changes/bug17887 new file mode 100644 index 0000000000..423c298bd5 --- /dev/null +++ b/changes/bug17887 @@ -0,0 +1,6 @@ + o Minor fixes (fallback directories): + - Allow cached or outdated Onionoo data to be used to choose + fallback directories, as long as it's less than a day old. + Modify last modified date checks in preparation for Onionoo change + #16907. Closes ticket #17887. Patch by "teor". Not in any released + version of tor. diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py index 8fe234f206..316962b2b3 100755 --- a/scripts/maint/updateFallbackDirs.py +++ b/scripts/maint/updateFallbackDirs.py @@ -266,6 +266,17 @@ def load_json_from_file(json_file_name): ## OnionOO Functions +def datestr_to_datetime(datestr): + # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT + if datestr is not None: + dt = dateutil.parser.parse(datestr) + else: + # Never modified - use start of epoch + dt = datetime.datetime.utcfromtimestamp(0) + # strip any timezone out (in case they're supported in future) + dt = dt.replace(tzinfo=None) + return dt + def onionoo_fetch(what, **kwargs): params = kwargs params['type'] = 'relay' @@ -304,36 +315,41 @@ def onionoo_fetch(what, **kwargs): if last_mod_date is not None: request.add_header('If-modified-since', last_mod_date) - # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT - if last_mod_date is not None: - last_mod = dateutil.parser.parse(last_mod_date) - else: - # Never modified - use start of epoch - last_mod = datetime.datetime.utcfromtimestamp(0) - # strip any timezone out (in case they're supported in future) - last_mod = last_mod.replace(tzinfo=None) + # Parse last modified date + last_mod = datestr_to_datetime(last_mod_date) + # Not Modified and still recent enough to be useful + # Onionoo / Globe used to use 6 hours, but we can afford a day + required_freshness = datetime.datetime.utcnow() + # strip any timezone out (to match dateutil.parser) + required_freshness = required_freshness.replace(tzinfo=None) + required_freshness -= datetime.timedelta(hours=24) + + # Make the OnionOO request response_code = 0 try: response = urllib2.urlopen(request) response_code = response.getcode() except urllib2.HTTPError, error: response_code = error.code - # strip any timezone out (to match dateutil.parser) - six_hours_ago = datetime.datetime.utcnow() - six_hours_ago = six_hours_ago.replace(tzinfo=None) - six_hours_ago -= datetime.timedelta(hours=6) - # Not Modified and still recent enough to be useful (Globe uses 6 hours) - if response_code == 304: - if last_mod < six_hours_ago: - raise Exception("Outdated data from " + url + ": " - + str(error.code) + ": " + error.reason) - else: - pass + if response_code == 304: # not modified + pass else: raise Exception("Could not get " + url + ": " + str(error.code) + ": " + error.reason) + if response_code == 200: # OK + last_mod = datestr_to_datetime(response.info().get('Last-Modified')) + + # Check for freshness + if last_mod < required_freshness: + if last_mod_date is not None: + date_message = "Outdated data: last updated " + last_mod_date + else: + date_message = "No data: never downloaded " + raise Exception(date_message + " from " + url) + + # Process the data if response_code == 200: # OK response_json = load_possibly_compressed_response_json(response)