Add my scripts to dump directories to contrib

svn:r16736
This commit is contained in:
Peter Palfrader 2008-09-02 09:42:55 +00:00
parent 3c5c7994b0
commit 8dd5a3d830
6 changed files with 460 additions and 0 deletions

View File

@ -0,0 +1,3 @@
# Every hour at minute 10: run fetch-all-v3 (v3 consensus, votes, descriptors).
10 * * * * cd projects/tor-v2dir && ./fetch-all-v3
# Every hour at minute 40: run fetch-all (v2 status documents, descriptors).
40 * * * * cd projects/tor-v2dir && ./fetch-all
# At 03:15 on the 6th of every month: sort old descriptors into per-month
# folders and, if that succeeded, tar up last month. stdout of both is discarded.
15 3 6 * * cd projects/tor-v2dir && ./sort-into-month-folder > /dev/null && ./tar-them-up last > /dev/null

View File

@ -0,0 +1,77 @@
#!/bin/bash
# Download all current v2 directory status documents, then download
# the descriptors and extra info documents.
# Copyright (c) 2005, 2006, 2007, 2008 Peter Palfrader
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# All timestamps and date-based directory names below are in UTC.
TZ=UTC
export TZ

# Directory servers to query, as a whitespace-separated list of host:port
# pairs. They are tried in the order listed.
DIRSERVERS=""
DIRSERVERS="$DIRSERVERS 86.59.21.38:80" # tor26
DIRSERVERS="$DIRSERVERS 128.31.0.34:9031" # moria1
DIRSERVERS="$DIRSERVERS 128.31.0.34:9032" # moria2
#DIRSERVERS="$DIRSERVERS 140.247.60.64:80" # lefkada
DIRSERVERS="$DIRSERVERS 194.109.206.212:80" # dizum

DATEDIR=$(date "+%Y/%m/%d")
TIME=$(date "+%Y%m%d-%H%M%S")

# Provides fetch_digest.
. fetch-all-functions

# Space-separated list of the compressed status files stored by this run.
statuses=""
for dirserver in $DIRSERVERS; do
  authorities=$(wget -q -O - "http://$dirserver/tor/status/all" \
    | grep '^fingerprint ' \
    | awk '{print $2}')
  if [ -z "$authorities" ]; then
    echo "Did not get a list of authorities from $dirserver, going to next" >&2
    continue
  fi

  dir="status/$DATEDIR"
  [ -d "$dir" ] || mkdir -p "$dir"
  authprefix="$dir/$TIME-"

  for fp in $authorities; do
    # The fingerprint comes from the network and becomes part of a file
    # name, so accept nothing but hex digits.
    case "$fp" in
      *[!0123456789abcdefABCDEF]*)
        echo "Skipping malformed fingerprint '$fp' from $dirserver" >&2
        continue
        ;;
    esac
    # wget -O creates the output file even when the download fails; only
    # keep (and count) documents that actually arrived.
    if wget -q -O "$authprefix$fp" "http://$dirserver/tor/status/fp/$fp" \
      && [ -s "$authprefix$fp" ]; then
      if bzip2 "$authprefix$fp"; then
        statuses="$statuses $authprefix$fp.bz2"
      fi
    else
      rm -f "$authprefix$fp"
    fi
  done

  if [ -z "$statuses" ]; then
    echo "Did not get any statuses from $dirserver, going to next" >&2
    continue
  else
    # One server that delivered statuses is enough.
    break
  fi
done

if [ -z "$statuses" ]; then
  echo "No statuses available" >&2
  exit 1
fi

# Take the fourth field of every "r" line in the stored statuses (a base64
# value, padded here with "==="), and convert each distinct value to hex.
# $statuses is deliberately unquoted: it is a space-separated file list.
digests=$(
  for i in $(bzcat $statuses | awk '$1 == "r" {printf "%s===\n", $4}' | sort -u); do
    printf '%s\n' "$i" \
      | base64-decode \
      | perl -e 'undef $/; $a=<>; print unpack("H\*", $a),"\n";'
  done
)

for digest in $digests; do
  fetch_digest "$digest" "server-descriptor"
done

View File

@ -0,0 +1,72 @@
#!/bin/bash
# function used by fetch-all* to download server descriptors and
# extra info documents
# Copyright (c) 2005, 2006, 2007, 2008 Peter Palfrader
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#######################################
# Download one document by digest and store it under
# <objecttype>/<c1>/<c2>/<digest>, where c1 and c2 are the first two
# characters of the digest. Nothing is downloaded if that file already
# exists. The servers in DIRSERVERS are tried in order until one returns a
# non-empty document. If the stored document names an extra-info digest
# ("opt extra-info-digest" or "extra-info-digest" line), that extra-info
# document is fetched as well.
# Globals:   DIRSERVERS (read) - whitespace-separated host:port list
# Arguments: $1 - digest, hex digits only, at least two characters
#            $2 - "server-descriptor" or "extra-info"
# Returns:   0 normally; 1 if the digest is malformed.
#            Exits the shell with status 1 on an unknown object type.
#######################################
fetch_digest() {
  local digest
  local objecttype
  local urlpart
  local pathpart
  local target
  local targetdir
  local dirserver
  local ei

  digest="$1"
  objecttype="$2"

  case "$objecttype" in
    server-descriptor)
      urlpart="server"
      pathpart="server-descriptor"
      ;;
    extra-info)
      urlpart="extra"
      pathpart="extra-info"
      ;;
    *)
      echo "Called fetch_digest with illegal objecttype '$objecttype'" >&2
      exit 1
      ;;
  esac

  # The digest becomes part of a file system path and may originate from a
  # downloaded document, so refuse anything that is not plain hex (this
  # also rules out "/" and ".." as well as empty or one-character values).
  case "$digest" in
    "" | ? | *[!0123456789abcdefABCDEF]*)
      echo "fetch_digest: refusing malformed digest '$digest'" >&2
      return 1
      ;;
  esac

  target="$pathpart/${digest:0:1}/${digest:1:1}/$digest"
  targetdir="${target%/*}"
  [ -d "$targetdir" ] || mkdir -p "$targetdir"

  if ! [ -e "$target" ]; then
    for dirserver in $DIRSERVERS; do
      wget -q -O "$target" "http://$dirserver/tor/$urlpart/d/$digest" \
        || rm -f "$target"
      if [ -s "$target" ]; then
        # Prefer the "opt"-prefixed form, fall back to the plain one; only
        # the first matching line is used.
        ei=$(awk '/^opt extra-info-digest / {print $3; exit}' "$target" \
          | tr 'A-F' 'a-f')
        if [ -z "$ei" ]; then
          ei=$(awk '/^extra-info-digest / {print $2; exit}' "$target" \
            | tr 'A-F' 'a-f')
        fi
        if [ -n "$ei" ]; then
          fetch_digest "$ei" "extra-info"
        fi
        break
      else
        # Empty or missing download: discard and try the next server.
        rm -f "$target"
      fi
    done
  fi
  #if ! [ -e "$target" ]; then
  #  echo "$objecttype $digest" >> failed
  #fi
  return 0
}

View File

@ -0,0 +1,109 @@
#!/bin/bash
# Download all current v3 directory status votes and the consensus document,
# then download the descriptors and extra info documents.
# Copyright (c) 2005, 2006, 2007, 2008 Peter Palfrader
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# All timestamps below are in UTC.
TZ=UTC
export TZ

# Directory servers to query, as a whitespace-separated list of host:port
# pairs. They are tried in the order listed.
DIRSERVERS=""
DIRSERVERS="$DIRSERVERS 86.59.21.38:80" # tor26
DIRSERVERS="$DIRSERVERS 128.31.0.34:9031" # moria1
DIRSERVERS="$DIRSERVERS 216.224.124.114:9030" # ides
DIRSERVERS="$DIRSERVERS 88.198.7.215:80" # gabelmoo
#DIRSERVERS="$DIRSERVERS 140.247.60.64:80" # lefkada
DIRSERVERS="$DIRSERVERS 194.109.206.212:80" # dizum
DIRSERVERS="$DIRSERVERS 128.31.0.34:9032" # moria2

TIME=$(date "+%Y%m%d-%H%M%S")

# Provides fetch_digest.
. fetch-all-functions

consensus=""
tmpdir="consensus/tmp"
[ -d "$tmpdir" ] || mkdir -p "$tmpdir"
freshconsensus="$tmpdir/$TIME-consensus"

for dirserver in $DIRSERVERS; do
  if ! wget -q -O "$freshconsensus" \
    "http://$dirserver/tor/status-vote/current/consensus"; then
    rm -f "$freshconsensus"
    continue
  fi

  # "valid-after YYYY-MM-DD HH:MM:SS" names the consensus. The value ends
  # up in file and directory names, so insist on exactly that shape; this
  # also catches downloads that are not a consensus at all.
  fresh_timestamp=$(awk '$1=="valid-after" {printf "%s-%s", $2, $3}' < "$freshconsensus")
  case "$fresh_timestamp" in
    [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]-[0-9][0-9]:[0-9][0-9]:[0-9][0-9]) ;;
    *)
      echo "Consensus from $dirserver has no usable valid-after line, going to next" >&2
      rm -f "$freshconsensus"
      continue
      ;;
  esac
  timestamp="$fresh_timestamp"
  # YYYY-MM-DD -> YYYY/MM/DD
  datedir=$(printf '%s' "${timestamp%-*}" | tr '-' '/')

  dir="consensus/$datedir"
  [ -d "$dir" ] || mkdir -p "$dir"
  consensus="$dir/$timestamp-consensus.bz2"
  if ! [ -e "$consensus" ]; then
    # the consensus is new, or at least we don't have it yet
    bzip2 "$freshconsensus"
    mv "$freshconsensus.bz2" "$consensus"
    break
  fi
  rm -f "$freshconsensus"
  echo "Consensus from $timestamp (gotten from $dirserver) already exists!" >&2
  # maybe there is a newer one on a different authority, so try again.
done

if [ -z "$consensus" ]; then
  echo "No consensus available" >&2
  exit 1
fi

# Fetch every vote listed in the consensus, from the first server that has it.
votes=$(bzcat "$consensus" | awk '$1 == "vote-digest" {print $2}')
for vote in $votes; do
  # The digest is used in a file name and a URL: hex digits only.
  case "$vote" in
    *[!0123456789abcdefABCDEF]*)
      echo "Skipping malformed vote digest '$vote'" >&2
      continue
      ;;
  esac

  votefile="$dir/$TIME-vote-$vote"
  for dirserver in $DIRSERVERS; do
    if wget -q -O "$votefile" \
      "http://$dirserver/tor/status-vote/current/d/$vote"; then
      break
    fi
    rm -f "$votefile"
  done

  if [ -e "$votefile" ]; then
    voteridentity=$(awk '$1=="fingerprint" {print $2}' < "$votefile")
    finalvote="$dir/$timestamp-vote-$voteridentity-$vote"
    if [ -e "$finalvote.bz2" ]; then
      echo "Vote $vote from $voteridentity already exists!" >&2
      rm -f "$votefile"
      continue
    fi
    mv "$votefile" "$finalvote"
    bzip2 "$finalvote"
  else
    echo "Failed to get vote $vote!" >&2
  fi
done

# Take the fourth field of every "r" line in the consensus (a base64 value,
# padded here with "==="), and convert each distinct value to hex.
digests=$(
  for i in $(bzcat "$consensus" | awk '$1 == "r" {printf "%s===\n", $4}' | sort -u); do
    printf '%s\n' "$i" \
      | base64-decode \
      | perl -e 'undef $/; $a=<>; print unpack("H\*", $a),"\n";'
  done
)

for digest in $digests; do
  fetch_digest "$digest" "server-descriptor"
done

View File

@ -0,0 +1,74 @@
#!/usr/bin/perl -w
# Sort dumped consensuses, statuses, descriptors etc into per-month folders.
# Copyright (c) 2006, 2007, 2008 Peter Palfrader
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
use strict;
use File::Find;
use File::Basename;
use File::stat;
use Time::Local;
# Epoch seconds; files modified at or after this moment are left in place.
# Assigned by the main program before find() is called.
my $cutofftime;

# File::Find callback. $_ holds the path of the current entry (the callers
# use no_chdir). Regular files older than $cutofftime are renamed from
# <top>/<sub...>/<file> to <top>s-YYYY-MM/<sub...>/<file>, where YYYY-MM is
# the UTC month of the file's modification time. Missing destination
# directories are created one level at a time.
sub wanted() {
    -f $_ or return;

    my $modified = stat($_)->mtime;
    return unless $modified < $cutofftime;

    my ($month0, $year1900) = (gmtime $modified)[4, 5];

    my $leaf  = basename($_);
    my @parts = split /\//, dirname($_);
    $parts[0] .= sprintf 's-%4d-%02d', 1900 + $year1900, $month0 + 1;
    my $destdir = join '/', @parts;

    unless (-d $destdir) {
        my $partial = '.';
        foreach my $part (@parts) {
            $partial = $partial . '/' . $part;
            next if -d $partial;
            mkdir $partial or die ("Cannot mkdir $partial: $!\n");
        }
    }

    print "$_ -> $destdir/$leaf\n";
    rename $_, "$destdir/$leaf" or die ("Cannot rename $_ to $destdir/$leaf: $!\n");
}
# The cutoff is midnight UTC on the first day of the month that contained
# the moment five days ago.
my @five_days_ago = gmtime(time - 5*24*3600);
$cutofftime = timegm(0, 0, 0, 1, $five_days_ago[4], $five_days_ago[5]);

# Walk both object trees; wanted() does the actual moving.
for my $tree ('server-descriptor', 'extra-info') {
    find({ wanted => \&wanted, no_chdir => 1 }, $tree);
}

View File

@ -0,0 +1,125 @@
#!/bin/sh
# Tar up dumped consensuses, statuses, descriptors etc from per-month folders
# into per-month tarballs.
# Copyright (c) 2006, 2007, 2008 Peter Palfrader
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Abort as soon as a command fails.
set -e
# Trace every command on stderr as it is executed.
set -x
# Treat the expansion of an unset variable as an error.
set -u
# Write the command-line synopsis to stderr and terminate with status 1.
usage() {
  {
    echo "Usage: $0 <year> <month>"
    echo " $0 last (does last month)"
  } >&2
  exit 1
}
# --- Argument handling -------------------------------------------------------
if [ -z "${1:-}" ]; then
  usage
fi

if [ "$1" = "last" ]; then
  # Step back one month from the 15th of the current UTC month. A plain
  # "last month" is unreliable near the end of a month, when the previous
  # month has no day with the same number.
  last_month=$(date --utc --date="$(date --utc +'%Y-%m-15') -1 month" +'%Y-%m')
  year=${last_month%-*}
  month=${last_month#*-}
elif [ -z "${2:-}" ]; then
  usage
else
  year="$1"
  month="$2"
fi

# Year: four digits, not before 2000. Month: exactly two digits, 01..12.
# There is no fixed upper bound on the year; the check against the current
# month below rejects anything that is not yet in the past.
case "$year" in
  [0-9][0-9][0-9][0-9]) ;;
  *) usage ;;
esac
case "$month" in
  0[1-9] | 1[0-2]) ;;
  *) usage ;;
esac
if [ "$year" -lt 2000 ]; then
  usage
fi

# Only completed months may be archived (compared as YYYYMM numbers).
this_month=$(date --utc +'%Y%m')
if [ "$year$month" -ge "$this_month" ]; then
  echo "Date in the future or current month?" >&2
  exit 1
fi

# --- Preconditions -----------------------------------------------------------
# No tarball for that month may exist yet.
for file in \
  "extra-infos-$year-$month.tar.bz2" \
  "server-descriptors-$year-$month.tar.bz2" \
  "consensuses-$year-$month.tar.bz2" \
  "statuses-$year-$month.tar.bz2" \
  ; do
  if [ -e "$file" ]; then
    echo "$file already exists" >&2
    exit 1
  fi
done

# All four input directories must be present.
for dir in \
  "extra-infos-$year-$month" \
  "server-descriptors-$year-$month" \
  "consensus/$year/$month" \
  "status/$year/$month" \
  ; do
  if ! [ -d "$dir" ]; then
    echo "$dir not found" >&2
    exit 1
  fi
done

# The staging directories created below must not exist yet.
for dir in \
  "consensuses-$year-$month" \
  "statuses-$year-$month" \
  ; do
  if [ -e "$dir" ]; then
    echo "$dir already exists" >&2
    exit 1
  fi
done

# Make sure the destination is a directory before touching any data;
# otherwise "mv file Archive" would rename the tarball to a file called
# Archive, and the next tarball would overwrite it.
[ -d Archive ] || mkdir Archive

# --- Build the tarballs ------------------------------------------------------
# Consensuses and statuses are stored as individual .bz2 files; move the
# month out of its tree, decompress the files, then pack the directory.
for kind in consensus status; do
  name="${kind}es-$year-$month"
  mv "$kind/$year/$month" "$name"
  # -exec ... + never runs bunzip2 without file arguments.
  find "$name" -type f -name '*.bz2' -exec bunzip2 -v {} +
  tar cjvf "$name.tar.bz2" "$name"
  rm -rf "$name"
done

# Descriptors and extra-infos already sit in per-month directories.
for kind in extra-infos server-descriptors; do
  name="$kind-$year-$month"
  tar cjvf "$name.tar.bz2" "$name"
  rm -rf "$name"
done

# --- Move the tarballs into the archive --------------------------------------
# Never overwrite an archived tarball; report it and exit non-zero instead.
rc=0
for t in \
  "consensuses-$year-$month.tar.bz2" \
  "statuses-$year-$month.tar.bz2" \
  "extra-infos-$year-$month.tar.bz2" \
  "server-descriptors-$year-$month.tar.bz2" \
  ; do
  if [ -e "Archive/$t" ]; then
    echo "Archive/$t already exists, leaving $t where it is" >&2
    rc=1
  else
    mv "$t" Archive/
  fi
done
exit "$rc"