Author: phobos Date: 2013-12-04 22:12:29 +0000 (Wed, 04 Dec 2013) New Revision: 26450
Modified: website/trunk/include/mirrors-table.wmi website/trunk/update-mirrors.pl Log: New update-mirrors script (see ticket 10269); update the mirrors table with the new version.
Modified: website/trunk/include/mirrors-table.wmi =================================================================== --- website/trunk/include/mirrors-table.wmi 2013-12-04 10:57:57 UTC (rev 26449) +++ website/trunk/include/mirrors-table.wmi 2013-12-04 22:12:29 UTC (rev 26450) @@ -1,23 +1,6 @@
<tr>
- <td>INT</td> - - <td>CoralCDN</td> - - <td>Up to date</td> - - <td> - </td> - <td><a href="http://www.torproject.org.nyud.net/dist/">http</a></td> - <td><a href="http://www.torproject.org.nyud.net/">http</a></td> - <td> - </td> - <td> - </td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - <td>CZ</td>
<td>Lightning-bolt.net</td> @@ -120,23 +103,6 @@
<tr>
- <td>LT</td> - - <td></td> - - <td>Up to date</td> - - <td> - </td> - <td><a href="http://tor.vesta.nu/dist/">http</a></td> - <td><a href="http://tor.vesta.nu/">http</a></td> - <td> - </td> - <td> - </td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - <td>DE</td>
<td></td> @@ -341,40 +307,6 @@
<tr>
- <td>US</td> - - <td>AskApache</td> - - <td>Up to date</td> - - <td> - </td> - <td><a href="http://tor.askapache.com/dist/">http</a></td> - <td><a href="http://tor.askapache.com/">http</a></td> - <td> - </td> - <td> - </td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - - <td>DE</td> - - <td>Netzkonstrukt Berlin</td> - - <td>Up to date</td> - - <td> - </td> - <td><a href="http://mirror.ntzk.de/torproject.org/dist/">http</a></td> - <td><a href="http://mirror.ntzk.de/torproject.org/">http</a></td> - <td> - </td> - <td> - </td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - <td>RU</td>
<td>Soviet Anonymous</td> @@ -443,36 +375,36 @@
<tr>
- <td>AT</td> + <td>INT</td>
- <td>TechAsk.IT</td> + <td>CoralCDN</td>
<td>Up to date</td>
<td> - </td> - <td><a href="http://www.unicorncloud.org/public/torproject.org/dist">http</a></td> - <td><a href="http://www.unicorncloud.org/public/torproject.org/">http</a></td> - <td><a href="https://www.unicorncloud.org/public/torproject.org/dist">https</a></td> - <td><a href="https://www.unicorncloud.org/public/torproject.org/">https</a></td> + <td><a href="http://www.torproject.org.nyud.net/dist/">http</a></td> + <td><a href="http://www.torproject.org.nyud.net/">http</a></td> <td> - </td> <td> - </td> + <td> - </td> + <td> - </td> </tr>
<tr>
- <td>NL</td> + <td>AT</td>
- <td>Amorphis</td> + <td>TechAsk.IT</td>
<td>Up to date</td>
<td> - </td> - <td><a href="http://tor.amorphis.eu/dist/">http</a></td> - <td><a href="http://tor.amorphis.eu/">http</a></td> + <td><a href="http://www.unicorncloud.org/public/torproject.org/dist">http</a></td> + <td><a href="http://www.unicorncloud.org/public/torproject.org/">http</a></td> + <td><a href="https://www.unicorncloud.org/public/torproject.org/dist">https</a></td> + <td><a href="https://www.unicorncloud.org/public/torproject.org/">https</a></td> <td> - </td> <td> - </td> - <td> - </td> - <td> - </td> </tr>
<tr> @@ -630,6 +562,23 @@
<tr>
+ <td>MX</td> + + <td></td> + + <td>Up to date</td> + + <td> - </td> + <td><a href="http://fbnaia.homelinux.net/torproject/dist/">http</a></td> + <td><a href="http://fbnaia.homelinux.net/torproject/">http</a></td> + <td><a href="https://fbnaia.homelinux.net/torproject//dist/">https</a></td> + <td><a href="https://fbnaia.homelinux.net/torproject/">https</a></td> + <td> - </td> + <td> - </td> +</tr> + +<tr> + <td>NL</td>
<td>BBLN</td> @@ -834,34 +783,17 @@
<tr>
- <td>IN</td> + <td>LT</td>
- <td>India Tor Fans</td> - - <td>DO NOT USE. Out of date.</td> - - <td> - </td> - <td><a href="http://www.torproject.org.in/dist/">http</a></td> - <td><a href="http://www.torproject.org.in/">http</a></td> - <td> - </td> - <td> - </td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - - <td>TN</td> - <td></td>
- <td>DO NOT USE. Out of date.</td> + <td>Up to date</td>
<td> - </td> + <td><a href="http://tor.vesta.nu/dist/">http</a></td> + <td><a href="http://tor.vesta.nu/">http</a></td> <td> - </td> - <td><a href="http://torproject.antagonism.org/">http</a></td> <td> - </td> - <td><a href="https://torproject.antagonism.org/">https</a></td> <td> - </td> <td> - </td> </tr> @@ -870,13 +802,13 @@
<td>US</td>
- <td>searchprivate</td> + <td>AskApache</td>
- <td>DO NOT USE. Out of date.</td> + <td>Up to date</td>
<td> - </td> - <td><a href="http://tor.searchprivate.com/dist/">http</a></td> - <td><a href="http://tor.searchprivate.com/">http</a></td> + <td><a href="http://tor.askapache.com/dist/">http</a></td> + <td><a href="http://tor.askapache.com/">http</a></td> <td> - </td> <td> - </td> <td> - </td> @@ -885,289 +817,34 @@
<tr>
- <td>DE</td> - - <td></td> - - <td>Unknown</td> - - <td> - </td> - <td><a href="http://tor.taskserver.de/dist/">http</a></td> - <td><a href="http://tor.taskserver.de/">http</a></td> - <td> - </td> - <td> - </td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - - <td>US</td> - - <td></td> - - <td>Unknown</td> - - <td> - </td> - <td><a href="http://tor.mirrors.whitedholdings.org/dist/">http</a></td> - <td><a href="http://tor.mirrors.whitedholdings.org/">http</a></td> - <td><a href="https://tor.mirrors.whitedholdings.org/dist/">https</a></td> - <td><a href="https://tor.mirrors.whitedholdings.org/">https</a></td> - <td><a href="rsync://tor.mirrors.whitedholdings.org/tor-dist">rsync</a></td> - <td><a href="rsync://mirrors.whitedholdings.org/tor">rsync</a></td> -</tr> - -<tr> - - <td>US</td> - - <td></td> - - <td>Unknown</td> - - <td><a href="ftp://mirrors.go-parts.com/tor/">ftp</a></td> - <td><a href="http://mirrors.go-parts.com/tor/dist/">http</a></td> - <td> - </td> - <td> - </td> - <td> - </td> - <td><a href="rsync://mirrors.go-parts.com/mirrors/tor/">rsync</a></td> - <td> - </td> -</tr> - -<tr> - - <td>US</td> - - <td></td> - - <td>Unknown</td> - - <td> - </td> - <td><a href="http://tor.loritsu.com/dist/">http</a></td> - <td><a href="http://tor.loritsu.com/">http</a></td> - <td> - </td> - <td> - </td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - - <td>US</td> - - <td>NW Linux</td> - - <td>Unknown</td> - - <td> - </td> - <td><a href="http://torproject.nwlinux.us/dist/">http</a></td> - <td><a href="http://torproject.nwlinux.us/">http</a></td> - <td> - </td> - <td> - </td> - <td><a href="rsync://nwlinux.us/tor-dist">rsync</a></td> - <td><a href="rsync://nwlinux.us/tor-web">rsync</a></td> -</tr> - -<tr> - <td>NL</td>
- <td></td> + <td>Amorphis</td>
- <td>DO NOT USE. Out of date.</td> + <td>Up to date</td>
<td> - </td> + <td><a href="http://tor.amorphis.eu/dist/">http</a></td> + <td><a href="http://tor.amorphis.eu/">http</a></td> <td> - </td> <td> - </td> - <td><a href="https://www.coevoet.nl/tor/dist/">https</a></td> <td> - </td> <td> - </td> - <td> - </td> </tr>
<tr>
- <td>LU</td> + <td>IN</td>
- <td></td> + <td>India Tor Fans</td>
- <td>Unknown</td> - - <td> - </td> - <td><a href="http://torproject.adamas.ai/dist/">http</a></td> - <td><a href="http://torproject.adamas.ai/">http</a></td> - <td> - </td> - <td> - </td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - - <td>FR</td> - - <td>LazyTiger</td> - - <td>Unknown</td> - - <td> - </td> - <td><a href="http://tor.taiga-san.net/dist/">http</a></td> - <td><a href="http://tor.taiga-san.net/">http</a></td> - <td> - </td> - <td> - </td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - - <td>DE</td> - - <td>[[:bbs:]]</td> - - <td>Unknown</td> - - <td> - </td> - <td><a href="http://tor.blingblingsquad.net/dist/">http</a></td> - <td><a href="http://tor.blingblingsquad.net/">http</a></td> - <td><a href="https://tor.blingblingsquad.net/dist/">https</a></td> - <td><a href="https://tor.blingblingsquad.net/">https</a></td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - - <td>US</td> - - <td></td> - <td>DO NOT USE. Out of date.</td>
<td> - </td> - <td><a href="http://www.netgull.com/torproject/">http</a></td> + <td><a href="http://www.torproject.org.in/dist/">http</a></td> + <td><a href="http://www.torproject.org.in/">http</a></td> <td> - </td> <td> - </td> <td> - </td> <td> - </td> - <td> - </td> </tr> - -<tr> - - <td>US</td> - - <td></td> - - <td>Unknown</td> - - <td> - </td> - <td><a href="http://tor.minibofh.org/dist/">http</a></td> - <td><a href="http://tor.minibofh.org/">http</a></td> - <td> - </td> - <td> - </td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - - <td>FR</td> - - <td></td> - - <td>Unknown</td> - - <td> - </td> - <td><a href="http://37.187.0.127/tormirror/dist/">http</a></td> - <td><a href="http://37.187.0.127/tormirror/">http</a></td> - <td> - </td> - <td> - </td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - - <td>MX</td> - - <td></td> - - <td>Unknown</td> - - <td> - </td> - <td><a href="http://fbnaia.homelinux.net/torproject/dist/">http</a></td> - <td><a href="http://fbnaia.homelinux.net/torproject/">http</a></td> - <td><a href="https://fbnaia.homelinux.net/torproject//dist/">https</a></td> - <td><a href="https://fbnaia.homelinux.net/torproject/">https</a></td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - - <td>EE</td> - - <td></td> - - <td>Unknown</td> - - <td> - </td> - <td><a href="http://tor.li/dist/">http</a></td> - <td><a href="http://tor.li/">http</a></td> - <td><a href="https://tor.li/dist/">https</a></td> - <td><a href="https://tor.li/">https</a></td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - - <td>UA</td> - - <td></td> - - <td>Unknown</td> - - <td> - </td> - <td><a href="http://torua.reactor-xg.kiev.ua/dist/">http</a></td> - <td><a href="http://torua.reactor-xg.kiev.ua/">http</a></td> - <td> - </td> - <td> - </td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - - <td>UK</td> - - <td></td> - - <td>Unknown</td> - - <td> - </td> - <td><a href="http://tor.mage.me.uk/dist/">http</a></td> - <td><a href="http://tor.mage.me.uk/">http</a></td> - <td> - </td> - 
<td> - </td> - <td> - </td> - <td> - </td> -</tr> - -<tr> - - <td>DE</td> - - <td>tor-mirror.de</td> - - <td>Unknown</td> - - <td> - </td> - <td><a href="http://tor-mirror.de/dist/">http</a></td> - <td><a href="http://tor-mirror.de/">http</a></td> - <td><a href="https://tor-mirror.de/dist/">https</a></td> - <td><a href="https://tor-mirror.de/">https</a></td> - <td> - </td> - <td> - </td> -</tr>
Modified: website/trunk/update-mirrors.pl =================================================================== --- website/trunk/update-mirrors.pl 2013-12-04 10:57:57 UTC (rev 26449) +++ website/trunk/update-mirrors.pl 2013-12-04 22:12:29 UTC (rev 26450) @@ -1,23 +1,16 @@ #!/usr/bin/perl -w use warnings; use strict; +use Data::Dumper; use LWP::Simple; +use HTML::LinkExtor; use LWP; use Date::Parse; use Date::Format; +use Digest::SHA qw(sha256_hex);
-# -# A quick hack by Jacob Appelbaum jacob@appelbaum.net -# LWP suggestions by Leigh Honeywell # This is Free Software (GPLv3) # http://www.gnu.org/licenses/gpl-3.0.txt -# -# CHANGELOG -# 20091003 Code changes to elimiate the need for a trailing slash in addresses for script runtime -# 20091004 Code changes to increase out of date tolerance to 48 hours -# 20091028 Code changes to increase timout to 30 seconds (attempting to # resolve "unknown" status') -# 20091028 Code changes to change user agent of script -# 20100807 Remove dead mirrors.
print "Creating LWP agent ($LWP::VERSION)...\n"; my $lua = LWP::UserAgent->new( @@ -37,45 +30,71 @@
return $cleanedData; } +sub ExtractLinks { + my $content = shift; + my $url = shift; + my @links;
-sub FetchDate { - my $url = shift; # Base url for mirror - my $trace = "project/trace/www-master.torproject.org"; # this file should always exist - $url = "$url/$trace"; + my $parser = HTML::LinkExtor->new(undef, $url); + $parser->parse($content); + foreach my $linkarray($parser->links) + { + my ($elt_type, $attr_name, $attr_value) = @$linkarray; + if ($elt_type eq 'a' && $attr_name eq 'href' && $attr_value =~ //$/ && $attr_value =~ /^$url/) + { + push @links, Fetch($attr_value, &ExtractLinks); + } + elsif ($attr_value =~ /.(xpi|dmg|exe|tar.gz)$/) + #elsif ($attr_value =~ /.(asc)$/) + { + push @links, $attr_value; + } + } + return @links; +}
- print "Fetching possible date from: $url\n"; +sub ExtractDate { + my $content = shift; + $content = sanitize($content); + my $date = str2time($content);
+ print "Extracting possible date from: $content\n"; + if ($date) { + print "We've fetched a date $date.\n"; + return $date; + } else { + print "We haven't fetched a date.\n"; + return undef; + } +} + +sub ExtractSig { + my $content = shift; + return sha256_hex($content); +} + +sub Fetch { + my ($url, $sub) = @_; # Base url for mirror + print "Fetch $url\n"; + my $request = new HTTP::Request GET => "$url"; my $result = $lua->request($request); my $code = $result->code(); - print "Result code $code\n"; + print "\tResult code $code\n";
if ($result->is_success && $code eq "200"){ - my $taint = $result->content; - my $content = sanitize($taint); + my $content = $result->content; if ($content) { - - my $date = str2time($content); - - if ($date) { - print "We've fetched a date $date.\n"; - return $date; - } else { - print "We've haven't fetched a date.\n"; - return "Unknown"; - } - + return $sub->($content, $url); } else { - print "Unable to fetch date, empty content returned.\n"; - return "Unknown"; + print "Unable to fetch $url, empty content returned.\n"; }
} else { print "Our request failed, we had no result.\n"; - return "Unknown"; }
- return "Unknown"; + return undef; }
# This is the list of all known Tor mirrors @@ -1349,34 +1368,74 @@ print "We have a total of $count mirrors\n"; print "Fetching the last updated date for each mirror.\n";
-my $tortime; -$tortime = FetchDate("https://www.torproject.org/"); +my $tortime = Fetch("https://www.torproject.org/project/trace/www-master.torproject.org", &ExtractDate); +my @torfiles = Fetch("https://www.torproject.org/dist/", &ExtractLinks); +my %randomtorfiles; + +for (1 .. 1) +{ + my $r = int(rand(scalar(@torfiles))); + my $suffix = $torfiles[$r]; + $suffix =~ s/^https://www.torproject.org//; + $randomtorfiles{$suffix} = Fetch($torfiles[$r], &ExtractSig); +} + +print "Using these files for sig matching:\n"; +print join("\n", keys %randomtorfiles); + # Adjust offical Tor time by out-of-date offset: number of days * seconds per day $tortime -= 1 * 172800; print "The official time for Tor is $tortime. \n"; +my %todelete;
foreach my $server ( keys %m ) {
- print "Attempting to fetch from $m{$server}{'orgName'}\n"; + print "Attempting to fetch from $server: $m{$server}{'orgName'}\n";
- if ($m{$server}{'httpWebsiteMirror'}) { - print "Attempt to fetch via HTTP.\n"; - $m{$server}{"updateDate"} = FetchDate("$m{$server}{'httpWebsiteMirror'}"); - } elsif ($m{$server}{'httpsWebsiteMirror'}) { - print "Attempt to fetch via HTTPS.\n"; - $m{$server}{"updateDate"} = FetchDate("$m{$server}{'httpsWebsiteMirror'}"); - } elsif ($m{$server}{'ftpWebsiteMirror'}) { - print "Attempt to fetch via FTP.\n"; - $m{$server}{"updateDate"} = FetchDate("$m{$server}{'ftpWebsiteMirror'}"); + foreach my $serverType('httpWebsiteMirror', 'httpsWebsiteMirror', 'ftpWebsiteMirror') + { + if ($m{$server}{$serverType}) { + print "Attempt to fetch via $serverType.\n"; + $m{$server}{"updateDate"} = Fetch("$m{$server}{$serverType}/project/trace/www-master.torproject.org", &ExtractDate); + if ($m{$server}{updateDate}) { + foreach my $randomtorfile(keys %randomtorfiles) { + my $sig = Fetch("$m{$server}{$serverType}/$randomtorfile", &ExtractSig); + if (!$sig) { + $todelete{$server} = "Unreadable $randomtorfile"; + } elsif ($sig ne $randomtorfiles{$randomtorfile}) { + $todelete{$server} = "Sig mismatch on $randomtorfile"; + } else { + print "Sig $sig matches for $randomtorfile on $m{$server}{$serverType}\n"; + } + last; + } + } else { + delete $m{$server}{updateDate}; + $todelete{$server} = "Unreadable date"; + } + last; + } + } + if (exists $m{$server}{updateDate}) { + print "We fetched and stored the following: $m{$server}{'updateDate'}\n"; } else { - print "We were unable to fetch or store anything. We still have the following: $m{$server}{'updateDate'}\n"; - } + $todelete{$server} = "Unreadable date"; + } + }
- print "We fetched and stored the following: $m{$server}{'updateDate'}\n"; +foreach my $outdated(keys %todelete) +{ + print "Deleted $outdated due to $todelete{$outdated}\n"; + print Dumper($m{$outdated}); + delete $m{$outdated}; +}
- } +foreach my $k(keys %m) +{ + print "$k\n"; + print "$m{$k}{updateDate}\n"; +}
- print "We sorted the following mirrors by their date of last update: \n"; foreach my $server ( sort { $m{$b}{'updateDate'} <=> $m{$a}{'updateDate'}} keys %m ) {
@@ -1394,7 +1453,7 @@ open(OUT, "> $outFile") or die "Can't open $outFile: $!";
# Here's where we open a file and print some wml include goodness -# This is storted from last known recent update to unknown update times +# This is sorted from last known recent update to unknown update times foreach my $server ( sort { $m{$b}{'updateDate'} <=> $m{$a}{'updateDate'}} keys %m ) {
my $time;