[tor-commits] r26450: {website} new update mirrors script, see ticket 10269, update mirrors (in website/trunk: . include)

Andrew Lewman andrew at torproject.org
Wed Dec 4 22:12:29 UTC 2013


Author: phobos
Date: 2013-12-04 22:12:29 +0000 (Wed, 04 Dec 2013)
New Revision: 26450

Modified:
   website/trunk/include/mirrors-table.wmi
   website/trunk/update-mirrors.pl
Log:
new update mirrors script, see ticket 10269, update mirrors table with the new version.


Modified: website/trunk/include/mirrors-table.wmi
===================================================================
--- website/trunk/include/mirrors-table.wmi	2013-12-04 10:57:57 UTC (rev 26449)
+++ website/trunk/include/mirrors-table.wmi	2013-12-04 22:12:29 UTC (rev 26450)
@@ -1,23 +1,6 @@
      
 <tr>
 
-         <td>INT</td>
-
-         <td>CoralCDN</td>
-
-         <td>Up to date</td>
-
-    <td> - </td>
-    <td><a href="http://www.torproject.org.nyud.net/dist/">http</a></td>
-    <td><a href="http://www.torproject.org.nyud.net/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
          <td>CZ</td>
 
          <td>Lightning-bolt.net</td>
@@ -120,23 +103,6 @@
      
 <tr>
 
-         <td>LT</td>
-
-         <td></td>
-
-         <td>Up to date</td>
-
-    <td> - </td>
-    <td><a href="http://tor.vesta.nu/dist/">http</a></td>
-    <td><a href="http://tor.vesta.nu/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
          <td>DE</td>
 
          <td></td>
@@ -341,40 +307,6 @@
      
 <tr>
 
-         <td>US</td>
-
-         <td>AskApache</td>
-
-         <td>Up to date</td>
-
-    <td> - </td>
-    <td><a href="http://tor.askapache.com/dist/">http</a></td>
-    <td><a href="http://tor.askapache.com/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
-         <td>DE</td>
-
-         <td>Netzkonstrukt Berlin</td>
-
-         <td>Up to date</td>
-
-    <td> - </td>
-    <td><a href="http://mirror.ntzk.de/torproject.org/dist/">http</a></td>
-    <td><a href="http://mirror.ntzk.de/torproject.org/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
          <td>RU</td>
 
          <td>Soviet Anonymous</td>
@@ -443,36 +375,36 @@
      
 <tr>
 
-         <td>AT</td>
+         <td>INT</td>
 
-         <td>TechAsk.IT</td>
+         <td>CoralCDN</td>
 
          <td>Up to date</td>
 
     <td> - </td>
-    <td><a href="http://www.unicorncloud.org/public/torproject.org/dist">http</a></td>
-    <td><a href="http://www.unicorncloud.org/public/torproject.org/">http</a></td>
-    <td><a href="https://www.unicorncloud.org/public/torproject.org/dist">https</a></td>
-    <td><a href="https://www.unicorncloud.org/public/torproject.org/">https</a></td>
+    <td><a href="http://www.torproject.org.nyud.net/dist/">http</a></td>
+    <td><a href="http://www.torproject.org.nyud.net/">http</a></td>
     <td> - </td>
     <td> - </td>
+    <td> - </td>
+    <td> - </td>
 </tr>
      
 <tr>
 
-         <td>NL</td>
+         <td>AT</td>
 
-         <td>Amorphis</td>
+         <td>TechAsk.IT</td>
 
          <td>Up to date</td>
 
     <td> - </td>
-    <td><a href="http://tor.amorphis.eu/dist/">http</a></td>
-    <td><a href="http://tor.amorphis.eu/">http</a></td>
+    <td><a href="http://www.unicorncloud.org/public/torproject.org/dist">http</a></td>
+    <td><a href="http://www.unicorncloud.org/public/torproject.org/">http</a></td>
+    <td><a href="https://www.unicorncloud.org/public/torproject.org/dist">https</a></td>
+    <td><a href="https://www.unicorncloud.org/public/torproject.org/">https</a></td>
     <td> - </td>
     <td> - </td>
-    <td> - </td>
-    <td> - </td>
 </tr>
      
 <tr>
@@ -630,6 +562,23 @@
      
 <tr>
 
+         <td>MX</td>
+
+         <td></td>
+
+         <td>Up to date</td>
+
+    <td> - </td>
+    <td><a href="http://fbnaia.homelinux.net/torproject/dist/">http</a></td>
+    <td><a href="http://fbnaia.homelinux.net/torproject/">http</a></td>
+    <td><a href="https://fbnaia.homelinux.net/torproject//dist/">https</a></td>
+    <td><a href="https://fbnaia.homelinux.net/torproject/">https</a></td>
+    <td> - </td>
+    <td> - </td>
+</tr>
+     
+<tr>
+
          <td>NL</td>
 
          <td>BBLN</td>
@@ -834,34 +783,17 @@
      
 <tr>
 
-         <td>IN</td>
+         <td>LT</td>
 
-         <td>India Tor Fans</td>
-
-         <td>DO NOT USE. Out of date.</td>
-
-    <td> - </td>
-    <td><a href="http://www.torproject.org.in/dist/">http</a></td>
-    <td><a href="http://www.torproject.org.in/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
-         <td>TN</td>
-
          <td></td>
 
-         <td>DO NOT USE. Out of date.</td>
+         <td>Up to date</td>
 
     <td> - </td>
+    <td><a href="http://tor.vesta.nu/dist/">http</a></td>
+    <td><a href="http://tor.vesta.nu/">http</a></td>
     <td> - </td>
-    <td><a href="http://torproject.antagonism.org/">http</a></td>
     <td> - </td>
-    <td><a href="https://torproject.antagonism.org/">https</a></td>
     <td> - </td>
     <td> - </td>
 </tr>
@@ -870,13 +802,13 @@
 
          <td>US</td>
 
-         <td>searchprivate</td>
+         <td>AskApache</td>
 
-         <td>DO NOT USE. Out of date.</td>
+         <td>Up to date</td>
 
     <td> - </td>
-    <td><a href="http://tor.searchprivate.com/dist/">http</a></td>
-    <td><a href="http://tor.searchprivate.com/">http</a></td>
+    <td><a href="http://tor.askapache.com/dist/">http</a></td>
+    <td><a href="http://tor.askapache.com/">http</a></td>
     <td> - </td>
     <td> - </td>
     <td> - </td>
@@ -885,289 +817,34 @@
      
 <tr>
 
-         <td>DE</td>
-
-         <td></td>
-
-         <td>Unknown</td>
-
-    <td> - </td>
-    <td><a href="http://tor.taskserver.de/dist/">http</a></td>
-    <td><a href="http://tor.taskserver.de/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
-         <td>US</td>
-
-         <td></td>
-
-         <td>Unknown</td>
-
-    <td> - </td>
-    <td><a href="http://tor.mirrors.whitedholdings.org/dist/">http</a></td>
-    <td><a href="http://tor.mirrors.whitedholdings.org/">http</a></td>
-    <td><a href="https://tor.mirrors.whitedholdings.org/dist/">https</a></td>
-    <td><a href="https://tor.mirrors.whitedholdings.org/">https</a></td>
-    <td><a href="rsync://tor.mirrors.whitedholdings.org/tor-dist">rsync</a></td>
-    <td><a href="rsync://mirrors.whitedholdings.org/tor">rsync</a></td>
-</tr>
-     
-<tr>
-
-         <td>US</td>
-
-         <td></td>
-
-         <td>Unknown</td>
-
-    <td><a href="ftp://mirrors.go-parts.com/tor/">ftp</a></td>
-    <td><a href="http://mirrors.go-parts.com/tor/dist/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-    <td><a href="rsync://mirrors.go-parts.com/mirrors/tor/">rsync</a></td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
-         <td>US</td>
-
-         <td></td>
-
-         <td>Unknown</td>
-
-    <td> - </td>
-    <td><a href="http://tor.loritsu.com/dist/">http</a></td>
-    <td><a href="http://tor.loritsu.com/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
-         <td>US</td>
-
-         <td>NW Linux</td>
-
-         <td>Unknown</td>
-
-    <td> - </td>
-    <td><a href="http://torproject.nwlinux.us/dist/">http</a></td>
-    <td><a href="http://torproject.nwlinux.us/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td><a href="rsync://nwlinux.us/tor-dist">rsync</a></td>
-    <td><a href="rsync://nwlinux.us/tor-web">rsync</a></td>
-</tr>
-     
-<tr>
-
          <td>NL</td>
 
-         <td></td>
+         <td>Amorphis</td>
 
-         <td>DO NOT USE. Out of date.</td>
+         <td>Up to date</td>
 
     <td> - </td>
+    <td><a href="http://tor.amorphis.eu/dist/">http</a></td>
+    <td><a href="http://tor.amorphis.eu/">http</a></td>
     <td> - </td>
     <td> - </td>
-    <td><a href="https://www.coevoet.nl/tor/dist/">https</a></td>
     <td> - </td>
     <td> - </td>
-    <td> - </td>
 </tr>
      
 <tr>
 
-         <td>LU</td>
+         <td>IN</td>
 
-         <td></td>
+         <td>India Tor Fans</td>
 
-         <td>Unknown</td>
-
-    <td> - </td>
-    <td><a href="http://torproject.adamas.ai/dist/">http</a></td>
-    <td><a href="http://torproject.adamas.ai/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
-         <td>FR</td>
-
-         <td>LazyTiger</td>
-
-         <td>Unknown</td>
-
-    <td> - </td>
-    <td><a href="http://tor.taiga-san.net/dist/">http</a></td>
-    <td><a href="http://tor.taiga-san.net/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
-         <td>DE</td>
-
-         <td>[[:bbs:]]</td>
-
-         <td>Unknown</td>
-
-    <td> - </td>
-    <td><a href="http://tor.blingblingsquad.net/dist/">http</a></td>
-    <td><a href="http://tor.blingblingsquad.net/">http</a></td>
-    <td><a href="https://tor.blingblingsquad.net/dist/">https</a></td>
-    <td><a href="https://tor.blingblingsquad.net/">https</a></td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
-         <td>US</td>
-
-         <td></td>
-
          <td>DO NOT USE. Out of date.</td>
 
     <td> - </td>
-    <td><a href="http://www.netgull.com/torproject/">http</a></td>
+    <td><a href="http://www.torproject.org.in/dist/">http</a></td>
+    <td><a href="http://www.torproject.org.in/">http</a></td>
     <td> - </td>
     <td> - </td>
     <td> - </td>
     <td> - </td>
-    <td> - </td>
 </tr>
-     
-<tr>
-
-         <td>US</td>
-
-         <td></td>
-
-         <td>Unknown</td>
-
-    <td> - </td>
-    <td><a href="http://tor.minibofh.org/dist/">http</a></td>
-    <td><a href="http://tor.minibofh.org/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
-         <td>FR</td>
-
-         <td></td>
-
-         <td>Unknown</td>
-
-    <td> - </td>
-    <td><a href="http://37.187.0.127/tormirror/dist/">http</a></td>
-    <td><a href="http://37.187.0.127/tormirror/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
-         <td>MX</td>
-
-         <td></td>
-
-         <td>Unknown</td>
-
-    <td> - </td>
-    <td><a href="http://fbnaia.homelinux.net/torproject/dist/">http</a></td>
-    <td><a href="http://fbnaia.homelinux.net/torproject/">http</a></td>
-    <td><a href="https://fbnaia.homelinux.net/torproject//dist/">https</a></td>
-    <td><a href="https://fbnaia.homelinux.net/torproject/">https</a></td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
-         <td>EE</td>
-
-         <td></td>
-
-         <td>Unknown</td>
-
-    <td> - </td>
-    <td><a href="http://tor.li/dist/">http</a></td>
-    <td><a href="http://tor.li/">http</a></td>
-    <td><a href="https://tor.li/dist/">https</a></td>
-    <td><a href="https://tor.li/">https</a></td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
-         <td>UA</td>
-
-         <td></td>
-
-         <td>Unknown</td>
-
-    <td> - </td>
-    <td><a href="http://torua.reactor-xg.kiev.ua/dist/">http</a></td>
-    <td><a href="http://torua.reactor-xg.kiev.ua/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
-         <td>UK</td>
-
-         <td></td>
-
-         <td>Unknown</td>
-
-    <td> - </td>
-    <td><a href="http://tor.mage.me.uk/dist/">http</a></td>
-    <td><a href="http://tor.mage.me.uk/">http</a></td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-    <td> - </td>
-</tr>
-     
-<tr>
-
-         <td>DE</td>
-
-         <td>tor-mirror.de</td>
-
-         <td>Unknown</td>
-
-    <td> - </td>
-    <td><a href="http://tor-mirror.de/dist/">http</a></td>
-    <td><a href="http://tor-mirror.de/">http</a></td>
-    <td><a href="https://tor-mirror.de/dist/">https</a></td>
-    <td><a href="https://tor-mirror.de/">https</a></td>
-    <td> - </td>
-    <td> - </td>
-</tr>

Modified: website/trunk/update-mirrors.pl
===================================================================
--- website/trunk/update-mirrors.pl	2013-12-04 10:57:57 UTC (rev 26449)
+++ website/trunk/update-mirrors.pl	2013-12-04 22:12:29 UTC (rev 26450)
@@ -1,23 +1,16 @@
 #!/usr/bin/perl -w
 use warnings;
 use strict;
+use Data::Dumper;
 use LWP::Simple;
+use HTML::LinkExtor;
 use LWP;
 use Date::Parse;
 use Date::Format;
+use Digest::SHA qw(sha256_hex);
 
-#
-# A quick hack by Jacob Appelbaum <jacob at appelbaum.net>
-# LWP suggestions by Leigh Honeywell
 # This is Free Software (GPLv3)
 # http://www.gnu.org/licenses/gpl-3.0.txt
-#
-# CHANGELOG
-# 20091003 Code changes to elimiate the need for a trailing slash in addresses for script runtime
-# 20091004 Code changes to increase out of date tolerance to 48 hours
-# 20091028 Code changes to increase timout to 30 seconds (attempting to # resolve "unknown" status')
-# 20091028 Code changes to change user agent of script
-# 20100807 Remove dead mirrors.
 
 print "Creating LWP agent ($LWP::VERSION)...\n";
 my $lua = LWP::UserAgent->new(
@@ -37,45 +30,71 @@
 
     return $cleanedData;
 }
+sub ExtractLinks {
+    my $content = shift; 
+    my $url     = shift;
+    my @links;
 
-sub FetchDate {
-    my $url = shift; # Base url for mirror
-    my $trace = "project/trace/www-master.torproject.org"; # this file should always exist
-    $url = "$url/$trace";
+    my $parser = HTML::LinkExtor->new(undef, $url);
+    $parser->parse($content);
+    foreach my $linkarray($parser->links)
+    {
+         my ($elt_type, $attr_name, $attr_value) = @$linkarray;
+         if ($elt_type eq 'a' && $attr_name eq 'href' && $attr_value =~ /\/$/ && $attr_value =~ /^$url/)
+         {
+         	push @links, Fetch($attr_value, \&ExtractLinks);
+         }
+	 elsif ($attr_value =~ /\.(xpi|dmg|exe|tar\.gz)$/)
+	 #elsif ($attr_value =~ /\.(asc)$/)
+         {
+         	push @links, $attr_value;
+         }
+    }
+    return @links;
+}
 
-    print "Fetching possible date from: $url\n";
+sub ExtractDate {
+    my $content = shift;  
+    $content    = sanitize($content);
+    my $date    = str2time($content);
 
+    print "Extracting possible date from: $content\n";
+    if ($date) {
+        print "We've fetched a date $date.\n";
+        return $date;
+    } else {
+        print "We haven't fetched a date.\n";
+	return undef;
+    }
+}
+
+sub ExtractSig {
+    my $content = shift;
+    return sha256_hex($content); 
+}
+
+sub Fetch {
+    my ($url, $sub) = @_; # Base url for mirror
+    print "Fetch $url\n";
+
     my $request = new HTTP::Request GET => "$url";
     my $result = $lua->request($request);
     my $code = $result->code();
-    print "Result code $code\n";
+    print "\tResult code $code\n";
 
     if ($result->is_success && $code eq "200"){
-       my $taint = $result->content;
-       my $content = sanitize($taint);
+       my $content = $result->content;
        if ($content) {
-
-            my $date = str2time($content);
-
-            if ($date) {
-                print "We've fetched a date $date.\n";
-                return $date;
-            } else {
-                print "We've haven't fetched a date.\n";
-                return "Unknown";
-            }
-
+	    return $sub->($content, $url);
         } else {
-            print "Unable to fetch date, empty content returned.\n";
-            return "Unknown";
+            print "Unable to fetch $url, empty content returned.\n";
         }
 
     } else {
        print "Our request failed, we had no result.\n";
-       return "Unknown";
     }
 
-    return "Unknown";
+    return undef;
 }
 
 # This is the list of all known Tor mirrors
@@ -1349,34 +1368,74 @@
 print "We have a total of $count mirrors\n";
 print "Fetching the last updated date for each mirror.\n";
 
-my $tortime;
-$tortime = FetchDate("https://www.torproject.org/");
+my $tortime = Fetch("https://www.torproject.org/project/trace/www-master.torproject.org", \&ExtractDate);
+my @torfiles = Fetch("https://www.torproject.org/dist/", \&ExtractLinks); 
+my %randomtorfiles;
+
+for (1 .. 1)
+{
+	my $r = int(rand(scalar(@torfiles)));
+	my $suffix = $torfiles[$r];
+	$suffix =~ s/^https:\/\/www.torproject.org//;
+	$randomtorfiles{$suffix} = Fetch($torfiles[$r], \&ExtractSig);
+}
+
+print "Using these files for sig matching:\n";
+print join("\n", keys %randomtorfiles);
+
 # Adjust offical Tor time by out-of-date offset: number of days * seconds per day
 $tortime -= 1 * 172800;
 print "The official time for Tor is $tortime. \n";
+my %todelete;
 
 foreach my $server ( keys %m ) {
 
-    print "Attempting to fetch from $m{$server}{'orgName'}\n";
+    print "Attempting to fetch from $server: $m{$server}{'orgName'}\n";
 
-    if ($m{$server}{'httpWebsiteMirror'}) {
-        print "Attempt to fetch via HTTP.\n";
-        $m{$server}{"updateDate"} = FetchDate("$m{$server}{'httpWebsiteMirror'}");
-    } elsif ($m{$server}{'httpsWebsiteMirror'}) {
-        print "Attempt to fetch via HTTPS.\n";
-        $m{$server}{"updateDate"} = FetchDate("$m{$server}{'httpsWebsiteMirror'}");
-    } elsif ($m{$server}{'ftpWebsiteMirror'}) {
-        print "Attempt to fetch via FTP.\n";
-        $m{$server}{"updateDate"} = FetchDate("$m{$server}{'ftpWebsiteMirror'}");
+    foreach my $serverType('httpWebsiteMirror', 'httpsWebsiteMirror', 'ftpWebsiteMirror')
+    {
+        if ($m{$server}{$serverType}) {
+            print "Attempt to fetch via $serverType.\n";
+            $m{$server}{"updateDate"} = Fetch("$m{$server}{$serverType}/project/trace/www-master.torproject.org", \&ExtractDate);
+            if ($m{$server}{updateDate}) {
+                foreach my $randomtorfile(keys %randomtorfiles) {
+                	my $sig = Fetch("$m{$server}{$serverType}/$randomtorfile", \&ExtractSig);
+            		if (!$sig) {
+			    $todelete{$server} = "Unreadable $randomtorfile";
+		        } elsif ($sig ne $randomtorfiles{$randomtorfile}) {
+			    $todelete{$server} = "Sig mismatch on $randomtorfile";
+            		} else {
+        	    	    print "Sig $sig matches for $randomtorfile on $m{$server}{$serverType}\n";
+			}
+            	        last;
+		}
+            } else {
+		delete $m{$server}{updateDate};
+		$todelete{$server} = "Unreadable date";
+            }
+	    last;
+        } 
+    }
+    if (exists $m{$server}{updateDate}) {
+        print "We fetched and stored the following: $m{$server}{'updateDate'}\n";
     } else {
-        print "We were unable to fetch or store anything. We still have the following: $m{$server}{'updateDate'}\n";
-    }
+	$todelete{$server} = "Unreadable date";
+    } 
+ }
 
-    print "We fetched and stored the following: $m{$server}{'updateDate'}\n";
+foreach my $outdated(keys %todelete)
+{
+    print "Deleted $outdated due to $todelete{$outdated}\n";
+    print Dumper($m{$outdated});
+    delete  $m{$outdated};
+}
 
- }
+foreach my $k(keys %m)
+{
+    print "$k\n";
+    print "$m{$k}{updateDate}\n";
+}
 
-
 print "We sorted the following mirrors by their date of last update: \n";
 foreach my $server ( sort { $m{$b}{'updateDate'} <=> $m{$a}{'updateDate'}} keys %m ) {
 
@@ -1394,7 +1453,7 @@
 open(OUT, "> $outFile") or die "Can't open $outFile: $!";
 
 # Here's where we open a file and print some wml include goodness
-# This is storted from last known recent update to unknown update times
+# This is sorted from last known recent update to unknown update times
 foreach my $server ( sort { $m{$b}{'updateDate'} <=> $m{$a}{'updateDate'}} keys %m ) {
 
      my $time;



More information about the tor-commits mailing list