morgan pushed to branch main at The Tor Project / Applications / tor-browser-build

Commits:

1 changed file:

Changes:

  • tools/count-mar-downloads
    1
    +#!/usr/bin/perl -w
    
    2
    +#
    
    3
    +# This script takes log archive months as arguments.
    
    4
    +# Example: count-mar-downloads 2025-08 2025-07
    
    5
    +# Check which months are available on https://collector.torproject.org/archive/webstats/
    
    6
    +#
    
    7
    +# The script will download the log archives for the selected months,
    
    8
    +# extract them, parse the logs and print download numbers per version
    
    9
    +# number, OS, and incremental/full updates.
    
    10
    +#
    
    11
    +# The log file archives are stored in the tools/web-logs-count-mar-downloads
    
    12
    +# directory.
    
    13
    +#
    
    14
    +use strict;
    
    15
    +use FindBin;
    
    16
    +use lib "$FindBin::Bin/../rbm/lib";
    
    17
    +use RBM::CaptureExec qw(capture_exec);
    
    18
    +use File::Temp qw/tempdir/;
    
    19
    +use File::Copy qw/move/;
    
    20
    +use File::Find;
    
    21
    +
    
    22
    +if (!@ARGV) {
    
    23
    +  print "Usage: count-mar-downloads <month>\n";
    
    24
    +  print "Example: count-mar-downloads 2025-08 2025-07\n";
    
    25
    +  print "Check which months are available on https://collector.torproject.org/archive/webstats/\n";
    
    26
    +  exit 1;
    
    27
    +}
    
    28
    +
    
    29
    +my @months = @ARGV;
    
    30
    +
    
    31
    +my %downloads;
    
    32
    +
    
    33
    +my $weblogsdir = "$FindBin::Bin/web-logs-count-mar-downloads";
    
    34
    +mkdir $weblogsdir;
    
    35
    +chdir $weblogsdir;
    
    36
    +
    
    37
    +sub exit_error {
    
    38
    +  print STDERR "Error: ", $_[0], "\n";
    
    39
    +  exit (exists $_[1] ? $_[1] : 1);
    
    40
    +}
    
    41
    +
    
    42
    +sub download_log_files {
    
    43
    +  my $tmpdir = tempdir(CLEANUP => 1);
    
    44
    +  foreach my $month (@months) {
    
    45
    +    my $file = "webstats-$month.tar";
    
    46
    +    if (-f $file) {
    
    47
    +      print STDERR "Using existing file $file (remove it if you want to re-download it).\n";
    
    48
    +      next;
    
    49
    +    }
    
    50
    +    my $url = "https://collector.torproject.org/archive/webstats/$file";
    
    51
    +    exit_error "Error downloading $url" unless
    
    52
    +        system('wget', '-O', "$tmpdir/$file", $url) == 0;
    
    53
    +    move("$tmpdir/$file", "$weblogsdir/$file");
    
    54
    +  }
    
    55
    +}
    
    56
    +
    
    57
    +sub parse_log_file {
    
    58
    +  return unless -f $File::Find::name;
    
    59
    +  return unless $File::Find::name =~ m/\.xz$/;
    
    60
    +
    
    61
    +  print STDERR "Reading $_\n";
    
    62
    +
    
    63
    +  my ($stdout, undef, $success) = capture_exec('xzcat', $File::Find::name);
    
    64
    +  foreach my $line (split /\n/, $stdout) {
    
    65
    +    my ($version, $os) =
    
    66
    +      ($line =~ m|^[^\s]+ - - \[.+\] "GET /aus1/torbrowser/([^/]+)/tor-browser-(.*)-\d.*_ALL\.mar HTTP/|);
    
    67
    +    if ($version) {
    
    68
    +      $downloads{$version}{total} += 1;
    
    69
    +      $downloads{$version}{full_update} += 1;
    
    70
    +      $downloads{$version}{OS}{$os} += 1;
    
    71
    +      next;
    
    72
    +    }
    
    73
    +    my $incremental_from;
    
    74
    +    ($version, $os, $incremental_from) =
    
    75
    +      ($line =~ m|^[^\s]+ - - \[.+\] "GET /aus1/torbrowser/([^/]+)/tor-browser-(.*)--(\d[^-]+)-.*_ALL\.incremental\.mar HTTP/|);
    
    76
    +    if ($incremental_from) {
    
    77
    +      $downloads{$version}{total} += 1;
    
    78
    +      $downloads{$version}{OS}{$os} += 1;
    
    79
    +      $downloads{$version}{incremental_update_total} += 1;
    
    80
    +      $downloads{$version}{incremental_update}{$incremental_from} += 1;
    
    81
    +    }
    
    82
    +  }
    
    83
    +}
    
    84
    +
    
    85
    +sub parse_log_files {
    
    86
    +  my $tmpdir = tempdir(CLEANUP => 1);
    
    87
    +  foreach my $month (@months) {
    
    88
    +    my $file = "webstats-$month.tar";
    
    89
    +    print STDERR "Extracting $file\n";
    
    90
    +    my (undef, undef, $success) = capture_exec('tar', '-C', $tmpdir, '-xf',
    
    91
    +                                        $file, "webstats-$month/cdn.torproject.org");
    
    92
    +
    
    93
    +    find(\&parse_log_file, "$tmpdir/webstats-$month/cdn.torproject.org");
    
    94
    +  }
    
    95
    +}
    
    96
    +
    
    97
    +sub print_results {
    
    98
    +  print 'Download numbers for months ', join(' ', @months), ":\n\n";
    
    99
    +  my @versions = sort { $downloads{$b}{total} <=> $downloads{$a}{total} }
    
    100
    +        keys %downloads;
    
    101
    +  foreach my $version (@versions) {
    
    102
    +    print "Version: $version\n";
    
    103
    +    print "  Total: $downloads{$version}{total}\n";
    
    104
    +    print "  Per OS:\n";
    
    105
    +    foreach my $os (sort keys %{$downloads{$version}{OS}}) {
    
    106
    +      print "    $os: $downloads{$version}{OS}{$os}\n";
    
    107
    +    }
    
    108
    +    print "  Full updates: $downloads{$version}{full_update}\n"
    
    109
    +        if $downloads{$version}{full_update};
    
    110
    +    if ($downloads{$version}{incremental_update_total}) {
    
    111
    +      print "  Incremental updates (total): $downloads{$version}{incremental_update_total}\n";
    
    112
    +      foreach my $incremental_from (sort keys %{$downloads{$version}{incremental_update}}) {
    
    113
    +        print "  Incremental updates from $incremental_from: $downloads{$version}{incremental_update}{$incremental_from}\n";
    
    114
    +      }
    
    115
    +    }
    
    116
    +    print "\n";
    
    117
    +  }
    
    118
    +}
    
    119
    +
    
    120
    +download_log_files;
    
    121
    +parse_log_files;
    
    122
    +print_results;