commit acfa536cf24f253dfdfc1321e0033c878553110a
Author: Runa A. Sandvik <runa.sandvik@gmail.com>
Date:   Thu Jan 19 08:49:04 2012 +0000
Add logimport.sh and logarchive.sh to git
---
 logarchive.sh |   94 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 logimport.sh  |   64 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 158 insertions(+), 0 deletions(-)

diff --git a/logarchive.sh b/logarchive.sh
new file mode 100644
index 0000000..bab17b5
--- /dev/null
+++ b/logarchive.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+#
+# Archive the sanitized web logs found in $logs.
+#
+# All logs are first copied into $log_archive with rsync. For each host in
+# $hosts, the archived logs are then grouped by year and month, appended to
+# the tarball $data_archive/$host-$year-$month.tar, and a bzip2-compressed
+# copy of that tarball is moved into $data_publish. Once a year has been
+# processed, its directory is removed from $logs. Progress is appended to
+# $record.
+
+# Define a few variables
+logs="/srv/webstats.torproject.org/home/webstats/out"
+archive="/srv/webstats.torproject.org/archive"
+log_archive="/srv/webstats.torproject.org/archive/weblogs"
+data_archive="/srv/webstats.torproject.org/archive/data"
+data_publish="/srv/webstats.torproject.org/htdocs/data"
+record="/srv/webstats.torproject.org/logarchive.log"
+
+# Define the hosts we have logs for
+hosts=(
+  metrics.torproject.org-access.log
+)
+
+# Append a timestamped message to the record file
+report() {
+  echo "$(date) - $1" >> "$record"
+}
+
+# See if we actually have logs to process
+if [ -z "$(ls -A "$logs")" ]
+then
+  report "No logs to process"
+  exit 1
+fi
+
+# rsync ALL the logs so that we can delete them from the out/ directory
+# later. Stop if the copy fails: the out/ directory is cleaned up below,
+# and that must not happen unless the logs are safely in the archive.
+if ! rsync -ar "$logs/" "$log_archive/"
+then
+  report "rsync to $log_archive failed, no logs processed"
+  exit 1
+fi
+
+# The paths stored in the tarballs are relative to $archive
+# (weblogs/year/month/...), so work from that directory throughout.
+if ! cd "$archive"
+then
+  report "Cannot change directory to $archive"
+  exit 1
+fi
+
+for host in "${hosts[@]}"
+do
+  # The second path component of weblogs/year/month/... is the year
+  years=$(find weblogs -name "$host" | cut -d / -f 2 | sort -u)
+
+  for y in $years; do
+    # ... and the third one is the month
+    months=$(find "weblogs/$y" -name "$host" | cut -d / -f 3 | sort -u)
+
+    for m in $months; do
+      tarball="$data_archive/$host-$y-$m.tar"
+
+      find "weblogs/$y/$m" -name "$host" | sort | while IFS= read -r i; do
+        # Skip the file if it is already a part of the tarball; tar --list
+        # exits with a non-zero status when the member is not found
+        if [ -e "$tarball" ] &&
+           tar --list --file="$tarball" "$i" > /dev/null 2>&1
+        then
+          continue
+        fi
+
+        # Append to the tarball in place (tar creates it if necessary) so
+        # that the logs added on earlier runs are kept
+        tar --append --file="$tarball" "$i"
+      done
+
+      # Pack up the tarball and make it available online
+      if [ -e "$tarball" ]
+      then
+        bzip2 -kf1 "$tarball"
+        mv "$tarball.bz2" "$data_publish"
+      fi
+    done
+
+    # And remove logs from the out/ directory
+    rm -rf "${logs:?}/$y/"
+  done
+
+  # And report back
+  report "Logs processed for $host"
+done
diff --git a/logimport.sh b/logimport.sh
new file mode 100644
index 0000000..438e3dd
--- /dev/null
+++ b/logimport.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+#
+# The sanitized logs are in /srv/webstats.torproject.org/home/webstats/out/,
+# with the following format: year/month/day/$virtualhost-access.log
+#
+# Need to concat the logs in chronological order and import into
+# awstats and webalizer.
+
+# Define a few variables
+logtmp="/srv/webstats.torproject.org/logtmp"
+logs="/srv/webstats.torproject.org/home/webstats/out"
+record="/srv/webstats.torproject.org/logimport.log"
+
+# Define the hosts we have logs for
+hosts=(
+  metrics
+)
+
+# Append a timestamped message to the record file
+report() {
+  echo "$(date) - $1" >> "$record"
+}
+
+# See if we actually have logs to process
+if [ -z "$(ls -A "$logs")" ]
+then
+  report "No logs to process"
+  exit 1
+fi
+
+for host in "${hosts[@]}"
+do
+  # Set up the scratch directory. A directory left behind by an interrupted
+  # run is reused, and we stop if we cannot enter it, so that the combined
+  # log is never written somewhere else.
+  if ! mkdir -p "$logtmp" || ! cd "$logtmp"
+  then
+    report "Cannot use $logtmp"
+    exit 1
+  fi
+
+  # Concat the logs in chronological order. Sorting the year/month/day
+  # paths gives that order as long as months and days are zero-padded;
+  # LC_ALL=C keeps the result independent of the locale.
+  find "$logs/" -name "$host.torproject.org-access.log" -print0 |
+    LC_ALL=C sort -z |
+    xargs -0 -r cat > "$host.torproject.org-access.log"
+
+  # Time to run the web log analysis tools
+  #
+  # Running awstats first
+  /srv/webstats.torproject.org/awstats/awstats.pl \
+    -config="$host.torproject.org" -update
+
+  # And then webalizer
+  webalizer -c "/srv/webstats.torproject.org/configs/webalizer.$host.torproject.org.conf"
+
+  # Cleanup
+  cd "/srv/webstats.torproject.org"
+  rm -rf "${logtmp:?}"
+
+  # And report back
+  report "Logs processed for $host"
+done