[or-cvs] r15158: Add a new component for extracting lots of different statistics (in projects: . dir-stats dir-stats/trunk)

nickm at seul.org nickm at seul.org
Thu Jun 12 02:40:22 UTC 2008


Author: nickm
Date: 2008-06-11 22:40:22 -0400 (Wed, 11 Jun 2008)
New Revision: 15158

Added:
   projects/dir-stats/
   projects/dir-stats/branches/
   projects/dir-stats/tags/
   projects/dir-stats/trunk/
   projects/dir-stats/trunk/LICENSE
   projects/dir-stats/trunk/routerstats.py
Log:
Add a new component for extracting lots of different statistics from a
directory at once.


Added: projects/dir-stats/trunk/LICENSE
===================================================================
--- projects/dir-stats/trunk/LICENSE	                        (rev 0)
+++ projects/dir-stats/trunk/LICENSE	2008-06-12 02:40:22 UTC (rev 15158)
@@ -0,0 +1,31 @@
+routerstats.py is distributed under this license:
+
+Copyright (c) 2008, The Tor Project, Inc.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+    * Neither the names of the copyright owners nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Added: projects/dir-stats/trunk/routerstats.py
===================================================================
--- projects/dir-stats/trunk/routerstats.py	                        (rev 0)
+++ projects/dir-stats/trunk/routerstats.py	2008-06-12 02:40:22 UTC (rev 15158)
@@ -0,0 +1,333 @@
+#!/usr/bin/python
+# routerstats.py
+# Copyright 2008 The Tor Project, Inc
+# For license information, see LICENSE
+# For usage information, run routerstats.py --help
+
+import binascii
+import re
+import time, calendar
+import sys
+
+def parseTime(t):
+    """Convert a "YYYY-MM-DD HH:MM:SS" string, read as UTC, into integer
+    seconds since the epoch."""
+    return calendar.timegm(time.strptime(t, "%Y-%m-%d %H:%M:%S"))
+
+def parseHistory(line):
+    """Summarise the value part of a read-history/write-history line.
+
+    `line` is split on whitespace; the third field supplies the interval
+    length in seconds and the fifth field, when present, is a
+    comma-separated list of integer samples.
+
+    Returns (min, median, max, latest), or NIL_HIST when there are no
+    samples.  Only `latest` is divided by the interval length; the other
+    three are returned as raw samples.
+    NOTE(review): the differing units look unintentional -- confirm before
+    relying on anything but `latest`.
+    """
+    fields = line.split()
+    # The first character of the third field is skipped; presumably it is
+    # the "(" of "(900 s)" -- TODO confirm against the descriptor format.
+    secPerChunk = int(fields[2][1:])
+    if len(fields) >= 5:
+        samples = [ int(n) for n in fields[4].split(",") ]
+    else:
+        samples = [ ]
+
+    if not samples:
+        return NIL_HIST
+    if len(samples) > 1:
+        # With two or more samples the final one is left out of the summary.
+        del samples[-1]
+
+    bwLatest = samples[-1] // secPerChunk
+    ordered = sorted(samples)
+    return ordered[0], ordered[len(ordered)//2], ordered[-1], bwLatest
+
+class StatSummary:
+    """Count, mean, total and selected order statistics of a set of numbers.
+
+    None entries in `items` are ignored.  When nothing is left, only `n`
+    (equal to 0) is set on the instance.
+    """
+    def __init__(self, items):
+        values = sorted(v for v in items if v is not None)
+        count = self.n = len(values)
+        if not count:
+            return
+
+        def rank(fraction):
+            # Element found at the given fraction of the way through the
+            # sorted values (index truncated towards zero).
+            return values[int(count * fraction)]
+
+        self.median = values[count//2]
+        self.p75 = rank(.75)
+        self.p90 = rank(.9)
+        self.p95 = rank(.95)
+        self.p99 = rank(.99)
+        self.total = sum(values)
+        self.mean = self.total/float(count)
+        self.minimum = values[0]
+        self.maximum = values[-1]
+
+    def __str__(self):
+        """Return the summary as one tab-separated line ("N=0" when empty)."""
+        if self.n:
+            fields = (self.n, self.mean, self.total,
+                      self.minimum, self.median, self.p75,
+                      self.p90, self.p95, self.p99, self.maximum)
+            return ("N=%d\tmean=%d\ttotal=%d\t"
+                    "min=%d\t50pct=%d\t75pct=%d\t"
+                    "90pct=%d\t95pct=%d\t99pct=%d\tmax=%d") % fields
+        return "N=0"
+
+# Placeholder bandwidth history: a 4-tuple of Nones in the same
+# (min, median, max, latest) layout that parseHistory() returns.  Routers
+# start out with it, and parseHistory() returns it when a line has no samples.
+NIL_HIST = (None, None, None, None)
+
+class Router(object):
+    """One router, as described by a server descriptor.
+
+    `listed` and `running` start False and the other flag attributes start
+    None; setFlags() fills them in.  Both histories start as NIL_HIST until
+    setHistory() replaces them.
+    """
+    # We're going to make a lot of these.  __slots__ tells python that it
+    # can be real efficient about storage.
+    __slots__ = [ 'orport', 'dirport', 'ident', 'published', 'bw', 'uptime',
+                  'listed', 'running', 'stable', 'guard', 'exit',
+                  'ip', 'readHist', 'writeHist' ]
+
+    def __init__(self, orport, ip, ident, published, bw, uptime, dirport=None):
+        """Store the descriptor fields.
+
+        `dirport` is optional so that six-argument callers keep working; it
+        is the attribute the "dir" predicate reads.
+        """
+        self.orport = orport
+        self.dirport = dirport
+        self.ip = ip
+        self.ident = ident
+        self.published = published
+        self.bw = bw
+        self.uptime = uptime
+        self.listed = False
+        self.running = False
+        self.stable = None
+        self.guard = None
+        self.exit = None
+        self.readHist = NIL_HIST
+        self.writeHist = NIL_HIST
+
+    def setFlags(self, flags):
+        """Mark the router as listed and record which of the Running,
+        Stable, Guard and Exit flags appear in `flags`."""
+        self.listed = True
+        self.running = ('Running' in flags)
+        self.stable = ('Stable' in flags)
+        self.guard = ('Guard' in flags)
+        self.exit = ('Exit' in flags)
+
+    def setHistory(self, readData, writeData):
+        """Parse and store whichever history strings are non-empty; an
+        empty or None argument leaves the matching history untouched."""
+        if readData:
+            self.readHist = parseHistory(readData)
+        if writeData:
+            self.writeHist = parseHistory(writeData)
+
+class Stats:
+    """All routers seen so far, keyed by binary identity digest."""
+    def __init__(self):
+        self.routers = { }
+
+    def parseDescFile(self, fname):
+        """Read every router descriptor in `fname`, handing each one to
+        addRouter() when its "router-signature" line is reached."""
+        # Every per-descriptor field starts out (and is reset to) None, so a
+        # descriptor missing a field is reported by addRouter() rather than
+        # failing here on an unbound local.
+        orport = dirport = purpose = ip = ident = None
+        published = bw = uptime = None
+        bwRead = bwWrite = None
+
+        f = open(fname, 'r')
+        try:
+            for line in f:
+                line = line.strip()
+                if line.startswith("opt "): line = line[4:]
+
+                if line.startswith("@purpose "):
+                    purpose = line.split()[1]
+                elif line.startswith("router "):
+                    # Fields used: [2] address, [3] ORPort, [5] DirPort.
+                    items = line.split()
+                    ip = items[2]
+                    orport = int(items[3])
+                    dirport = int(items[5])
+                elif line.startswith("fingerprint "):
+                    ident = line[12:].replace(" ","")
+                elif line.startswith("published "):
+                    published = line[10:]
+                elif line.startswith("bandwidth "):
+                    # NOTE(review): this takes the second number on the
+                    # line; confirm that is the intended "capacity" value.
+                    items = line.split()
+                    bw = int(items[2])
+                elif line.startswith("uptime "):
+                    items = line.split()
+                    uptime = int(items[1])
+                elif line.startswith("read-history "):
+                    bwRead = line[13:]
+                elif line.startswith("write-history "):
+                    bwWrite = line[14:]
+                elif line.startswith("router-signature"):
+                    self.addRouter(purpose, orport, ip, ident, published,
+                                   bw, uptime, bwRead, bwWrite,
+                                   dirport=dirport)
+                    orport = dirport = purpose = ip = ident = None
+                    published = bw = uptime = None
+                    bwRead = bwWrite = None
+        finally:
+            f.close()
+
+    def parseEIFile(self, fname):
+        """Read extra-info documents from `fname` and apply their histories.
+
+        A document is used only if its identity is already known, its
+        published time equals that router's, and it has a read-history.
+        """
+        ident = published = bwRead = bwWrite = None
+
+        f = open(fname, 'r')
+        try:
+            for line in f:
+                line = line.strip()
+                if line.startswith("opt "): line = line[4:]
+
+                if line.startswith("extra-info "):
+                    ident = binascii.a2b_hex(line.split()[2])
+                elif line.startswith("published "):
+                    published = parseTime(line[10:])
+                elif line.startswith("read-history "):
+                    bwRead = line[13:]
+                elif line.startswith("write-history "):
+                    bwWrite = line[14:]
+                elif line.startswith("router-signature"):
+                    r = self.routers.get(ident)
+                    if r and r.published == published and bwRead is not None:
+                        r.setHistory(bwRead, bwWrite)
+                    ident = published = bwRead = bwWrite = None
+        finally:
+            f.close()
+
+    def parseConsensus(self, fname):
+        """Read `fname` and set flags on every known router it lists.
+
+        Each "r " line supplies an identity (base64, decoded after one "="
+        of padding is appended); the next "s " line supplies the flags.
+        """
+        ident = None
+
+        f = open(fname, 'r')
+        try:
+            for line in f:
+                if line.startswith("r "):
+                    ident = binascii.a2b_base64(line.split()[2]+"=")
+                elif line.startswith("s "):
+                    r = self.routers.get(ident)
+                    if r:
+                        r.setFlags(line[2:].strip().split())
+                    ident = None
+        finally:
+            f.close()
+
+    def addRouter(self, purpose, orport, ip, ident, published, bw, uptime,
+                  bwRead, bwWrite, dirport=None):
+        """Record one parsed descriptor.
+
+        Descriptors with a purpose other than "general" are ignored.  A
+        descriptor replaces an existing entry for the same identity unless
+        its published time is older.
+
+        Raises ValueError if uptime, orport, ip or ident is missing, or if
+        the identity does not decode to 20 bytes.
+        """
+        if purpose and purpose != "general":
+            return
+        # Explicit checks instead of assert, so they survive "python -O".
+        for name, value in (("uptime", uptime), ("orport", orport),
+                            ("ip", ip), ("ident", ident)):
+            if value is None:
+                raise ValueError("router descriptor has no %s" % name)
+
+        ident = binascii.a2b_hex(ident)
+        published = parseTime(published)
+        if len(ident) != 20:
+            raise ValueError("router identity is %d bytes, not 20" % len(ident))
+
+        known = self.routers.get(ident)
+        if known is None or published >= known.published:
+            router = Router(orport, ip, ident, published, bw, uptime, dirport)
+            self.routers[ident] = router
+
+            router.setHistory(bwRead, bwWrite)
+
+    def getStats(self, statFn, predFn, mode="summary"):
+        """Apply `statFn` to every router accepted by `predFn`.
+
+        Returns a StatSummary when mode is "summary" and a plain list when
+        mode is "raw"; any other mode raises ValueError.
+        """
+        if mode not in ('summary', 'raw'):
+            raise ValueError("unknown mode: %r" % (mode,))
+        selected = (statFn(r) for r in self.routers.values() if predFn(r))
+        if mode == 'summary':
+            return StatSummary(selected)
+        return list(selected)
+
+
+def _portFactory(portStr):
+    """Return a predicate that is true for routers whose orport equals
+    int(portStr)."""
+    wanted = int(portStr)
+
+    def matchesPort(r):
+        return r.orport == wanted
+
+    return matchesPort
+
+##def _versionFactory(versionStr):
+
+# Parameterised predicates, written "name-argument" on the command line.
+# Each value takes the argument string and returns a predicate on a router.
+PRED_FACTORIES = {
+    "port" : _portFactory,
+##    "version" : _versionFactory,
+}
+
+# Fixed predicates, selected by name with -p.  Each takes a router and
+# returns a truth value.
+# NOTE(review): "dir" reads r.dirport, so it relies on Router objects
+# having that attribute.
+PREDICATES = {
+    "running" : lambda r: r.running,
+    "listed" : lambda r: r.listed,
+    "stable" : lambda r: r.stable,
+    "guard" : lambda r: r.guard,
+    "exit" : lambda r: r.exit,
+    "dir" : lambda r: r.dirport not in (0,None),
+    "all": lambda r: True,
+    }
+
+# Statistics, selected by name with -s.  Each takes a router and returns
+# the value to summarise; "reading" and "writing" use the last element of
+# the stored history tuple, which is None when no history is available.
+STATS = {
+    "capacity" : lambda r: r.bw,
+    "reading" : lambda r: r.readHist[3],
+    "writing" : lambda r: r.writeHist[3],
+}
+
+def usage(code=0):
+    """Write the command-line help to stderr and exit with status `code`.
+
+    Predicate and stat names are listed in sorted order so that the text
+    does not depend on dictionary ordering.
+    """
+    sys.stderr.write("""%s [-p predicate]... [-s stat]... [--raw] [-d datadir]
+  Recognized predicates are: %s %s
+  Predicates can be joined with commas
+  Recognized stats are: %s
+    capacity -> declared capacity
+    reading,writing -> most recent actual bytes-per-sec over last 15 minutes
+""" % (sys.argv[0], " ".join(sorted(PREDICATES)),
+       " ".join("%s-XXX"%p for p in sorted(PRED_FACTORIES)),
+       " ".join(sorted(STATS))))
+    sys.exit(code)
+
+
+# Command-line entry point: parse the options, load whichever cached
+# directory files exist under the data directory, then print one line per
+# (predicate, stat) pair.
+if __name__ == '__main__':
+    import os, getopt
+
+    stats = [ ]
+    predicates = [ ]
+    datadir = None
+    mode = "summary"
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "p:s:d:h", ["help","raw"])
+    except getopt.GetoptError:
+        # Fetch the exception through sys.exc_info() so that this handler
+        # parses on both Python 2 and Python 3.
+        print(sys.exc_info()[1])
+        usage(1)
+    for k,v in opts:
+        if k in ('-h', '--help'):
+            usage(0)
+        elif k == '-p':
+            # One -p value may hold several predicates separated by "," or
+            # ":"; a router has to satisfy all of them.
+            subparts = []
+            for part in re.split(r'[:,]', v):
+                if "-" in part:
+                    factname, factarg = part.split("-", 1)
+                    try:
+                        factory = PRED_FACTORIES[factname]
+                    except KeyError:
+                        print("Unrecognized predicate: %r"%part)
+                        usage(1)
+                    try:
+                        subparts.append( factory(factarg) )
+                    except ValueError:
+                        print("Bad argument for predicate: %r"%part)
+                        usage(1)
+                else:
+                    try:
+                        subparts.append( PREDICATES[part] )
+                    except KeyError:
+                        print("Unrecognized predicate: %r"%part)
+                        usage(1)
+            if len(subparts) == 1:
+                predicates.append((v, subparts[0]))
+            else:
+                # Bind this option's list as a default argument.  Without
+                # it fn would look "subparts" up when called, after a
+                # later -p has already rebound the name.
+                def fn(r, subparts=subparts):
+                    for p in subparts:
+                        if not p(r):
+                            return False
+                    return True
+                predicates.append((v, fn))
+        elif k == '-s':
+            try:
+                stats.append( (v, STATS[v]) )
+            except KeyError:
+                print("Unrecognized stat: %r"%v)
+                usage(1)
+        elif k == '-d':
+            datadir = v
+        elif k == '--raw':
+            mode="raw"
+        else:
+            raise Exception(k)
+    if not datadir:
+        sys.stderr.write("No datadir given. defaulting to ~/.tor\n")
+        datadir = os.path.expanduser("~/.tor")
+    if not os.path.exists(datadir):
+        # Nothing can be loaded from a missing directory, so stop here
+        # rather than report statistics for zero routers.
+        sys.stderr.write("No such path: %r\n"%datadir)
+        sys.exit(1)
+    if not predicates:
+        predicates.append( ("all", PREDICATES["all"]) )
+    if not stats:
+        stats.append( ("bandwidth", STATS["capacity"]) )
+
+    s = Stats()
+    # Descriptors are loaded first: the extra-info and consensus parsers
+    # only update routers that are already known.
+    for new in "", ".new":
+        fname = os.path.join(datadir, "cached-descriptors"+new)
+        if os.path.exists(fname):
+            s.parseDescFile(fname)
+    for new in "", ".new":
+        fname = os.path.join(datadir, "cached-extrainfo"+new)
+        if os.path.exists(fname):
+            s.parseEIFile(fname)
+    fname = os.path.join(datadir, "cached-consensus")
+    if os.path.exists(fname):
+        s.parseConsensus(fname)
+
+    print("%s routers parsed"%len(s.routers))
+    for predName, pred in predicates:
+        for statName, stat in stats:
+            print("%s: %s: %s" % (predName, statName,
+                                  s.getStats(statFn=stat, predFn=pred,
+                                             mode=mode)))


Property changes on: projects/dir-stats/trunk/routerstats.py
___________________________________________________________________
Name: svn:executable
   + 



More information about the tor-commits mailing list