commit c9ad39af644d5d4ea7165203b4719ee25f187b29
Author: Isis Lovecruft <isis@torproject.org>
Date:   Sat Apr 18 00:03:23 2015 +0000

    Add benchmark tests for b.p.descriptors.deduplicate().
---
 lib/bridgedb/test/test_parse_descriptors.py | 77 +++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
diff --git a/lib/bridgedb/test/test_parse_descriptors.py b/lib/bridgedb/test/test_parse_descriptors.py
index aa3b662..5ef3f6a 100644
--- a/lib/bridgedb/test/test_parse_descriptors.py
+++ b/lib/bridgedb/test/test_parse_descriptors.py
@@ -13,11 +13,13 @@ from __future__ import print_function
 import datetime
 import glob
+import hashlib
 import io
 import os
 import textwrap
 
 from twisted.trial import unittest
+from twisted.trial.unittest import SkipTest
 
 HAS_STEM = False
@@ -31,6 +33,8 @@ except (ImportError, NameError), error:
 else:
     HAS_STEM = True
 
+from bridgedb.test.util import Benchmarker
+
 
 BRIDGE_NETWORKSTATUS_0 = '''\
 r MiserLandfalls 4IsyTSCtChPhFPAnq5rD8yymlqA /GMC4lz8RXT/62v6kZNdmzSmopk 2014-11-04 06:23:22 2.215.61.223 4056 0
@@ -441,6 +445,79 @@ class ParseDescriptorsTests(unittest.TestCase):
             datetime.datetime.strptime("2014-12-04 03:10:25", "%Y-%m-%d %H:%M:%S"),
             "We should have the newest available descriptor for this router.")
 
+    def createDuplicatesForBenchmark(self, b=1, n=1200):
+        """Create a bunch of duplicate extrainfos for benchmark tests.
+
+        :param int b: The number of fake "bridges" to create **n** duplicate
+            descriptors for.
+        :param int n: The number of duplicate descriptors for each bridge
+            **b**.
+        """
+        descFiles = []
+
+        # The timestamp and fingerprint from BRIDGE_EXTRA_INFO_DESCRIPTOR:
+        timestamp = "2014-11-04 06:23:22"
+        Y, M, rest = timestamp.split("-")
+        fpr = "E08B324D20AD0A13E114F027AB9AC3F32CA696A0"
+        newerFpr = "E08B324D20AD0A13E114F027AB9AC3F32CA696A0"
+
+        total = 0
+        needed = b * n
+        for x in range(b):
+            if total >= needed:
+                break
+            # Re-digest the fingerprint to create a "new" bridge
+            newerFpr = hashlib.sha1(newerFpr).hexdigest().upper()
+            # Generate n extrainfos with different timestamps:
+            count = 0
+            for year in range(1, ((n + 1) / 12) + 2):  # Start from the next year
+                if count >= n:
+                    break
+                for month in range(1, 13):
+                    if count < n:
+                        newerTimestamp = "-".join([str(int(Y) + year), "%02d" % month, rest])
+                        newerDuplicate = BRIDGE_EXTRA_INFO_DESCRIPTOR[:].replace(
+                            fpr, newerFpr).replace(
+                                timestamp, newerTimestamp)
+                        descFiles.append(io.BytesIO(newerDuplicate))
+                        count += 1
+                        total += 1
+                    else:
+                        break
+
+        print("Deduplicating %5d total descriptors (%4d per bridge; %3d bridges):"
+              % (len(descFiles), n, b), end='\t')
+        return descFiles
+
+    def test_parse_descriptors_parseExtraInfoFiles_benchmark_100_bridges(self):
+        """Benchmark test for ``b.p.descriptors.parseExtraInfoFiles``."""
+        print()
+        for i in range(1, 11):
+            descFiles = self.createDuplicatesForBenchmark(b=100, n=i)
+            with Benchmarker():
+                routers = descriptors.parseExtraInfoFiles(*descFiles)
+
+    def test_parse_descriptors_parseExtraInfoFiles_benchmark_1000_bridges(self):
+        """Benchmark test for ``b.p.descriptors.parseExtraInfoFiles``."""
+        print()
+        for i in range(1, 11):
+            descFiles = self.createDuplicatesForBenchmark(b=1000, n=i)
+            with Benchmarker():
+                routers = descriptors.parseExtraInfoFiles(*descFiles)
+
+    def test_parse_descriptors_parseExtraInfoFiles_benchmark_10000_bridges(self):
+        """Benchmark test for ``b.p.descriptors.parseExtraInfoFiles``.
+
+        The algorithm should grow linearly in the number of duplicates.
+        """
+        raise SkipTest(("This test takes ~7 minutes to complete. "
+                        "Run it on your own free time."))
+
+        print()
+        for i in range(1, 11):
+            descFiles = self.createDuplicatesForBenchmark(b=10000, n=i)
+            with Benchmarker():
+                routers = descriptors.parseExtraInfoFiles(*descFiles)
+
     def test_parse_descriptors_parseExtraInfoFiles_no_validate(self):
         """Test for ``b.p.descriptors.parseExtraInfoFiles`` with descriptor
         validation disabled.
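
Note: the Benchmarker context manager imported above from bridgedb.test.util is not part of this diff. As a rough sketch only (an assumption, not the module's actual implementation), a timing context manager of this kind could record wall-clock time around the block and print the elapsed seconds:

    import time

    class Benchmarker(object):
        """Time the body of a ``with`` block and print the elapsed seconds.
        (Sketch only; the real bridgedb.test.util.Benchmarker may differ.)"""

        def __enter__(self):
            # Record the wall-clock start time when the block is entered.
            self.start = time.time()
            return self

        def __exit__(self, excType, excValue, tb):
            # Compute and report the elapsed time when the block exits.
            self.elapsed = time.time() - self.start
            print("%.4f seconds" % self.elapsed)
            return False  # do not swallow exceptions raised inside the block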