[tor-commits] [pytorctl/master] Option to get get_network_status as an iterator

mikeperry at torproject.org mikeperry at torproject.org
Fri Jun 17 23:32:02 UTC 2011


commit 45d7753b7c947b141210017d1b987673c51d97c5
Author: Damian Johnson <atagar at torproject.org>
Date:   Wed Jun 15 19:27:12 2011 -0700

    Option to get get_network_status as an iterator
    
    Setting the 'getIterator' argument drops the memory usage of calling
    get_network_status by 71% (from 3.5 MB to 1 MB). This is still higher than what
    I was expecting from a generator, though certainly much better.
    
    Unfortunately this didn't have an impact on the ConsensusTracker. The memory
    usage from its constructor dwarfs anything else I've looked at (18.8 MB) and
    didn't drop like I'd expect when consensus_only was false. :(
---
 TorCtl.py |   54 +++++++++++++++++++++++++++++++++++-------------------
 1 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/TorCtl.py b/TorCtl.py
index 44a612b..8680ee6 100755
--- a/TorCtl.py
+++ b/TorCtl.py
@@ -1040,10 +1040,21 @@ class Connection:
        TorCtl.NetworkStatus instances."""
     return parse_ns_body(self.sendAndRecv("GETINFO dir/status-vote/current/consensus\r\n")[0][2])
 
-  def get_network_status(self, who="all"):
+  def get_network_status(self, who="all", getIterator=False):
     """Get the entire network status list. Returns a list of
-       TorCtl.NetworkStatus instances."""
-    return parse_ns_body(self.sendAndRecv("GETINFO ns/"+who+"\r\n")[0][2])
+       TorCtl.NetworkStatus instances.
+
+       Be aware that by default this reads the whole consensus into memory at
+       once which can be fairly sizable (as of writing 3.5 MB), and even if
+       freed it may remain allocated to the interpreter:
+       http://effbot.org/pyfaq/why-doesnt-python-release-the-memory-when-i-delete-a-large-object.htm
+
+       To avoid this, pass getIterator=True to get an iterator instead.
+      """
+
+    nsData = self.sendAndRecv("GETINFO ns/"+who+"\r\n")[0][2]
+    if getIterator: return ns_body_iter(nsData)
+    else: return parse_ns_body(nsData)
 
   def get_address_mappings(self, type="all"):
     # TODO: Also parse errors and GMTExpiry
@@ -1232,21 +1243,26 @@ class Connection:
 def parse_ns_body(data):
   """Parse the body of an NS event or command into a list of
      NetworkStatus instances"""
-  if not data: return []
-  nsgroups = re.compile(r"^r ", re.M).split(data)
-  nsgroups.pop(0)
-  nslist = []
-  for nsline in nsgroups:
-    m = re.search(r"^s((?:[ ]\S*)+)", nsline, re.M)
-    flags = m.groups()
-    flags = flags[0].strip().split(" ")
-    m = re.match(r"(\S+)\s(\S+)\s(\S+)\s(\S+\s\S+)\s(\S+)\s(\d+)\s(\d+)", nsline)    
-    w = re.search(r"^w Bandwidth=(\d+)", nsline, re.M)
-    if w:
-      nslist.append(NetworkStatus(*(m.groups()+(flags,)+(int(w.group(1))*1000,))))
-    else:
-      nslist.append(NetworkStatus(*(m.groups() + (flags,))))
-  return nslist
+  return list(ns_body_iter(data))
+
+def ns_body_iter(data):
+  """Generator for NetworkStatus instances of an NS event"""
+  if data:
+    nsgroups = re.compile(r"^r ", re.M).split(data)
+    nsgroups.pop(0)
+
+    while nsgroups:
+      nsline = nsgroups.pop(0)
+      m = re.search(r"^s((?:[ ]\S*)+)", nsline, re.M)
+      flags = m.groups()
+      flags = flags[0].strip().split(" ")
+      m = re.match(r"(\S+)\s(\S+)\s(\S+)\s(\S+\s\S+)\s(\S+)\s(\d+)\s(\d+)", nsline)
+      w = re.search(r"^w Bandwidth=(\d+)", nsline, re.M)
+
+      if w:
+        yield NetworkStatus(*(m.groups()+(flags,)+(int(w.group(1))*1000,)))
+      else:
+        yield NetworkStatus(*(m.groups() + (flags,)))
 
 class EventSink:
   def heartbeat_event(self, event): pass
@@ -1687,7 +1703,7 @@ class ConsensusTracker(EventHandler):
     if self.consensus_only:
       self._update_consensus(self.c.get_consensus())
     else:
-      self._update_consensus(self.c.get_network_status())
+      self._update_consensus(self.c.get_network_status(getIterator=True))
     self._read_routers(self.ns_map.values())
 
   def new_consensus_event(self, n):





More information about the tor-commits mailing list