[tor-commits] [stem/master] Parse proc connections by index

atagar at torproject.org atagar at torproject.org
Sun Oct 1 23:14:34 UTC 2017


commit 8da78a9fadc178e14183ba0a65b9593dcf43b7bb
Author: Damian Johnson <atagar at torproject.org>
Date:   Mon Sep 25 22:07:13 2017 -0700

    Parse proc connections by index
    
    Proc's /proc/net/tcp commonly has thousands of lines on even moderately busy
    relays. For me it takes about half a second to process, pegging the cpu during
    that time.
    
    After lots of experimenting I finally found something that greatly speeds
    this up. Parsing the proc content via index rather than split makes a large
    difference. This is more delicate, so it might require some adjustment to
    accommodate how other platforms format their proc contents.
    
    This is about a 30% improvement, but along with the earlier changes it
    accounts in total for a 5x speedup in proc connection resolution, going from
    0.38s to 0.07s for my relay's proc contents.
---
 docs/change_log.rst    |  1 +
 stem/util/proc.py      | 98 +++++++++++++++++++++++++++++++++++---------------
 test/unit/util/proc.py | 14 ++++----
 3 files changed, 79 insertions(+), 34 deletions(-)

diff --git a/docs/change_log.rst b/docs/change_log.rst
index 415a8bfe..16a16ad7 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -88,6 +88,7 @@ The following are only available within Stem's `git repository
   * Added round argument to :func:`~stem.util.str_tools.size_label`
   * Added :class:`~stem.util.test_tools.TimedTestRunner` and :func:`~stem.util.test_tools.test_runtimes`
   * Supporing pid arguments in :func:`~stem.util.system.is_running`
+  * Made connection resolution via proc about 5x faster
   * Normalized :func:`~stem.util.term.format` to return unicode
   * Don't load vim swap files as configurations
 
diff --git a/stem/util/proc.py b/stem/util/proc.py
index 6f00311b..70096a33 100644
--- a/stem/util/proc.py
+++ b/stem/util/proc.py
@@ -379,27 +379,70 @@ def connections(pid = None, user = None):
         continue  # ipv6 proc contents are optional
 
       protocol = proc_file_path[10:].rstrip('6')  # 'tcp' or 'udp'
-      is_ipv6 = proc_file_path.endswith('6')
+      is_tcp, is_udp, is_ipv6 = protocol == 'tcp', protocol == 'udp', proc_file_path.endswith('6')
+      title = ''
 
       try:
         with open(proc_file_path, 'rb') as proc_file:
-          proc_file.readline()  # skip the first line
+          title = proc_file.readline()
+
+          if 'local_address' in title:
+            laddr_start = title.index('local_address')
+            laddr_end = laddr_start + (8 if not is_ipv6 else 32)
+
+            lport_start = laddr_end + 1
+            lport_end = lport_start + 4
+          else:
+            raise IOError("title line missing 'local_address', %s" % title)
+
+          if 'rem_address' in title or 'remote_address' in title:
+            raddr_start = title.index('rem_address') if 'rem_address' in title else title.index('remote_address')
+            raddr_end = raddr_start + (8 if not is_ipv6 else 32)
+
+            rport_start = raddr_end + 1
+            rport_end = rport_start + 4
+          else:
+            raise IOError("title line missing 'remote_address', %s" % title)
+
+          if 'st' in title:
+            status_start = title.index('st')
+            status_end = status_start + 2
+          else:
+            raise IOError("title line missing 'st', %s" % title)
+
+          if 'retrnsmt' in title and 'uid' in title:
+            # unlike the above fields uid is right aligned
+            uid_start = title.index('retrnsmt') + 9
+            uid_end = title.index('uid') + 3
+          elif 'retrnsmt' not in title:
+            raise IOError("title line missing 'retrnsmt', %s" % title)
+          else:
+            raise IOError("title line missing 'uid', %s" % title)
+
+          if 'timeout' in title:
+            # inodes can lack a header, and are a dynamic size
+            inode_start = title.index('timeout') + 8
+          else:
+            raise IOError("title line missing 'timeout', %s" % title)
 
           for line in proc_file:
-            _, l_addr, f_addr, status, _, _, _, uid, _, inode = line.split()[:10]
-
-            if inodes and inode not in inodes:
+            if inodes and line[inode_start:].split(' ', 1)[0] not in inodes:
               continue
-            elif process_uid and uid != process_uid:
+            elif process_uid and line[uid_start:uid_end].strip() != process_uid:
               continue
-            elif protocol == 'tcp' and status != b'01':
+            elif is_tcp and line[status_start:status_end] != b'01':
               continue  # skip tcp connections that aren't yet established
-            elif protocol == 'udp' and (f_addr == '00000000:0000' or f_addr == '00000000000000000000000000000000:0000'):
+
+            l_addr = _unpack_addr(line[laddr_start:laddr_end])
+            l_port = int(line[lport_start:lport_end], 16)
+
+            r_addr = _unpack_addr(line[raddr_start:raddr_end])
+            r_port = int(line[rport_start:rport_end], 16)
+
+            if is_udp and (r_addr == '0.0.0.0' or r_addr == '0000:0000:0000:0000:0000:0000') and r_port == 0:
               continue  # skip udp connections with a blank destination
 
-            local_ip, local_port = _decode_proc_address_encoding(l_addr, is_ipv6)
-            foreign_ip, foreign_port = _decode_proc_address_encoding(f_addr, is_ipv6)
-            conn.append(stem.util.connection.Connection(local_ip, local_port, foreign_ip, foreign_port, protocol, is_ipv6))
+            conn.append(stem.util.connection.Connection(l_addr, l_port, r_addr, r_port, protocol, is_ipv6))
       except IOError as exc:
         raise IOError("unable to read '%s': %s" % (proc_file_path, exc))
       except Exception as exc:
@@ -450,7 +493,7 @@ def _inodes_for_sockets(pid):
   return inodes
 
 
-def _decode_proc_address_encoding(addr, is_ipv6):
+def _unpack_addr(addr):
   """
   Translates an address entry in the /proc/net/* contents to a human readable
   form (`reference <http://linuxdevcenter.com/pub/a/linux/2000/11/16/LinuxAdmin.html>`_,
@@ -458,24 +501,21 @@ def _decode_proc_address_encoding(addr, is_ipv6):
 
   ::
 
-    "0500000A:0016" -> ("10.0.0.5", 22)
-    "F804012A4A5190010000000002000000:01BB" -> ("2a01:4f8:190:514a::2", 443)
+    "0500000A" -> "10.0.0.5"
+    "F804012A4A5190010000000002000000" -> "2a01:4f8:190:514a::2"
 
   :param str addr: proc address entry to be decoded
-  :param bool is_ipv6: if we should treat the address as ipv6
 
-  :returns: **tuple** of the form **(addr, port)**, with addr as a string and port an int
+  :returns: **str** of the decoded address
   """
 
-  ip, port = addr.rsplit(b':', 1)
-  port = int(port, 16)  # the port is represented as a two-byte hexadecimal number
-
-  if ip not in ENCODED_ADDR:
-    if not is_ipv6:
-      ip_encoded = base64.b16decode(ip)[::-1] if IS_LITTLE_ENDIAN else base64.b16decode(ip)
-      ENCODED_ADDR[ip] = socket.inet_ntop(socket.AF_INET, ip_encoded)
+  if addr not in ENCODED_ADDR:
+    if len(addr) == 8:
+      # IPv4 address
+      decoded = base64.b16decode(addr)[::-1] if IS_LITTLE_ENDIAN else base64.b16decode(addr)
+      ENCODED_ADDR[addr] = socket.inet_ntop(socket.AF_INET, decoded)
     else:
-      ip_encoded = ip
+      # IPv6 address
 
       if IS_LITTLE_ENDIAN:
         # Group into eight characters, then invert in pairs...
@@ -485,14 +525,16 @@ def _decode_proc_address_encoding(addr, is_ipv6):
         inverted = []
 
         for i in range(4):
-          grouping = ip[8 * i:8 * (i + 1)]
+          grouping = addr[8 * i:8 * (i + 1)]
           inverted += [grouping[2 * i:2 * (i + 1)] for i in range(4)][::-1]
 
-        ip_encoded = b''.join(inverted)
+        encoded = b''.join(inverted)
+      else:
+        encoded = addr
 
-      ENCODED_ADDR[ip] = stem.util.connection.expand_ipv6_address(socket.inet_ntop(socket.AF_INET6, base64.b16decode(ip_encoded)))
+      ENCODED_ADDR[addr] = stem.util.connection.expand_ipv6_address(socket.inet_ntop(socket.AF_INET6, base64.b16decode(encoded)))
 
-  return (ENCODED_ADDR[ip], port)
+  return ENCODED_ADDR[addr]
 
 
 def _is_float(*value):
diff --git a/test/unit/util/proc.py b/test/unit/util/proc.py
index a172fcb4..4d9c8ffa 100644
--- a/test/unit/util/proc.py
+++ b/test/unit/util/proc.py
@@ -16,6 +16,8 @@ try:
 except ImportError:
   from mock import Mock, patch
 
+TITLE_LINE = 'sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt  uid  timeout'
+
 TCP6_CONTENT = b"""\
   sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
    0: 00000000000000000000000000000000:1495 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000000 00000000   106        0 14347030 1 0000000000000000 100 0 0 10 0
@@ -210,8 +212,8 @@ class TestProc(unittest.TestCase):
       '/proc/%s/fd/4' % pid: 'pipe:[40404]',
     }[param]
 
-    tcp = b'\n 0: 11111111:1111 22222222:2222 01 44444444:44444444 55:55555555 66666666 1111 8 99999999'
-    udp = b'\n A: BBBBBBBB:BBBB CCCCCCCC:CCCC DD EEEEEEEE:EEEEEEEE FF:FFFFFFFF GGGGGGGG 1111 H IIIIIIII'
+    tcp = b'%s\n 0: 11111111:1111 22222222:2222 01 44444444:44444444 55:55555555 66666666 1111        8 99999999' % TITLE_LINE
+    udp = b'%s\n A: BBBBBBBB:BBBB CCCCCCCC:CCCC DD EEEEEEEE:EEEEEEEE FF:FFFFFFFF GGGGGGGG 1111        H IIIIIIII' % TITLE_LINE
 
     path_exists_mock.side_effect = lambda param: {
       '/proc/net/tcp': True,
@@ -258,9 +260,9 @@ class TestProc(unittest.TestCase):
     }[param]
 
     open_mock.side_effect = lambda param, mode: {
-      '/proc/net/tcp': io.BytesIO(b''),
+      '/proc/net/tcp': io.BytesIO(TITLE_LINE),
       '/proc/net/tcp6': io.BytesIO(TCP6_CONTENT),
-      '/proc/net/udp': io.BytesIO(b''),
+      '/proc/net/udp': io.BytesIO(TITLE_LINE),
     }[param]
 
     expected_results = [
@@ -286,9 +288,9 @@ class TestProc(unittest.TestCase):
     }[param]
 
     open_mock.side_effect = lambda param, mode: {
-      '/proc/net/tcp': io.BytesIO(b''),
+      '/proc/net/tcp': io.BytesIO(TITLE_LINE),
       '/proc/net/tcp6': io.BytesIO(TCP6_CONTENT),
-      '/proc/net/udp': io.BytesIO(b''),
+      '/proc/net/udp': io.BytesIO(TITLE_LINE),
     }[param]
 
     expected_results = [



More information about the tor-commits mailing list