commit d0f402c6f0220087320c58cecb25b53d2bd6a156 Author: Damian Johnson atagar@torproject.org Date: Mon May 14 09:43:03 2018 -0700
Resume parsing proc connections via split
While back I put quite a bit of elbow grease into speeding up connection resolution. To this end commit 8da78a9 squeezed out a 30% improvement by parsing proc contents using indices rather than split.
I was torn at the time since this made our code quite a bit more verbose and fragile, but all things being equal seemed worth the speed boost. However, in digging into connection resolution discrepancies Toralf spotted that when a relay has at least 10,000 connections the first column (sl) gets wider, throwing our parser off.
Proc is clearly intended to be parsed via delimiters, not static indices. As such largely reverting commit 8da78a9. --- stem/util/proc.py | 62 +++++++++++-------------------------------------- test/unit/util/proc.py | 63 +++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 68 insertions(+), 57 deletions(-)
diff --git a/stem/util/proc.py b/stem/util/proc.py index 6a316634..9969b59c 100644 --- a/stem/util/proc.py +++ b/stem/util/proc.py @@ -379,65 +379,29 @@ def connections(pid = None, user = None): continue # ipv6 proc contents are optional
protocol = proc_file_path[10:].rstrip('6') # 'tcp' or 'udp' - is_tcp, is_ipv6 = protocol == 'tcp', proc_file_path.endswith('6') - title = '' + is_ipv6 = proc_file_path.endswith('6')
try: with open(proc_file_path, 'rb') as proc_file: - title = proc_file.readline() - - if b'local_address' in title: - laddr_start = title.index(b'local_address') - laddr_end = laddr_start + (8 if not is_ipv6 else 32) - - lport_start = laddr_end + 1 - lport_end = lport_start + 4 - else: - raise IOError("title line missing 'local_address', %s" % title) - - if b'rem_address' in title or b'remote_address' in title: - raddr_start = title.index(b'rem_address') if b'rem_address' in title else title.index(b'remote_address') - raddr_end = raddr_start + (8 if not is_ipv6 else 32) - - rport_start = raddr_end + 1 - rport_end = rport_start + 4 - else: - raise IOError("title line missing 'remote_address', %s" % title) - - if b'st' in title: - status_start = title.index(b'st') - status_end = status_start + 2 - else: - raise IOError("title line missing 'st', %s" % title) - - if b'retrnsmt' in title and b'uid' in title: - # unlike the above fields uid is right aligned - uid_start = title.index(b'retrnsmt') + 9 - uid_end = title.index(b'uid') + 3 - elif b'retrnsmt' not in title: - raise IOError("title line missing 'retrnsmt', %s" % title) - else: - raise IOError("title line missing 'uid', %s" % title) - - if b'timeout' in title: - # inodes can lack a header, and are a dynamic size - inode_start = title.index(b'timeout') + 8 - else: - raise IOError("title line missing 'timeout', %s" % title) + proc_file.readline() # skip the first line
for line in proc_file: - if inodes and line[inode_start:].split(b' ', 1)[0] not in inodes: + _, l_dst, r_dst, status, _, _, _, uid, _, inode = line.split()[:10] + + if inodes and inode not in inodes: continue - elif process_uid and line[uid_start:uid_end].strip() != process_uid: + elif process_uid and uid != process_uid: continue - elif is_tcp and line[status_start:status_end] != b'01': + elif protocol == 'tcp' and status != b'01': continue # skip tcp connections that aren't yet established
- l_addr = _unpack_addr(line[laddr_start:laddr_end]) - l_port = int(line[lport_start:lport_end], 16) + div = l_dst.find(':') + l_addr = _unpack_addr(l_dst[:div]) + l_port = int(l_dst[div + 1:], 16)
- r_addr = _unpack_addr(line[raddr_start:raddr_end]) - r_port = int(line[rport_start:rport_end], 16) + div = r_dst.find(':') + r_addr = _unpack_addr(r_dst[:div]) + r_port = int(r_dst[div + 1:], 16)
if r_addr == '0.0.0.0' or r_addr == '0000:0000:0000:0000:0000:0000': continue # no address diff --git a/test/unit/util/proc.py b/test/unit/util/proc.py index 562d618a..7ee4ecda 100644 --- a/test/unit/util/proc.py +++ b/test/unit/util/proc.py @@ -33,6 +33,13 @@ TCP6_CONTENT = b"""\ 10: 0000000000000000FFFF00004B9E0905:1466 0000000000000000FFFF00002186364E:951E 01 00000000:00000000 02:00090E70 00000000 106 0 41512577 2 0000000000000000 26 4 31 10 -1 """
+SL_WIDTH_CHANGE = b"""\ + sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode + 0: 11111111:1111 22222233:2244 01 44444444:44444444 55:55555555 66666666 1111 8 99999999 + 1: 22222222:2222 33333344:3355 01 44444444:44444444 55:55555555 66666666 1111 8 88888888 +12675: 33333333:3333 44444455:4466 01 44444444:44444444 55:55555555 66666666 1111 8 77777777 +""".rstrip() +
class TestProc(unittest.TestCase): @patch('stem.util.proc._get_line') @@ -216,9 +223,7 @@ class TestProc(unittest.TestCase): udp = TITLE_LINE + b'\n A: BBBBBBBB:BBBB CCCCCCCC:CCCC DD EEEEEEEE:EEEEEEEE FF:FFFFFFFF GGGGGGGG 1111 H IIIIIIII'
path_exists_mock.side_effect = lambda param: { - '/proc/net/tcp': True, '/proc/net/tcp6': False, - '/proc/net/udp': True, '/proc/net/udp6': False }[param]
@@ -227,12 +232,12 @@ class TestProc(unittest.TestCase): '/proc/net/udp': io.BytesIO(udp) }[param]
- expected_results = [ + expected = [ Connection('17.17.17.17', 4369, '34.34.34.34', 8738, 'tcp', False), Connection('187.187.187.187', 48059, '204.204.204.204', 52428, 'udp', False), ]
- self.assertEqual(expected_results, proc.connections(pid)) + self.assertEqual(expected, proc.connections(pid))
@patch('os.listdir') @patch('os.path.exists') @@ -265,12 +270,12 @@ class TestProc(unittest.TestCase): '/proc/net/udp': io.BytesIO(TITLE_LINE), }[param]
- expected_results = [ + expected = [ Connection('2a01:04f8:0190:514a:0000:0000:0000:0002', 443, '2001:0638:a000:4140:0000:0000:ffff:0189', 40435, 'tcp', True), Connection('2a01:04f8:0190:514a:0000:0000:0000:0002', 443, '2001:0858:0002:0002:aabb:0000:563b:1526', 44469, 'tcp', True), ]
- self.assertEqual(expected_results, proc.connections(pid = pid)) + self.assertEqual(expected, proc.connections(pid = pid))
@patch('os.path.exists') @patch('pwd.getpwnam') @@ -293,10 +298,52 @@ class TestProc(unittest.TestCase): '/proc/net/udp': io.BytesIO(TITLE_LINE), }[param]
- expected_results = [ + expected = [ Connection('0000:0000:0000:0000:0000:ffff:0509:9e4b', 5222, '0000:0000:0000:0000:0000:ffff:4e36:8621', 38330, 'tcp', True), Connection('2a01:04f8:0190:514a:0000:0000:0000:0002', 5269, '2001:06f8:126f:0011:0000:0000:0000:0026', 50594, 'tcp', True), Connection('0000:0000:0000:0000:0000:ffff:0509:9e4b', 5222, '0000:0000:0000:0000:0000:ffff:4e36:8621', 38174, 'tcp', True), ]
- self.assertEqual(expected_results, proc.connections(user = 'me')) + self.assertEqual(expected, proc.connections(user = 'me')) + + @patch('os.listdir') + @patch('os.path.exists') + @patch('os.readlink') + @patch('stem.util.proc.open', create = True) + def test_high_connection_count(self, open_mock, readlink_mock, path_exists_mock, listdir_mock): + """ + When we have over ten thousand connections the 'SL' column's width changes. + Checking that we account for this. + """ + + pid = 1111 + + listdir_mock.side_effect = lambda param: { + '/proc/%s/fd' % pid: ['1', '2', '3', '4'], + }[param] + + readlink_mock.side_effect = lambda param: { + '/proc/%s/fd/1' % pid: 'socket:[99999999]', + '/proc/%s/fd/2' % pid: 'socket:[88888888]', + '/proc/%s/fd/3' % pid: 'socket:[77777777]', + '/proc/%s/fd/4' % pid: 'pipe:[30303]', + '/proc/%s/fd/5' % pid: 'pipe:[40404]', + }[param] + + path_exists_mock.side_effect = lambda param: { + '/proc/net/tcp6': False, + '/proc/net/udp6': False + }[param] + + open_mock.side_effect = lambda param, mode: { + '/proc/net/tcp': io.BytesIO(SL_WIDTH_CHANGE), + '/proc/net/udp': io.BytesIO(TITLE_LINE) + }[param] + + expected = [ + Connection('17.17.17.17', 4369, '51.34.34.34', 8772, 'tcp', False), + Connection('34.34.34.34', 8738, '68.51.51.51', 13141, 'tcp', False), + Connection('51.51.51.51', 13107, '85.68.68.68', 17510, 'tcp', False), + ] + + self.assertEqual(expected, proc.connections(pid))
tor-commits@lists.torproject.org