commit 8736a7ee133a8eecc7f193b52f23c3ed951271ee
Author: Damian Johnson <atagar@torproject.org>
Date:   Tue Mar 24 09:26:41 2015 -0700
tail() function for reading the last lines from a file
Tor log files can be on the order of gigabytes, so we often want a tail helper for reading them. Happily, StackOverflow had some great answers...
https://stackoverflow.com/questions/136168/get-last-n-lines-of-a-file-with-python-similar-to-tail
Adopting a slight tweak of S.Lott's answer with papercrane's fix. Actually, this performs even better than shelling out to tail. Tad surprised. :P
% cat scrap.py
import time
import stem.util.system
def time_runs(title, fetch_lines, runs = 3):
  """
  Prints a heading, then the result and wall-clock duration of each run.

  :param str title: heading printed before the runs
  :param callable fetch_lines: no-argument callable providing the file's last lines
  :param int runs: number of times fetch_lines is invoked
  """

  # print() with a single argument behaves the same on Python 2 and 3

  print(title)

  for _ in range(runs):
    start_time = time.time()
    print(fetch_lines())
    print('took %s' % (time.time() - start_time))


def read_whole_file():
  """
  Naive approach: load every line of the file, then keep the last three.
  """

  with open('/tmp/long_file') as long_file:
    return long_file.readlines()[-3:]


time_runs("Reading the whole file:", read_whole_file)
time_runs("\nShelling out to tail:", lambda: stem.util.system.call('tail -n 3 /tmp/long_file'))
time_runs("\nCalling our tail() function:", lambda: stem.util.system.tail('/tmp/long_file', 3))
--------------------------------------------------------------------------------
% python scrap.py
Reading the whole file:
[' or the dense London fog\n', "but I'm with people you meet\n", ' both the wicked and sweet.\n']
took 0.546777009964
[' or the dense London fog\n', "but I'm with people you meet\n", ' both the wicked and sweet.\n']
took 0.518195152283
[' or the dense London fog\n', "but I'm with people you meet\n", ' both the wicked and sweet.\n']
took 0.519068956375

Shelling out to tail:
[u'or the dense London fog', u"but I'm with people you meet", u' both the wicked and sweet.']
took 0.00712585449219
[u'or the dense London fog', u"but I'm with people you meet", u' both the wicked and sweet.']
took 0.00904107093811
[u'or the dense London fog', u"but I'm with people you meet", u' both the wicked and sweet.']
took 0.00874090194702

Calling our tail() function:
[' or the dense London fog', "but I'm with people you meet", ' both the wicked and sweet.']
took 0.000432968139648
[' or the dense London fog', "but I'm with people you meet", ' both the wicked and sweet.']
took 0.000234842300415
[' or the dense London fog', "but I'm with people you meet", ' both the wicked and sweet.']
took 0.000171184539795
---
 stem/util/system.py      | 50 +++++++++++++++++++++++++++++++++++++++++++++-
 test/unit/util/system.py | 20 +++++++++++++++++++
 test/unit/util/text_file | 14 +++++++++++++
 3 files changed, 83 insertions(+), 1 deletion(-)
diff --git a/stem/util/system.py b/stem/util/system.py index 2c15d23..f2d1bc8 100644 --- a/stem/util/system.py +++ b/stem/util/system.py @@ -30,6 +30,7 @@ best-effort, providing **None** if the lookup fails. cwd - provides the current working directory for a given process user - provides the user a process is running under start_time - provides the unix timestamp when the process started + tail - provides lines from the end of a file bsd_jail_id - provides the BSD jail id a given process is running within bsd_jail_path - provides the path of the given BSD jail
@@ -37,7 +38,6 @@ best-effort, providing **None** if the lookup fails. expand_path - expands relative paths and ~ entries files_with_suffix - provides files with the given suffix
- get_process_name - provides our process' name set_process_name - changes our process' name """ @@ -49,6 +49,7 @@ import os import platform import re import subprocess +import sys import tarfile import time
@@ -88,6 +89,8 @@ GET_CWD_LSOF = 'lsof -a -p %s -d cwd -Fn' GET_BSD_JAIL_ID_PS = 'ps -p %s -o jid' GET_BSD_JAIL_PATH = 'jls -j %s'
+BLOCK_SIZE = 1024 + # flag for setting the process name, found in '/usr/include/linux/prctl.h'
PR_SET_NAME = 15 @@ -754,6 +757,51 @@ def start_time(pid): return None
def tail(target, lines = None):
  """
  Provides the last lines from a file, similar to 'tail -n 50 /tmp/my_log'.

  The file is read backward from its end in BLOCK_SIZE chunks, so only as much
  of it as is needed to find the requested lines gets read.

  :param str,file target: path or file object to read from
  :param int lines: number of lines to read, this provides the whole file if
    **None**

  :returns: **list** of lines the file ends with, without their line endings

  :raises: **IOError** if unable to read the file
  """

  if not hasattr(target, 'read'):
    # Anything that isn't file-like is treated as a path. Binary mode is used
    # because it permits seeking to arbitrary byte offsets.

    with open(target, 'rb') as target_file:
      return tail(target_file, lines)

  if lines is not None and lines <= 0:
    return []

  # Text mode file objects wrap a binary buffer, and only that buffer can be
  # positioned at arbitrary byte offsets.

  source = getattr(target, 'buffer', target)
  encoding = getattr(target, 'encoding', None) or 'utf-8'

  if lines is None:
    source.seek(0)
    content = source.read()
  else:
    # based on snippet from...
    # https://stackoverflow.com/questions/136168/get-last-n-lines-of-a-file-with-python-similar-to-tail

    source.seek(0, 2)  # go to the end of the file
    block_start = source.tell()
    newlines_found = 0
    blocks = []  # blocks of up to BLOCK_SIZE, last block of the file first

    # We need one newline more than the number of requested lines. Otherwise
    # the first line we return might be cut short by a block boundary.

    while block_start > 0 and newlines_found <= lines:
      read_size = min(BLOCK_SIZE, block_start)
      block_start -= read_size

      source.seek(block_start)
      block = source.read(read_size)

      newlines_found += block.count(b'\n' if isinstance(block, bytes) else '\n')
      blocks.append(block)

    # blocks[0][:0] is an empty str or bytes, matching what we read

    content = blocks[0][:0].join(reversed(blocks)) if blocks else ''

  # On python 3 binary reads give us bytes. A block can begin in the middle of
  # a multi-byte character, so undecodable bytes are replaced rather than
  # raising. On python 2 bytes is str, so content is left as it is.

  if isinstance(content, bytes) and not isinstance(content, str):
    content = content.decode(encoding, 'replace')

  found_lines = content.splitlines()
  return found_lines if lines is None else found_lines[-lines:]


# Remaining context of the original diff that followed tail(), kept verbatim:
#
#  def bsd_jail_id(pid):
#    """
#    Gets the jail id for a process. These seem to only exist for FreeBSD (this
#
# diff --git a/test/unit/util/system.py b/test/unit/util/system.py
# index f6e9f35..98cfb8d 100644
# --- a/test/unit/util/system.py
# +++ b/test/unit/util/system.py
# @@ -7,6 +7,7 @@ system running the tests.
import functools import ntpath +import os import posixpath import unittest
@@ -371,6 +372,25 @@ class TestSystem(unittest.TestCase): expected_response = '/Users/atagar/tor/src/or' if test_input == '75717' else None self.assertEqual(expected_response, system.cwd(test_input))
def test_tail(self):
  """
  Checks tail() against our riddle fixture, reading it both through an open
  file handle and by its path.
  """

  riddle_path = os.path.join(os.path.dirname(__file__), 'text_file')
  final_line = ' both the wicked and sweet.'

  # reading from an already open file handle

  with open(riddle_path) as riddle_file:
    self.assertEqual([final_line], system.tail(riddle_file, 1))

  # reading by path, with a growing number of requested lines

  expected_by_count = (
    (0, []),
    (1, [final_line]),
    (2, ["but I'm with people you meet", final_line]),
  )

  for line_count, expected in expected_by_count:
    self.assertEqual(expected, system.tail(riddle_path, line_count))

  # no limit, or a limit beyond the file's length, provides all fourteen lines

  self.assertEqual(14, len(system.tail(riddle_path)))
  self.assertEqual(14, len(system.tail(riddle_path, 200)))


# Remaining context of the original diff that followed test_tail(), kept
# verbatim:
#
#    @patch('stem.util.system.call')
#    @patch('stem.util.system.is_available', Mock(return_value = True))
#    def test_bsd_jail_id(self, call_mock):
#
# diff --git a/test/unit/util/text_file b/test/unit/util/text_file
# new file mode 100644
# index 0000000..6c6d2ae
# --- /dev/null
# +++ b/test/unit/util/text_file
# @@ -0,0 +1,14 @@
# +I'm in magic and books,
# + with spooks and the crooks,
# +and forbidden lover's bliss
# + after a vine trellis kiss.
# +
# +I might be open or closed,
# + often plain as your nose.
# +I'm a gift you can't take back,
# + and in the sharing I crack.
# +
# +Authors place me in Prague
# + or the dense London fog
# +but I'm with people you meet
# + both the wicked and sweet.