commit a5596873fd544d79c53f0c0123caa89bdb5a9f72 Author: Damian Johnson atagar@torproject.org Date: Sat May 31 14:14:06 2014 -0700
Adding tarfile support to stem.descriptor.parse_file()
A while back Karsten tried to hand a tarfile to our parse_file() function and got confusing results...
https://trac.torproject.org/projects/tor/ticket/10977
Expanding our parse_file() function so it'll happily handle tarfiles and tar paths.
Note that the DescriptorReader, which already had tar support, is keeping its own separate implementation. This is because using parse_file()'s tar support would have a couple of drawbacks...
1. The reader then couldn't stop in the middle of handling tarballs.
2. If a tarball contains both descriptor and non-descriptor content then the DescriptorReader can handle that. parse_file(), however, raises an exception. --- docs/change_log.rst | 2 + stem/descriptor/__init__.py | 55 +++++++++++++++++++++++++--- stem/descriptor/reader.py | 24 +++--------- stem/util/system.py | 34 ++++++++++++++++- test/integ/descriptor/server_descriptor.py | 31 ++++++++++++++++ 5 files changed, 121 insertions(+), 25 deletions(-)
diff --git a/docs/change_log.rst b/docs/change_log.rst index 279f2bb..7f2982b 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -59,6 +59,7 @@ The following are only available within Stem's `git repository
* **Descriptors**
+ * Added tarfile support to :func:`~stem.descriptor.__init__.parse_file` (:trac:`10977`) * Added microdescriptor's new identity and identity_type attributes (:spec:`22cda72`)
* **Utilities** @@ -116,6 +117,7 @@ and a myriad of smaller improvements and fixes. * Added :func:`stem.util.system.get_user` * Added :func:`stem.util.system.get_start_time` * Added :func:`stem.util.system.get_bsd_jail_path` + * Added :func:`stem.util.system.is_tarfile` * Added :func:`stem.util.connection.is_private_address`
* **Website** diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index 2d7cc69..4270cb9 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -52,10 +52,12 @@ __all__ = [
import os import re +import tarfile
import stem.prereq import stem.util.enum import stem.util.str_tools +import stem.util.system
try: # added in python 2.7 @@ -127,7 +129,7 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
my_descriptor_file = open(descriptor_path, 'rb')
- :param str,file descriptor_file: path or opened file with the descriptor contents + :param str,file,tarfile descriptor_file: path or opened file with the descriptor contents :param str descriptor_type: `descriptor type https://metrics.torproject.org/formats.html#descriptortypes`_, this is guessed if not provided :param bool validate: checks the validity of the descriptor's content if **True**, skips these checks otherwise @@ -143,14 +145,23 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen * **IOError** if unable to read from the descriptor_file """
- # if we got a path then open that file for parsing + # Delegate to a helper if this is a path or tarfile. + + handler = None
if isinstance(descriptor_file, (bytes, unicode)): - with open(descriptor_file) as desc_file: - for desc in parse_file(desc_file, descriptor_type, validate, document_handler, **kwargs): - yield desc + if stem.util.system.is_tarfile(descriptor_file): + handler = _parse_file_for_tar_path + else: + handler = _parse_file_for_path + elif isinstance(descriptor_file, tarfile.TarFile): + handler = _parse_file_for_tarfile + + if handler: + for desc in handler(descriptor_file, descriptor_type, validate, document_handler, **kwargs): + yield desc
- return + return
# The tor descriptor specifications do not provide a reliable method for # identifying a descriptor file's type and version so we need to guess @@ -210,6 +221,38 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen raise TypeError("Unable to determine the descriptor's type. filename: '%s', first line: '%s'" % (filename, first_line))
+def _parse_file_for_path(descriptor_file, *args, **kwargs): + with open(descriptor_file, 'rb') as desc_file: + for desc in parse_file(desc_file, *args, **kwargs): + yield desc + + +def _parse_file_for_tar_path(descriptor_file, *args, **kwargs): + # TODO: use 'with' for tarfile after dropping python 2.6 support + tar_file = tarfile.open(descriptor_file) + + try: + for desc in parse_file(tar_file, *args, **kwargs): + desc._set_path(os.path.abspath(descriptor_file)) + yield desc + finally: + if tar_file: + tar_file.close() + + +def _parse_file_for_tarfile(descriptor_file, *args, **kwargs): + for tar_entry in descriptor_file: + if tar_entry.isfile(): + entry = descriptor_file.extractfile(tar_entry) + + try: + for desc in parse_file(entry, *args, **kwargs): + desc._set_archive_path(entry.name) + yield desc + finally: + entry.close() + + def _parse_metrics_file(descriptor_type, major_version, minor_version, descriptor_file, validate, document_handler, **kwargs): # Parses descriptor files from metrics, yielding individual descriptors. This # throws a TypeError if the descriptor_type or version isn't recognized. diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py index 3fb4166..05c7533 100644 --- a/stem/descriptor/reader.py +++ b/stem/descriptor/reader.py @@ -85,6 +85,7 @@ import threading
import stem.descriptor import stem.prereq +import stem.util.system
# flag to indicate when the reader thread is out of descriptor files to read FINISHED = 'DONE' @@ -487,24 +488,10 @@ class DescriptorReader(object):
target_type = mimetypes.guess_type(target)
- # Checking if it's a tar file may fail due to permissions so failing back - # to the mime type... - # - # IOError: [Errno 13] Permission denied: '/vmlinuz.old' - # - # With python 3 insuffient permissions raises an AttributeError instead... - # - # http://bugs.python.org/issue17059 - - try: - is_tar = tarfile.is_tarfile(target) - except (IOError, AttributeError): - is_tar = target_type[0] == 'application/x-tar' - if target_type[0] in (None, 'text/plain'): # either '.txt' or an unknown type self._handle_descriptor_file(target, target_type) - elif is_tar: + elif stem.util.system.is_tarfile(target): # handles gzip, bz2, and decompressed tarballs among others self._handle_archive(target) else: @@ -529,9 +516,10 @@ class DescriptorReader(object): self._notify_skip_listeners(target, ReadFailed(exc))
def _handle_archive(self, target): - # TODO: This would be nicer via the 'with' keyword, but tarfile's __exit__ - # method was added sometime after python 2.5. We should change this when - # we drop python 2.5 support. + # TODO: When dropping python 2.6 support go back to using 'with' for + # tarfiles... + # + # http://bugs.python.org/issue7232
tar_file = None
diff --git a/stem/util/system.py b/stem/util/system.py index 89317df..d24d34b 100644 --- a/stem/util/system.py +++ b/stem/util/system.py @@ -16,6 +16,8 @@ best-effort, providing **None** if the lookup fails.
is_available - determines if a command is available on this system is_running - determines if a given process is running + call - runs the given system command and provides back the results + get_name_by_pid - gets the name for a process by the given pid get_pid_by_name - gets the pid for a process by the given name get_pid_by_port - gets the pid for a process listening to a given port @@ -25,9 +27,11 @@ best-effort, providing **None** if the lookup fails. get_start_time - provides the unix timestamp when the process started get_bsd_jail_id - provides the BSD jail id a given process is running within get_bsd_jail_path - provides the path of the given BSD jail + + is_tarfile - checks if the given path is a tarball expand_path - expands relative paths and ~ entries files_with_suffix - provides files with the given suffix - call - runs the given system command and provides back the results +
get_process_name - provides our process' name set_process_name - changes our process' name @@ -35,9 +39,11 @@ best-effort, providing **None** if the lookup fails.
import ctypes import ctypes.util +import mimetypes import os import platform import subprocess +import tarfile import time
import stem.util.proc @@ -763,6 +769,32 @@ def get_bsd_jail_path(jid): return None
+def is_tarfile(path): + """ + Returns if the path belongs to a tarfile or not. + + .. versionadded:: 1.2.0 + + :param str path: path to be checked + + :returns: **True** if the path belongs to a tarball, **False** otherwise + """ + + # Checking if it's a tar file may fail due to permissions so falling back + # to the mime type... + # + # IOError: [Errno 13] Permission denied: '/vmlinuz.old' + # + # With python 3 insufficient permissions raise an AttributeError instead... + # + # http://bugs.python.org/issue17059 + + try: + return tarfile.is_tarfile(path) + except (IOError, AttributeError): + return mimetypes.guess_type(path)[0] == 'application/x-tar' + + def expand_path(path, cwd = None): """ Provides an absolute path, expanding tildes with the user's home and diff --git a/test/integ/descriptor/server_descriptor.py b/test/integ/descriptor/server_descriptor.py index 7b4645c..6d16add 100644 --- a/test/integ/descriptor/server_descriptor.py +++ b/test/integ/descriptor/server_descriptor.py @@ -4,6 +4,7 @@ Integration tests for stem.descriptor.server_descriptor.
import datetime import os +import tarfile import unittest
import stem.control @@ -15,8 +16,38 @@ import test.runner
from test.integ.descriptor import get_resource
+TARFILE_PATH = os.path.join(os.path.dirname(__file__), 'data', 'descriptor_archive.tar') +TARFILE_FINGERPRINTS = set([ + u'B6D83EC2D9E18B0A7A33428F8CFA9C536769E209', + u'E0BD57A11F00041A9789577C53A1B784473669E4', + u'1F43EE37A0670301AD9CB555D94AFEC2C89FDE86', +]) +
class TestServerDescriptor(unittest.TestCase): + def test_with_tarfile_path(self): + """ + Fetch server descriptors via parse_file() for a tarfile path. + """ + + descriptors = list(stem.descriptor.parse_file(TARFILE_PATH)) + self.assertEqual(3, len(descriptors)) + + fingerprints = set([desc.fingerprint for desc in descriptors]) + self.assertEqual(TARFILE_FINGERPRINTS, fingerprints) + + def test_with_tarfile_object(self): + """ + Fetch server descriptors via parse_file() for a tarfile object. + """ + + with tarfile.open(TARFILE_PATH) as tar_file: + descriptors = list(stem.descriptor.parse_file(tar_file)) + self.assertEqual(3, len(descriptors)) + + fingerprints = set([desc.fingerprint for desc in descriptors]) + self.assertEqual(TARFILE_FINGERPRINTS, fingerprints) + def test_metrics_descriptor(self): """ Parses and checks our results against a server descriptor from metrics.