[tor-commits] [stem/master] Adding get_archive_path() method to descriptors

atagar at torproject.org atagar at torproject.org
Sun Feb 17 06:07:15 UTC 2013


commit f83c7efce3380c60c81028802e198791aa30dfbd
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat Feb 16 10:53:39 2013 -0800

    Adding get_archive_path() method to descriptors
    
    We can't use a TarInfo's 'name' attribute for get_path() since that corresponds
    to its location within the archive. That said, I've often wanted both paths, so
    this both fixes get_path() for tarballs and adds a get_archive_path().
---
 stem/descriptor/__init__.py     |   17 +++++++++++++++++
 stem/descriptor/reader.py       |    9 +++++++++
 test/integ/descriptor/reader.py |   18 ++++++++++++++++++
 test/settings.cfg               |    2 +-
 4 files changed, 45 insertions(+), 1 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 6e5d68f..87872d6 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -9,6 +9,7 @@ Package for parsing and processing descriptor data.
 
   Descriptor - Common parent for all descriptor file types.
     |- get_path - location of the descriptor on disk if it came from a file
+    |- get_archive_path - location of the descriptor within the archive it came from
     |- get_unrecognized_lines - unparsed descriptor content
     +- __str__ - string that the descriptor was made from
 
@@ -250,6 +251,7 @@ class Descriptor(object):
 
   def __init__(self, contents):
     self._path = None
+    self._archive_path = None
     self._raw_contents = contents
 
   def get_path(self):
@@ -261,6 +263,18 @@ class Descriptor(object):
 
     return self._path
 
+  def get_archive_path(self):
+    """
+    If this descriptor came from an archive then provides its path within the
+    archive. This is only set if the descriptor came from a
+    :class:`~stem.descriptor.reader.DescriptorReader`, and is **None** if this
+    descriptor didn't come from an archive.
+
+    :returns: **str** with the descriptor's path within the archive
+    """
+
+    return self._archive_path
+
   def get_unrecognized_lines(self):
     """
     Provides a list of lines that were either ignored or had data that we did
@@ -275,6 +289,9 @@ class Descriptor(object):
   def _set_path(self, path):
     self._path = path
 
+  def _set_archive_path(self, path):
+    self._archive_path = path
+
   def __str__(self):
     return self._raw_contents
 
diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py
index 92adeaf..b017e06 100644
--- a/stem/descriptor/reader.py
+++ b/stem/descriptor/reader.py
@@ -545,10 +545,19 @@ class DescriptorReader(object):
         if tar_entry.isfile():
           entry = tar_file.extractfile(tar_entry)
 
+          # The parse_file() function uses the 'name' attribute to figure out
+          # the file that it came from. In the case of TarInfo instances the
+          # 'name' is the path within the archive. We'll want that for
+          # _set_archive_path().
+
+          archive_path = entry.name
+          entry.name = target
+
           for desc in stem.descriptor.parse_file(entry, validate = self._validate, document_handler = self._document_handler):
             if self._is_stopped.isSet():
               return
 
+            desc._set_archive_path(archive_path)
             self._unreturned_descriptors.put(desc)
             self._iter_notice.set()
 
diff --git a/test/integ/descriptor/reader.py b/test/integ/descriptor/reader.py
index 936cf39..3ed47ae 100644
--- a/test/integ/descriptor/reader.py
+++ b/test/integ/descriptor/reader.py
@@ -266,6 +266,24 @@ class TestDescriptorReader(unittest.TestCase):
     with reader:
       self.assertEqual(0, len(list(reader)))
 
+  def test_archived_paths(self):
+    """
+    Checks the get_path() and get_archive_path() for a tarball.
+    """
+
+    expected_archive_paths = (
+      "descriptor_archive/0/2/02c311d3d789f3f55c0880b5c85f3c196343552c",
+      "descriptor_archive/1/b/1bb798cae15e21479db0bc700767eee4733e9d4a",
+      "descriptor_archive/1/b/1ef75fef564180d8b3f72c6f8635ff0cd855f92c",
+    )
+
+    test_path = os.path.join(DESCRIPTOR_TEST_DATA, "descriptor_archive.tar")
+
+    with stem.descriptor.reader.DescriptorReader(test_path) as reader:
+      for desc in reader:
+        self.assertEqual(test_path, desc.get_path())
+        self.assertTrue(desc.get_archive_path() in expected_archive_paths)
+
   def test_archived_uncompressed(self):
     """
     Checks that we can read descriptors from an uncompressed archive.
diff --git a/test/settings.cfg b/test/settings.cfg
index a5abc91..b467fd4 100644
--- a/test/settings.cfg
+++ b/test/settings.cfg
@@ -157,7 +157,7 @@ target.torrc RUN_PTRACE   => PORT, PTRACE
 pyflakes.ignore stem/prereq.py => 'RSA' imported but unused
 pyflakes.ignore stem/prereq.py => 'asn1' imported but unused
 pyflakes.ignore stem/prereq.py => 'long_to_bytes' imported but unused
-pyflakes.ignore stem/descriptor/__init__.py => redefinition of unused 'OrderedDict' from line 54
+pyflakes.ignore stem/descriptor/__init__.py => redefinition of unused 'OrderedDict' from line 55
 pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_bytes' from line 54
 pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_unicode' from line 60
 pyflakes.ignore test/mocking.py => undefined name 'builtins'





More information about the tor-commits mailing list