
commit f83c7efce3380c60c81028802e198791aa30dfbd Author: Damian Johnson <atagar@torproject.org> Date: Sat Feb 16 10:53:39 2013 -0800 Adding get_archive_path() method to descriptors We can't use a TarInfo's 'name' attribute for get_path() since that corresponds to its location within the archive. That said, I've often wanted both paths, so this both fixes get_path() for tarballs and adds a get_archive_path() method. --- stem/descriptor/__init__.py | 17 +++++++++++++++++ stem/descriptor/reader.py | 9 +++++++++ test/integ/descriptor/reader.py | 18 ++++++++++++++++++ test/settings.cfg | 2 +- 4 files changed, 45 insertions(+), 1 deletions(-) diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index 6e5d68f..87872d6 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -9,6 +9,7 @@ Package for parsing and processing descriptor data. Descriptor - Common parent for all descriptor file types. |- get_path - location of the descriptor on disk if it came from a file + |- get_archive_path - location of the descriptor within the archive it came from |- get_unrecognized_lines - unparsed descriptor content +- __str__ - string that the descriptor was made from @@ -250,6 +251,7 @@ class Descriptor(object): def __init__(self, contents): self._path = None + self._archive_path = None self._raw_contents = contents def get_path(self): @@ -261,6 +263,18 @@ class Descriptor(object): return self._path + def get_archive_path(self): + """ + If this descriptor came from an archive then provides its path within the + archive. This is only set if the descriptor came from a + :class:`~stem.descriptor.reader.DescriptorReader`, and is **None** if this + descriptor didn't come from an archive. 
+ + :returns: **str** with the descriptor's path within the archive + """ + + return self._archive_path + def get_unrecognized_lines(self): """ Provides a list of lines that were either ignored or had data that we did @@ -275,6 +289,9 @@ class Descriptor(object): def _set_path(self, path): self._path = path + def _set_archive_path(self, path): + self._archive_path = path + def __str__(self): return self._raw_contents diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py index 92adeaf..b017e06 100644 --- a/stem/descriptor/reader.py +++ b/stem/descriptor/reader.py @@ -545,10 +545,19 @@ class DescriptorReader(object): if tar_entry.isfile(): entry = tar_file.extractfile(tar_entry) + # The parse_file() function uses the 'name' attribute to figure out + # the file that it came from. In the case of TarInfo instances the + # 'name' is the path within the archive. We'll want that for + # _set_archive_path(). + + archive_path = entry.name + entry.name = target + for desc in stem.descriptor.parse_file(entry, validate = self._validate, document_handler = self._document_handler): if self._is_stopped.isSet(): return + desc._set_archive_path(archive_path) self._unreturned_descriptors.put(desc) self._iter_notice.set() diff --git a/test/integ/descriptor/reader.py b/test/integ/descriptor/reader.py index 936cf39..3ed47ae 100644 --- a/test/integ/descriptor/reader.py +++ b/test/integ/descriptor/reader.py @@ -266,6 +266,24 @@ class TestDescriptorReader(unittest.TestCase): with reader: self.assertEqual(0, len(list(reader))) + def test_archived_paths(self): + """ + Checks the get_path() and get_archive_path() for a tarball. 
+ """ + + expected_archive_paths = ( + "descriptor_archive/0/2/02c311d3d789f3f55c0880b5c85f3c196343552c", + "descriptor_archive/1/b/1bb798cae15e21479db0bc700767eee4733e9d4a", + "descriptor_archive/1/b/1ef75fef564180d8b3f72c6f8635ff0cd855f92c", + ) + + test_path = os.path.join(DESCRIPTOR_TEST_DATA, "descriptor_archive.tar") + + with stem.descriptor.reader.DescriptorReader(test_path) as reader: + for desc in reader: + self.assertEqual(test_path, desc.get_path()) + self.assertTrue(desc.get_archive_path() in expected_archive_paths) + def test_archived_uncompressed(self): """ Checks that we can read descriptors from an uncompressed archive. diff --git a/test/settings.cfg b/test/settings.cfg index a5abc91..b467fd4 100644 --- a/test/settings.cfg +++ b/test/settings.cfg @@ -157,7 +157,7 @@ target.torrc RUN_PTRACE => PORT, PTRACE pyflakes.ignore stem/prereq.py => 'RSA' imported but unused pyflakes.ignore stem/prereq.py => 'asn1' imported but unused pyflakes.ignore stem/prereq.py => 'long_to_bytes' imported but unused -pyflakes.ignore stem/descriptor/__init__.py => redefinition of unused 'OrderedDict' from line 54 +pyflakes.ignore stem/descriptor/__init__.py => redefinition of unused 'OrderedDict' from line 55 pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_bytes' from line 54 pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_unicode' from line 60 pyflakes.ignore test/mocking.py => undefined name 'builtins'