[tor-commits] [stem/master] CollecTor File class

atagar at torproject.org atagar at torproject.org
Sat Aug 17 20:44:27 UTC 2019


commit 790af0715095e06d559a963dfeff0a3b41e1eb51
Author: Damian Johnson <atagar at torproject.org>
Date:   Sun Jul 7 13:01:25 2019 -0700

    CollecTor File class
    
    On reflection, I think a file-level object will be more useful than representing
    the index (we don't need anything from it besides files).
---
 stem/descriptor/collector.py      | 45 +++++++++++++++++++++++++++++++++++++++
 test/unit/descriptor/collector.py | 24 ++++++++++++++++-----
 2 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py
index 770a1296..135a5d00 100644
--- a/stem/descriptor/collector.py
+++ b/stem/descriptor/collector.py
@@ -49,6 +49,7 @@ With this you can either download and read directly from CollecTor...
 .. versionadded:: 1.8.0
 """
 
+import datetime
 import json
 import sys
 import time
@@ -124,12 +125,16 @@ def _download(url, compression, timeout, retries):
 class Index(object):
   """
   Index of CollecTor's content.
+
+  :var hash files: mapping of paths to their content
   """
 
   def __init__(self, content):
     self._str_content = content
     self._hash_content = Index._convert_paths(json.loads(content))
 
+    self.files = Index._get_files(self._hash_content, [])
+
   def __str__(self):
     return self._str_content
 
@@ -138,6 +143,31 @@ class Index(object):
       yield k, v
 
   @staticmethod
+  def _get_files(val, path):
+    """
+    Provides a mapping of paths to files within the index.
+
+    :param dict val: index hash
+    :param list path: path we've traversed into
+
+    :returns: **dict** mapping paths to files
+    """
+
+    files = {}
+
+    if isinstance(val, dict):
+      for k, v in val.items():
+        if k == 'files':
+          for filename, attr in v.items():
+            file_path = '/'.join(path + [filename])
+            files[file_path] = File(file_path, attr.get('size'), attr.get('last_modified'))
+        elif k == 'directories':
+          for filename, attr in v.items():
+            files.update(Index._get_files(attr, path + [filename]))
+
+    return files
+
+  @staticmethod
   def _convert_paths(val):
     """
     Key files and directories off their paths so we can transverse them more
@@ -161,6 +191,21 @@ class Index(object):
       return val
 
 
+class File(object):
+  """
+  File within CollecTor.
+
+  :var str path: file path within collector
+  :var int size: size of the file
+  :var datetime last_modified: when the file was last modified
+  """
+
+  def __init__(self, path, size, last_modified):
+    self.path = path
+    self.size = size
+    self.last_modified = datetime.datetime.strptime(last_modified, '%Y-%m-%d %H:%M')
+
+
 class CollecTor(object):
   """
   Downloader for descriptors from CollecTor. The contents of CollecTor are
diff --git a/test/unit/descriptor/collector.py b/test/unit/descriptor/collector.py
index 2e11c89d..cc12ee00 100644
--- a/test/unit/descriptor/collector.py
+++ b/test/unit/descriptor/collector.py
@@ -2,6 +2,7 @@
 Unit tests for stem.descriptor.collector.
 """
 
+import datetime
 import io
 import unittest
 
@@ -28,6 +29,9 @@ MINIMAL_INDEX = {
 
 MINIMAL_INDEX_JSON = b'{"index_created":"2017-12-25 21:06","build_revision":"56a303e","path":"https://collector.torproject.org"}'
 
+with open(get_resource('collector_index.json'), 'rb') as index_file:
+  EXAMPLE_INDEX_CONTENT = index_file.read()
+
 
 class TestCollector(unittest.TestCase):
   @patch(URL_OPEN)
@@ -114,10 +118,20 @@ class TestCollector(unittest.TestCase):
         collector = CollecTor(compression = compression)
         self.assertRaisesRegexp(IOError, 'Unable to decompress %s response' % compression, collector.index)
 
-  @patch(URL_OPEN)
-  def test_real_index(self, urlopen_mock):
-    with open(get_resource('collector_index.json'), 'rb') as index_file:
-      urlopen_mock.return_value = io.BytesIO(index_file.read())
-
+  @patch(URL_OPEN, Mock(return_value = io.BytesIO(EXAMPLE_INDEX_CONTENT)))
+  def test_real_index(self):
     collector = CollecTor(compression = Compression.PLAINTEXT)
     self.assertEqual(EXAMPLE_INDEX, dict(collector.index()))
+
+  @patch(URL_OPEN, Mock(return_value = io.BytesIO(EXAMPLE_INDEX_CONTENT)))
+  def test_contents(self):
+    collector = CollecTor(compression = Compression.PLAINTEXT)
+    index = collector.index()
+
+    self.assertEqual(85, len(index.files))
+    test_path = 'archive/relay-descriptors/extra-infos/extra-infos-2007-09.tar.xz'
+
+    extrainfo_file = index.files[test_path]
+    self.assertEqual(test_path, extrainfo_file.path)
+    self.assertEqual(6459884, extrainfo_file.size)
+    self.assertEqual(datetime.datetime(2016, 6, 23, 9, 54), extrainfo_file.last_modified)





More information about the tor-commits mailing list