commit c1c4e7a288d26a2895838f2ed121786078db42fe
Author: Damian Johnson <atagar@torproject.org>
Date:   Mon Jan 6 15:07:47 2020 -0800
Remove deprecated modules
We're dropping stem.descriptor's reader and export modules due to lack of use...
* I wrote stem.descriptor.reader at Karsten's suggestion to read descriptors from disk and track when those on-disk files change. The design seemed to be for usage within CollecTor, but it never saw use there.
In practice stem.descriptor.from_file() provides a simpler mechanism to read descriptors from disk (a directory-reading sketch follows the diffstat below).
* stem.descriptor.export was contributed by a university student in Stem's early days. I've never used it nor found anyone else who does.
This module serializes descriptors to CSV, which is moot since descriptors already have a string representation we can read and write (a small CSV sketch also follows the diffstat)...
  with open('/path/to/descriptor', 'w') as descriptor_file:
    descriptor_file.write(str(my_descriptor))
  my_descriptor = stem.descriptor.from_file('/path/to/descriptor', 'server-descriptor 1.0')
---
 docs/api.rst                     |  10 +-
 docs/api/descriptor/export.rst   |   5 -
 docs/api/descriptor/reader.rst   |   5 -
 docs/contents.rst                |   5 +-
 stem/control.py                  |   1 -
 stem/descriptor/__init__.py      |  12 +-
 stem/descriptor/export.py        | 111 -------
 stem/descriptor/reader.py        | 563 -----------
 test/integ/control/controller.py |   1 -
 test/settings.cfg                |   3 -
 test/unit/descriptor/export.py   |  94 ------
 test/unit/descriptor/reader.py   | 625 ---------
 12 files changed, 7 insertions(+), 1428 deletions(-)
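If you were using stem.descriptor.reader to batch read a directory, the same can be done with a few lines around stem.descriptor.parse_file(). This is just a sketch, not a drop-in replacement: the '/tmp/archived_descriptors' path is a placeholder, and unlike the reader module there is no last-modified tracking, so every file is re-read on each run...

  import os

  import stem.descriptor

  def read_descriptors(root):
    # walk the directory, parsing each file and skipping anything that
    # isn't recognizable descriptor data (parse_file raises TypeError
    # for unrecognized types and ValueError for malformed content)

    for dirpath, _, filenames in os.walk(root):
      for filename in filenames:
        try:
          for desc in stem.descriptor.parse_file(os.path.join(dirpath, filename)):
            yield desc
        except (TypeError, ValueError):
          pass

  for desc in read_descriptors('/tmp/archived_descriptors'):
    print(desc)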
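Likewise, if you do need CSV output, the heart of the removed export module is a couple of stdlib csv calls. Another sketch, with a hypothetical descriptor path and output going to stdout...

  import csv
  import sys

  import stem.descriptor

  desc = next(stem.descriptor.parse_file('/path/to/descriptor', 'server-descriptor 1.0'))

  # as export_csv() did: every non-private attribute, ordered alphabetically
  fields = sorted(attr for attr in vars(desc) if not attr.startswith('_'))

  writer = csv.DictWriter(sys.stdout, fields, extrasaction = 'ignore')
  writer.writeheader()
  writer.writerow(vars(desc))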
diff --git a/docs/api.rst b/docs/api.rst
index cbbf0dd0..58604e90 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -21,11 +21,9 @@ Controller
 
 Descriptors
 -----------
 
-To read descriptors from disk use :func:`~stem.descriptor.__init__.parse_file` for
-individual files and `stem.descriptor.reader
-<api/descriptor/reader.html>`_ for batches. You can also use
-`stem.descriptor.remote <api/descriptor/remote.html>`_ to download descriptors
-remotely like Tor does.
+To read descriptors from disk use :func:`~stem.descriptor.__init__.parse_file`.
+You can also use `stem.descriptor.remote <api/descriptor/remote.html>`_ to
+download descriptors remotely like Tor does.
 
 * **Classes**
@@ -41,10 +39,8 @@ remotely like Tor does.
 * `stem.descriptor.certificate <api/descriptor/certificate.html>`_ - `Ed25519 certificates <https://gitweb.torproject.org/torspec.git/tree/cert-spec.txt>`_.
 * `stem.directory <api/directory.html>`_ - Directory authority and fallback directory information.
-* `stem.descriptor.reader <api/descriptor/reader.html>`_ - Reads and parses descriptor files from disk.
 * `stem.descriptor.remote <api/descriptor/remote.html>`_ - Downloads descriptors from directory mirrors and authorities.
 * `stem.descriptor.collector <api/descriptor/collector.html>`_ - Downloads past descriptors from `CollecTor <https://metrics.torproject.org/collector.html>`_.
-* `stem.descriptor.export <api/descriptor/export.html>`_ - Exports descriptors to other formats.
 
 Utilities
 ---------
 
diff --git a/docs/api/descriptor/export.rst b/docs/api/descriptor/export.rst
deleted file mode 100644
index a39e7773..00000000
--- a/docs/api/descriptor/export.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-Descriptor Exporter
-===================
-
-.. automodule:: stem.descriptor.export
-
diff --git a/docs/api/descriptor/reader.rst b/docs/api/descriptor/reader.rst
deleted file mode 100644
index 89c1a69f..00000000
--- a/docs/api/descriptor/reader.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-Descriptor Reader
-=================
-
-.. automodule:: stem.descriptor.reader
-
diff --git a/docs/contents.rst b/docs/contents.rst
index 87e75220..99ca686b 100644
--- a/docs/contents.rst
+++ b/docs/contents.rst
@@ -46,6 +46,7 @@ Contents
    api/descriptor/certificate
    api/descriptor/collector
    api/descriptor/descriptor
+   api/descriptor/remote
    api/descriptor/server_descriptor
    api/descriptor/extrainfo_descriptor
    api/descriptor/microdescriptor
@@ -54,10 +55,6 @@ Contents
    api/descriptor/hidden_service
    api/descriptor/tordnsel
 
-   api/descriptor/export
-   api/descriptor/reader
-   api/descriptor/remote
-
    api/util/init
    api/util/conf
    api/util/connection
diff --git a/stem/control.py b/stem/control.py
index 4adec330..9fda9d34 100644
--- a/stem/control.py
+++ b/stem/control.py
@@ -258,7 +258,6 @@ import threading
 import time
 
 import stem.descriptor.microdescriptor
-import stem.descriptor.reader
 import stem.descriptor.router_status_entry
 import stem.descriptor.server_descriptor
 import stem.exit_policy
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index fff08910..11fff944 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -110,12 +110,10 @@ __all__ = [
   'bandwidth_file',
   'certificate',
   'collector',
-  'export',
   'extrainfo_descriptor',
   'hidden_service',
   'microdescriptor',
   'networkstatus',
-  'reader',
   'remote',
   'router_status_entry',
   'server_descriptor',
@@ -297,10 +295,6 @@ def parse_file(descriptor_file, descriptor_type = None, validate = False, docume
   * The filename if it matches something from tor's data directory. For
     instance, tor's 'cached-descriptors' contains server descriptors.
 
-  This is a handy function for simple usage, but if you're reading multiple
-  descriptor files you might want to consider the
-  :class:`~stem.descriptor.reader.DescriptorReader`.
-
   Descriptor types include the following, including further minor versions (ie.
   if we support 1.1 then we also support everything from 1.0 and most things
   from 1.2, but not 2.0)...
@@ -940,9 +934,9 @@ class Descriptor(object):
   def get_archive_path(self):
     """
     If this descriptor came from an archive then provides its path within the
-    archive. This is only set if the descriptor came from a
-    :class:`~stem.descriptor.reader.DescriptorReader`, and is **None** if this
-    descriptor didn't come from an archive.
+    archive. This is only set if the descriptor was read by
+    :func:`~stem.descriptor.__init__.parse_file` from an archive, and **None**
+    otherwise.
:returns: **str** with the descriptor's path within the archive """ diff --git a/stem/descriptor/export.py b/stem/descriptor/export.py deleted file mode 100644 index 35835d7c..00000000 --- a/stem/descriptor/export.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright 2012-2020, Damian Johnson and The Tor Project -# See LICENSE for licensing information - -""" -Toolkit for exporting descriptors to other formats. - -**Module Overview:** - -:: - - export_csv - Exports descriptors to a CSV - export_csv_file - Writes exported CSV output to a file - -.. deprecated:: 1.7.0 - - This module will likely be removed in Stem 2.0 due to lack of usage. If you - use this modle please `let me know https://www.atagar.com/contact/`_. -""" - -import io -import csv - -import stem.descriptor -import stem.prereq - - -class _ExportDialect(csv.excel): - lineterminator = '\n' - - -def export_csv(descriptors, included_fields = (), excluded_fields = (), header = True): - """ - Provides a newline separated CSV for one or more descriptors. If simply - provided with descriptors then the CSV contains all of its attributes, - labeled with a header row. Either 'included_fields' or 'excluded_fields' can - be used for more granular control over its attributes and the order. - - :param Descriptor,list descriptors: either a - :class:`~stem.descriptor.Descriptor` or list of descriptors to be exported - :param list included_fields: attributes to include in the csv - :param list excluded_fields: attributes to exclude from the csv - :param bool header: if **True** then the first line will be a comma separated - list of the attribute names - - :returns: **str** of the CSV for the descriptors, one per line - :raises: **ValueError** if descriptors contain more than one descriptor type - """ - - output_buffer = io.StringIO() - export_csv_file(output_buffer, descriptors, included_fields, excluded_fields, header) - return output_buffer.getvalue() - - -def export_csv_file(output_file, descriptors, included_fields = (), excluded_fields = (), header = True): - """ - Similar to :func:`stem.descriptor.export.export_csv`, except that the CSV is - written directly to a file. - - :param file output_file: file to be written to - :param Descriptor,list descriptors: either a - :class:`~stem.descriptor.Descriptor` or list of descriptors to be exported - :param list included_fields: attributes to include in the csv - :param list excluded_fields: attributes to exclude from the csv - :param bool header: if **True** then the first line will be a comma separated - list of the attribute names - - :returns: **str** of the CSV for the descriptors, one per line - :raises: **ValueError** if descriptors contain more than one descriptor type - """ - - if isinstance(descriptors, stem.descriptor.Descriptor): - descriptors = (descriptors,) - - if not descriptors: - return - - descriptor_type = type(descriptors[0]) - descriptor_type_label = descriptor_type.__name__ - included_fields = list(included_fields) - - # If the user didn't specify the fields to include then export everything, - # ordered alphabetically. If they did specify fields then make sure that - # they exist. 
- - desc_attr = sorted(vars(descriptors[0]).keys()) - - if included_fields: - for field in included_fields: - if field not in desc_attr: - raise ValueError("%s does not have a '%s' attribute, valid fields are: %s" % (descriptor_type_label, field, ', '.join(desc_attr))) - else: - included_fields = [attr for attr in desc_attr if not attr.startswith('_')] - - for field in excluded_fields: - try: - included_fields.remove(field) - except ValueError: - pass - - writer = csv.DictWriter(output_file, included_fields, dialect = _ExportDialect(), extrasaction='ignore') - - if header: - writer.writeheader() - - for desc in descriptors: - if not isinstance(desc, stem.descriptor.Descriptor): - raise ValueError('Unable to export a descriptor CSV since %s is not a descriptor.' % type(desc).__name__) - elif descriptor_type != type(desc): - raise ValueError('To export a descriptor CSV all of the descriptors must be of the same type. First descriptor was a %s but we later got a %s.' % (descriptor_type_label, type(desc))) - - writer.writerow(vars(desc)) diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py deleted file mode 100644 index e75cdb7e..00000000 --- a/stem/descriptor/reader.py +++ /dev/null @@ -1,563 +0,0 @@ -# Copyright 2012-2020, Damian Johnson and The Tor Project -# See LICENSE for licensing information - -""" -Utilities for reading descriptors from local directories and archives. This is -mostly done through the :class:`~stem.descriptor.reader.DescriptorReader` -class, which is an iterator for the descriptor data in a series of -destinations. For example... - -:: - - my_descriptors = [ - '/tmp/server-descriptors-2012-03.tar.bz2', - '/tmp/archived_descriptors/', - ] - - # prints the contents of all the descriptor files - with DescriptorReader(my_descriptors) as reader: - for descriptor in reader: - print descriptor - -This ignores files that cannot be processed due to read errors or unparsable -content. To be notified of skipped files you can register a listener with -:func:`~stem.descriptor.reader.DescriptorReader.register_skip_listener`. - -The :class:`~stem.descriptor.reader.DescriptorReader` keeps track of the last -modified timestamps for descriptor files that it has read so it can skip -unchanged files if run again. This listing of processed files can also be -persisted and applied to other -:class:`~stem.descriptor.reader.DescriptorReader` instances. For example, the -following prints descriptors as they're changed over the course of a minute, -and picks up where it left off if run again... 
- -:: - - reader = DescriptorReader(['/tmp/descriptor_data']) - - try: - processed_files = load_processed_files('/tmp/used_descriptors') - reader.set_processed_files(processed_files) - except: pass # could not load, maybe this is the first run - - start_time = time.time() - - while (time.time() - start_time) < 60: - # prints any descriptors that have changed since last checked - with reader: - for descriptor in reader: - print descriptor - - time.sleep(1) - - save_processed_files('/tmp/used_descriptors', reader.get_processed_files()) - -**Module Overview:** - -:: - - load_processed_files - Loads a listing of processed files - save_processed_files - Saves a listing of processed files - - DescriptorReader - Iterator for descriptor data on the local file system - |- get_processed_files - provides the listing of files that we've processed - |- set_processed_files - sets our tracking of the files we have processed - |- register_read_listener - adds a listener for when files are read - |- register_skip_listener - adds a listener that's notified of skipped files - |- start - begins reading descriptor data - |- stop - stops reading descriptor data - |- __enter__ / __exit__ - manages the descriptor reader thread in the context - +- __iter__ - iterates over descriptor data in unread files - - FileSkipped - Base exception for a file that was skipped - |- AlreadyRead - We've already read a file with this last modified timestamp - |- ParsingFailure - Contents can't be parsed as descriptor data - |- UnrecognizedType - File extension indicates non-descriptor data - +- ReadFailed - Wraps an error that was raised while reading the file - +- FileMissing - File does not exist - -.. deprecated:: 1.8.0 - - This module will likely be removed in Stem 2.0 due to lack of usage. If you - use this modle please `let me know https://www.atagar.com/contact/`_. -""" - -import mimetypes -import os -import queue -import tarfile -import threading - -import stem.descriptor -import stem.prereq -import stem.util -import stem.util.str_tools -import stem.util.system - -# flag to indicate when the reader thread is out of descriptor files to read -FINISHED = 'DONE' - - -class FileSkipped(Exception): - "Base error when we can't provide descriptor data from a file." - - -class AlreadyRead(FileSkipped): - """ - Already read a file with this 'last modified' timestamp or later. - - :param int last_modified: unix timestamp for when the file was last modified - :param int last_modified_when_read: unix timestamp for the modification time - when we last read this file - """ - - def __init__(self, last_modified, last_modified_when_read): - super(AlreadyRead, self).__init__('File has already been read since it was last modified. modification time: %s, last read: %s' % (last_modified, last_modified_when_read)) - self.last_modified = last_modified - self.last_modified_when_read = last_modified_when_read - - -class ParsingFailure(FileSkipped): - """ - File contents could not be parsed as descriptor data. - - :param ValueError exception: issue that arose when parsing - """ - - def __init__(self, parsing_exception): - super(ParsingFailure, self).__init__(parsing_exception) - self.exception = parsing_exception - - -class UnrecognizedType(FileSkipped): - """ - File doesn't contain descriptor data. This could either be due to its file - type or because it doesn't conform to a recognizable descriptor type. 
- - :param tuple mime_type: the (type, encoding) tuple provided by mimetypes.guess_type() - """ - - def __init__(self, mime_type): - super(UnrecognizedType, self).__init__('Unrecognized mime type: %s (%s)' % mime_type) - self.mime_type = mime_type - - -class ReadFailed(FileSkipped): - """ - An IOError occurred while trying to read the file. - - :param IOError exception: issue that arose when reading the file, **None** if - this arose due to the file not being present - """ - - def __init__(self, read_exception): - super(ReadFailed, self).__init__(read_exception) - self.exception = read_exception - - -class FileMissing(ReadFailed): - 'File does not exist.' - - def __init__(self): - super(FileMissing, self).__init__('File does not exist') - - -def load_processed_files(path): - """ - Loads a dictionary of 'path => last modified timestamp' mappings, as - persisted by :func:`~stem.descriptor.reader.save_processed_files`, from a - file. - - :param str path: location to load the processed files dictionary from - - :returns: **dict** of 'path (**str**) => last modified unix timestamp - (**int**)' mappings - - :raises: - * **IOError** if unable to read the file - * **TypeError** if unable to parse the file's contents - """ - - processed_files = {} - - with open(path, 'rb') as input_file: - for line in input_file.readlines(): - line = stem.util.str_tools._to_unicode(line.strip()) - - if not line: - continue # skip blank lines - - if ' ' not in line: - raise TypeError('Malformed line: %s' % line) - - path, timestamp = line.rsplit(' ', 1) - - if not os.path.isabs(path): - raise TypeError("'%s' is not an absolute path" % path) - elif not timestamp.isdigit(): - raise TypeError("'%s' is not an integer timestamp" % timestamp) - - processed_files[path] = int(timestamp) - - return processed_files - - -def save_processed_files(path, processed_files): - """ - Persists a dictionary of 'path => last modified timestamp' mappings (as - provided by the DescriptorReader's - :func:`~stem.descriptor.reader.DescriptorReader.get_processed_files` method) - so that they can be loaded later and applied to another - :class:`~stem.descriptor.reader.DescriptorReader`. - - :param str path: location to save the processed files dictionary to - :param dict processed_files: 'path => last modified' mappings - - :raises: - * **IOError** if unable to write to the file - * **TypeError** if processed_files is of the wrong type - """ - - # makes the parent directory if it doesn't already exist - - try: - path_dir = os.path.dirname(path) - - if not os.path.exists(path_dir): - os.makedirs(path_dir) - except OSError as exc: - raise IOError(exc) - - with open(path, 'w') as output_file: - for path, timestamp in list(processed_files.items()): - if not os.path.isabs(path): - raise TypeError('Only absolute paths are acceptable: %s' % path) - - output_file.write('%s %i\n' % (path, timestamp)) - - -class DescriptorReader(object): - """ - Iterator for the descriptor data on the local file system. This can process - text files, tarball archives (gzip or bzip2), or recurse directories. - - By default this limits the number of descriptors that we'll read ahead before - waiting for our caller to fetch some of them. This is included to avoid - unbounded memory usage. - - Our persistence_path argument is a convenient method to persist the listing - of files we have processed between runs, however it doesn't allow for error - handling. 
If you want that then use the - :func:`~stem.descriptor.reader.load_processed_files` and - :func:`~stem.descriptor.reader.save_processed_files` functions instead. - - :param str,list target: path or list of paths for files or directories to be read from - :param bool validate: checks the validity of the descriptor's content if - **True**, skips these checks otherwise - :param bool follow_links: determines if we'll follow symlinks when traversing - directories - :param int buffer_size: descriptors we'll buffer before waiting for some to - be read, this is unbounded if zero - :param str persistence_path: if set we will load and save processed file - listings from this path, errors are ignored - :param stem.descriptor.__init__.DocumentHandler document_handler: method in - which to parse :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` - :param dict kwargs: additional arguments for the descriptor constructor - """ - - def __init__(self, target, validate = False, follow_links = False, buffer_size = 100, persistence_path = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs): - self._targets = [target] if isinstance(target, (bytes, str)) else target - - # expand any relative paths we got - - self._targets = list(map(os.path.abspath, self._targets)) - - self._validate = validate - self._follow_links = follow_links - self._persistence_path = persistence_path - self._document_handler = document_handler - self._kwargs = kwargs - self._read_listeners = [] - self._skip_listeners = [] - self._processed_files = {} - - self._reader_thread = None - self._reader_thread_lock = threading.RLock() - - self._iter_lock = threading.RLock() - self._iter_notice = threading.Event() - - self._is_stopped = threading.Event() - self._is_stopped.set() - - # Descriptors that we have read but not yet provided to the caller. A - # FINISHED entry is used by the reading thread to indicate the end. - - self._unreturned_descriptors = queue.Queue(buffer_size) - - if self._persistence_path: - try: - processed_files = load_processed_files(self._persistence_path) - self.set_processed_files(processed_files) - except: - pass - - def get_processed_files(self): - """ - For each file that we have read descriptor data from this provides a - mapping of the form... - - :: - - absolute path (str) => last modified unix timestamp (int) - - This includes entries set through the - :func:`~stem.descriptor.reader.DescriptorReader.set_processed_files` - method. Each run resets this to only the files that were present during - that run. - - :returns: **dict** with the absolute paths and unix timestamp for the last - modified times of the files we have processed - """ - - # make sure that we only provide back absolute paths - return dict((os.path.abspath(k), v) for (k, v) in list(self._processed_files.items())) - - def set_processed_files(self, processed_files): - """ - Sets the listing of the files we have processed. Most often this is used - with a newly created :class:`~stem.descriptor.reader.DescriptorReader` to - pre-populate the listing of descriptor files that we have seen. - - :param dict processed_files: mapping of absolute paths (**str**) to unix - timestamps for the last modified time (**int**) - """ - - self._processed_files = dict(processed_files) - - def register_read_listener(self, listener): - """ - Registers a listener for when files are read. This is executed prior to - processing files. Listeners are expected to be of the form... 
- - :: - - my_listener(path) - - :param functor listener: functor to be notified when files are read - """ - - self._read_listeners.append(listener) - - def register_skip_listener(self, listener): - """ - Registers a listener for files that are skipped. This listener is expected - to be a functor of the form... - - :: - - my_listener(path, exception) - - :param functor listener: functor to be notified of files that are skipped - to read errors or because they couldn't be parsed as valid descriptor data - """ - - self._skip_listeners.append(listener) - - def get_buffered_descriptor_count(self): - """ - Provides the number of descriptors that are waiting to be iterated over. - This is limited to the buffer_size that we were constructed with. - - :returns: **int** for the estimated number of currently enqueued - descriptors, this is not entirely reliable - """ - - return self._unreturned_descriptors.qsize() - - def start(self): - """ - Starts reading our descriptor files. - - :raises: **ValueError** if we're already reading the descriptor files - """ - - with self._reader_thread_lock: - if self._reader_thread: - raise ValueError('Already running, you need to call stop() first') - else: - self._is_stopped.clear() - self._reader_thread = threading.Thread(target = self._read_descriptor_files, name='Descriptor reader') - self._reader_thread.setDaemon(True) - self._reader_thread.start() - - def stop(self): - """ - Stops further reading of descriptor files. - """ - - with self._reader_thread_lock: - self._is_stopped.set() - self._iter_notice.set() - - # clears our queue to unblock enqueue calls - - try: - while True: - self._unreturned_descriptors.get_nowait() - except queue.Empty: - pass - - self._reader_thread.join() - self._reader_thread = None - - if self._persistence_path: - try: - processed_files = self.get_processed_files() - save_processed_files(self._persistence_path, processed_files) - except: - pass - - def _read_descriptor_files(self): - new_processed_files = {} - remaining_files = list(self._targets) - - while remaining_files and not self._is_stopped.is_set(): - target = remaining_files.pop(0) - - if not os.path.exists(target): - self._notify_skip_listeners(target, FileMissing()) - continue - - if os.path.isdir(target): - walker = os.walk(target, followlinks = self._follow_links) - self._handle_walker(walker, new_processed_files) - else: - self._handle_file(target, new_processed_files) - - self._processed_files = new_processed_files - - if not self._is_stopped.is_set(): - self._unreturned_descriptors.put(FINISHED) - - self._iter_notice.set() - - def __iter__(self): - with self._iter_lock: - while not self._is_stopped.is_set(): - try: - descriptor = self._unreturned_descriptors.get_nowait() - - if descriptor == FINISHED: - break - else: - yield descriptor - except queue.Empty: - self._iter_notice.wait() - self._iter_notice.clear() - - def _handle_walker(self, walker, new_processed_files): - for root, _, files in walker: - for filename in files: - self._handle_file(os.path.join(root, filename), new_processed_files) - - # this can take a while if, say, we're including the root directory - if self._is_stopped.is_set(): - return - - def _handle_file(self, target, new_processed_files): - # This is a file. Register its last modified timestamp and check if - # it's a file that we should skip. 
- - try: - last_modified = int(os.stat(target).st_mtime) - last_used = self._processed_files.get(target) - new_processed_files[target] = last_modified - except OSError as exc: - self._notify_skip_listeners(target, ReadFailed(exc)) - return - - if last_used and last_used >= last_modified: - self._notify_skip_listeners(target, AlreadyRead(last_modified, last_used)) - return - - # Block devices and such are never descriptors, and can cause us to block - # for quite a while so skipping anything that isn't a regular file. - - if not os.path.isfile(target): - return - - # The mimetypes module only checks the file extension. To actually - # check the content (like the 'file' command) we'd need something like - # pymagic (https://github.com/cloudburst/pymagic). - - target_type = mimetypes.guess_type(target) - - if target_type[0] in (None, 'text/plain'): - # either '.txt' or an unknown type - self._handle_descriptor_file(target, target_type) - elif stem.util.system.is_tarfile(target): - # handles gzip, bz2, and decompressed tarballs among others - self._handle_archive(target) - else: - self._notify_skip_listeners(target, UnrecognizedType(target_type)) - - def _handle_descriptor_file(self, target, mime_type): - try: - self._notify_read_listeners(target) - - with open(target, 'rb') as target_file: - for desc in stem.descriptor.parse_file(target_file, validate = self._validate, document_handler = self._document_handler, **self._kwargs): - if self._is_stopped.is_set(): - return - - self._unreturned_descriptors.put(desc) - self._iter_notice.set() - except TypeError: - self._notify_skip_listeners(target, UnrecognizedType(mime_type)) - except ValueError as exc: - self._notify_skip_listeners(target, ParsingFailure(exc)) - except IOError as exc: - self._notify_skip_listeners(target, ReadFailed(exc)) - - def _handle_archive(self, target): - try: - with tarfile.open(target) as tar_file: - self._notify_read_listeners(target) - - for tar_entry in tar_file: - if tar_entry.isfile(): - entry = tar_file.extractfile(tar_entry) - - try: - for desc in stem.descriptor.parse_file(entry, validate = self._validate, document_handler = self._document_handler, **self._kwargs): - if self._is_stopped.is_set(): - return - - desc._set_path(os.path.abspath(target)) - desc._set_archive_path(tar_entry.name) - self._unreturned_descriptors.put(desc) - self._iter_notice.set() - except TypeError as exc: - self._notify_skip_listeners(target, ParsingFailure(exc)) - except ValueError as exc: - self._notify_skip_listeners(target, ParsingFailure(exc)) - finally: - entry.close() - except IOError as exc: - self._notify_skip_listeners(target, ReadFailed(exc)) - - def _notify_read_listeners(self, path): - for listener in self._read_listeners: - listener(path) - - def _notify_skip_listeners(self, path, exception): - for listener in self._skip_listeners: - listener(path, exception) - - def __enter__(self): - self.start() - return self - - def __exit__(self, exit_type, value, traceback): - self.stop() diff --git a/test/integ/control/controller.py b/test/integ/control/controller.py index 257d9fbc..6903c65b 100644 --- a/test/integ/control/controller.py +++ b/test/integ/control/controller.py @@ -12,7 +12,6 @@ import unittest
import stem.connection import stem.control -import stem.descriptor.reader import stem.descriptor.router_status_entry import stem.directory import stem.response.protocolinfo diff --git a/test/settings.cfg b/test/settings.cfg index 1ec8176e..d22bec42 100644 --- a/test/settings.cfg +++ b/test/settings.cfg @@ -229,7 +229,6 @@ pyflakes.ignore stem/util/__init__.py => undefined name 'unicode' pyflakes.ignore stem/util/conf.py => undefined name 'unicode' pyflakes.ignore stem/util/test_tools.py => 'pyflakes' imported but unused pyflakes.ignore stem/util/test_tools.py => 'pycodestyle' imported but unused -pyflakes.ignore test/unit/descriptor/reader.py => 'bz2' imported but unused pyflakes.ignore test/unit/response/events.py => 'from stem import *' used; unable to detect undefined names pyflakes.ignore test/unit/response/events.py => *may be undefined, or defined from star imports: stem pyflakes.ignore stem/util/str_tools.py => undefined name 'unicode' @@ -254,8 +253,6 @@ test.unit_tests |test.unit.installation.TestInstallation |test.unit.descriptor.descriptor.TestDescriptor |test.unit.descriptor.compression.TestCompression -|test.unit.descriptor.export.TestExport -|test.unit.descriptor.reader.TestDescriptorReader |test.unit.descriptor.collector.TestCollector |test.unit.descriptor.remote.TestDescriptorDownloader |test.unit.descriptor.server_descriptor.TestServerDescriptor diff --git a/test/unit/descriptor/export.py b/test/unit/descriptor/export.py deleted file mode 100644 index d27ed241..00000000 --- a/test/unit/descriptor/export.py +++ /dev/null @@ -1,94 +0,0 @@ -""" -Unit tests for stem.descriptor.export. -""" - -import io -import unittest - -import stem.prereq - -from stem.descriptor.server_descriptor import RelayDescriptor, BridgeDescriptor -from stem.descriptor.export import export_csv, export_csv_file - - -class TestExport(unittest.TestCase): - def test_minimal_descriptor(self): - """ - Exports a single minimal tor server descriptor. - """ - - desc = RelayDescriptor.create({ - 'router': 'caerSidi 71.35.133.197 9001 0 0', - 'published': '2012-03-01 17:15:27', - }) - - desc_csv = export_csv(desc, included_fields = ('nickname', 'address', 'published'), header = False) - expected = 'caerSidi,71.35.133.197,2012-03-01 17:15:27\n' - self.assertEqual(expected, desc_csv) - - desc_csv = export_csv(desc, included_fields = ('nickname', 'address', 'published'), header = True) - expected = 'nickname,address,published\n' + expected - self.assertEqual(expected, desc_csv) - - def test_multiple_descriptors(self): - """ - Exports multiple descriptors, making sure that we get them back in the same - order. - """ - - nicknames = ('relay1', 'relay3', 'relay2', 'caerSidi', 'zeus') - descriptors = [] - - for nickname in nicknames: - router_line = '%s 71.35.133.197 9001 0 0' % nickname - descriptors.append(RelayDescriptor.create({'router': router_line})) - - expected = '\n'.join(nicknames) + '\n' - self.assertEqual(expected, export_csv(descriptors, included_fields = ('nickname',), header = False)) - - def test_file_output(self): - """ - Basic test for the export_csv_file() function, checking that it provides - the same output as export_csv(). - """ - - desc = RelayDescriptor.create() - desc_csv = export_csv(desc) - - csv_buffer = io.StringIO() - export_csv_file(csv_buffer, desc) - - self.assertEqual(desc_csv, csv_buffer.getvalue()) - - def test_excludes_private_attr(self): - """ - Checks that the default attributes for our csv output doesn't include private fields. 
- """ - - desc = RelayDescriptor.create() - desc_csv = export_csv(desc) - - self.assertTrue(',signature' in desc_csv) - self.assertFalse(',_digest' in desc_csv) - self.assertFalse(',_annotation_lines' in desc_csv) - - def test_empty_input(self): - """ - Exercises when we don't provide any descriptors. - """ - self.assertEqual('', export_csv([])) - - def test_invalid_attributes(self): - """ - Attempts to make a csv with attributes that don't exist. - """ - - desc = RelayDescriptor.create() - self.assertRaises(ValueError, export_csv, desc, ('nickname', 'blarg!')) - - def test_multiple_descriptor_types(self): - """ - Attempts to make a csv with multiple descriptor types. - """ - - self.assertRaises(ValueError, export_csv, (RelayDescriptor.create(), BridgeDescriptor.create())) diff --git a/test/unit/descriptor/reader.py b/test/unit/descriptor/reader.py deleted file mode 100644 index f49183e5..00000000 --- a/test/unit/descriptor/reader.py +++ /dev/null @@ -1,625 +0,0 @@ -""" -Unit tests for stem.descriptor.reader. -""" - -import getpass -import io -import os -import shutil -import signal -import sys -import tarfile -import tempfile -import time -import unittest - -import stem.descriptor.reader -import stem.util.str_tools -import stem.util.system - -import test.unit.descriptor - -from unittest.mock import patch - -BASIC_LISTING = """ -/tmp 123 -/bin/grep 4567 -/file with spaces/and \ stuff 890 -""" - -my_dir = os.path.dirname(__file__) -DESCRIPTOR_TEST_DATA = os.path.join(my_dir, 'data') - -TAR_DESCRIPTORS = None - - -def _get_raw_tar_descriptors(): - global TAR_DESCRIPTORS - - if not TAR_DESCRIPTORS: - test_path = os.path.join(DESCRIPTOR_TEST_DATA, 'descriptor_archive.tar') - raw_descriptors = [] - - with tarfile.open(test_path) as tar_file: - for tar_entry in tar_file: - if tar_entry.isfile(): - entry = tar_file.extractfile(tar_entry) - entry.readline() # strip header - raw_descriptors.append(entry.read().decode('utf-8', 'replace')) - entry.close() - - TAR_DESCRIPTORS = raw_descriptors - - return TAR_DESCRIPTORS - - -class SkipListener: - def __init__(self): - self.results = [] # (path, exception) tuples that we've received - - def listener(self, path, exception): - self.results.append((path, exception)) - - -class TestDescriptorReader(unittest.TestCase): - def setUp(self): - self.temp_directory = tempfile.mkdtemp() - self.test_listing_path = os.path.join(self.temp_directory, 'descriptor_processed_files') - - def tearDown(self): - shutil.rmtree(self.temp_directory) - - @patch('stem.descriptor.reader.open', create = True) - def test_load_processed_files(self, open_mock): - """ - Successful load of content. - """ - - test_lines = ( - '/dir/ 0', - '/dir/file 12345', - '/dir/file with spaces 7138743', - ' /dir/with extra space 12345 ', - ' \t ', - '', - '/dir/after empty line 12345', - ) - - expected_value = { - '/dir/': 0, - '/dir/file': 12345, - '/dir/file with spaces': 7138743, - '/dir/with extra space': 12345, - '/dir/after empty line': 12345, - } - - open_mock.return_value = io.BytesIO(stem.util.str_tools._to_bytes('\n'.join(test_lines))) - self.assertEqual(expected_value, stem.descriptor.reader.load_processed_files('')) - - @patch('stem.descriptor.reader.open', create = True) - def test_load_processed_files_empty(self, open_mock): - """ - Tests the load_processed_files() function with an empty file. 
- """ - - open_mock.return_value = io.BytesIO(stem.util.str_tools._to_bytes('')) - self.assertEqual({}, stem.descriptor.reader.load_processed_files('')) - - @patch('stem.descriptor.reader.open', create = True) - def test_load_processed_files_no_file(self, open_mock): - """ - Tests the load_processed_files() function content that is malformed because - it is missing the file path. - """ - - open_mock.return_value = io.BytesIO(stem.util.str_tools._to_bytes(' 12345')) - self.assertRaises(TypeError, stem.descriptor.reader.load_processed_files, '') - - @patch('stem.descriptor.reader.open', create = True) - def test_load_processed_files_no_timestamp(self, open_mock): - """ - Tests the load_processed_files() function content that is malformed because - it is missing the timestamp. - """ - - open_mock.return_value = io.BytesIO(stem.util.str_tools._to_bytes('/dir/file ')) - self.assertRaises(TypeError, stem.descriptor.reader.load_processed_files, '') - - @patch('stem.descriptor.reader.open', create = True) - def test_load_processed_files_malformed_file(self, open_mock): - """ - Tests the load_processed_files() function content that is malformed because - it has an invalid file path. - """ - - open_mock.return_value = io.BytesIO(stem.util.str_tools._to_bytes('not_an_absolute_file 12345')) - self.assertRaises(TypeError, stem.descriptor.reader.load_processed_files, '') - - @patch('stem.descriptor.reader.open', create = True) - def test_load_processed_files_malformed_timestamp(self, open_mock): - """ - Tests the load_processed_files() function content that is malformed because - it has a non-numeric timestamp. - """ - - open_mock.return_value = io.BytesIO(stem.util.str_tools._to_bytes('/dir/file 123a')) - self.assertRaises(TypeError, stem.descriptor.reader.load_processed_files, '') - - def test_load_processed_files_from_data(self): - """ - Basic sanity test for loading a processed files listing from disk. - """ - - test_listing_path = self._make_processed_files_listing(BASIC_LISTING) - loaded_listing = stem.descriptor.reader.load_processed_files(test_listing_path) - - expected_listing = { - '/tmp': 123, - '/bin/grep': 4567, - '/file with spaces/and \ stuff': 890, - } - - self.assertEqual(expected_listing, loaded_listing) - - def test_load_processed_files_missing(self): - """ - Tests the load_processed_files() function with a file that doesn't exist. - """ - - self.assertRaises(IOError, stem.descriptor.reader.load_processed_files, '/non-existant/path') - - def test_load_processed_files_permissions(self): - """ - Tests the load_processed_files() function with a file that can't be read - due to permissions. - """ - - # test relies on being unable to read a file - - if getpass.getuser() == 'root': - self.skipTest('(running as root)') - - # Skip the test on windows, since you can only set the file's - # read-only flag with os.chmod(). For more information see... - # http://docs.python.org/library/os.html#os.chmod - - if stem.util.system.is_windows(): - self.skipTest('(chmod not functional)') - - test_listing_path = self._make_processed_files_listing(BASIC_LISTING) - os.chmod(test_listing_path, 0o077) # remove read permissions - self.assertRaises(IOError, stem.descriptor.reader.load_processed_files, test_listing_path) - - def test_save_processed_files(self): - """ - Basic sanity test for persisting files listings to disk. 
- """ - - initial_listing = { - '/tmp': 123, - '/bin/grep': 4567, - '/file with spaces/and \ stuff': 890, - } - - # saves the initial_listing to a file then reloads it - - stem.descriptor.reader.save_processed_files(self.test_listing_path, initial_listing) - loaded_listing = stem.descriptor.reader.load_processed_files(self.test_listing_path) - - self.assertEqual(initial_listing, loaded_listing) - - def test_save_processed_files_malformed(self): - """ - Tests the save_processed_files() function with malformed data. - """ - - missing_filename = {'': 123} - relative_filename = {'foobar': 123} - string_timestamp = {'/tmp': '123a'} - temp_path = tempfile.mkstemp(prefix = 'stem-unit-tests-', text = True)[1] - - for listing in (missing_filename, relative_filename, string_timestamp): - self.assertRaises(TypeError, stem.descriptor.reader.save_processed_files, temp_path, listing) - - # Though our attempts to save the processed files fail we'll write an empty - # file. Cleaning it up. - - try: - os.remove(temp_path) - except: - pass - - def test_basic_example(self): - """ - Exercises something similar to the first example in the header - documentation, checking that some of the contents match what we'd expect. - """ - - # snag some of the plaintext descriptors so we can later make sure that we - # iterate over them - - descriptor_entries = [] - - descriptor_path = os.path.join(DESCRIPTOR_TEST_DATA, 'example_descriptor') - - with open(descriptor_path) as descriptor_file: - descriptor_file.readline() # strip header - descriptor_entries.append(descriptor_file.read()) - - # running this test multiple times to flush out concurrency issues - - for _ in range(15): - remaining_entries = list(descriptor_entries) - - with stem.descriptor.reader.DescriptorReader(descriptor_path) as reader: - for descriptor in reader: - descriptor_str = str(descriptor) - - if descriptor_str in remaining_entries: - remaining_entries.remove(descriptor_str) - else: - # iterator is providing output that we didn't expect - self.fail() - - # check that we've seen all of the descriptor_entries - self.assertTrue(len(remaining_entries) == 0) - - def test_multiple_runs(self): - """ - Runs a DescriptorReader instance multiple times over the same content, - making sure that it can be used repeatedly. - """ - - descriptor_path = os.path.join(DESCRIPTOR_TEST_DATA, 'example_descriptor') - reader = stem.descriptor.reader.DescriptorReader(descriptor_path) - - with reader: - self.assertEqual(1, len(list(reader))) - - # run it a second time, this shouldn't provide any descriptors because we - # have already read it - - with reader: - self.assertEqual(0, len(list(reader))) - - # clear the DescriptorReader's memory of seeing the file and run it again - - reader.set_processed_files([]) - - with reader: - self.assertEqual(1, len(list(reader))) - - def test_buffer_size(self): - """ - Checks that we can process sets of descriptors larger than our buffer size, - that we don't exceed it, and that we can still stop midway through reading - them. - """ - - reader = stem.descriptor.reader.DescriptorReader(DESCRIPTOR_TEST_DATA, buffer_size = 2) - - with reader: - self.assertTrue(reader.get_buffered_descriptor_count() <= 2) - time.sleep(0.001) - self.assertTrue(reader.get_buffered_descriptor_count() <= 2) - - def test_persistence_path(self): - """ - Check that the persistence_path argument loads and saves a a processed - files listing. 
- """ - - descriptor_path = os.path.join(DESCRIPTOR_TEST_DATA, 'example_descriptor') - - # First run where the persistence_path doesn't yet exist. This just tests - # the saving functionality. - - reader = stem.descriptor.reader.DescriptorReader(descriptor_path, persistence_path = self.test_listing_path) - - with reader: - self.assertEqual(1, len(list(reader))) - - # check that we've saved reading example_descriptor - self.assertTrue(os.path.exists(self.test_listing_path)) - - with open(self.test_listing_path) as persistence_file: - persistance_file_contents = persistence_file.read() - self.assertTrue(persistance_file_contents.startswith(descriptor_path)) - - # Try running again with a new reader but the same persistance path, if it - # reads and takes the persistence_path into account then it won't read the - # descriptor file. This in essence just tests its loading functionality. - - reader = stem.descriptor.reader.DescriptorReader(descriptor_path, persistence_path = self.test_listing_path) - - with reader: - self.assertEqual(0, len(list(reader))) - - def test_archived_paths(self): - """ - Checks the get_path() and get_archive_path() for a tarball. - """ - - expected_archive_paths = ( - 'descriptor_archive/0/2/02c311d3d789f3f55c0880b5c85f3c196343552c', - 'descriptor_archive/1/b/1bb798cae15e21479db0bc700767eee4733e9d4a', - 'descriptor_archive/1/b/1ef75fef564180d8b3f72c6f8635ff0cd855f92c', - ) - - test_path = os.path.join(DESCRIPTOR_TEST_DATA, 'descriptor_archive.tar') - - with stem.descriptor.reader.DescriptorReader(test_path) as reader: - for desc in reader: - self.assertEqual(test_path, desc.get_path()) - self.assertTrue(desc.get_archive_path() in expected_archive_paths) - - def test_archived_uncompressed(self): - """ - Checks that we can read descriptors from an uncompressed archive. - """ - - expected_results = _get_raw_tar_descriptors() - test_path = os.path.join(DESCRIPTOR_TEST_DATA, 'descriptor_archive.tar') - - with stem.descriptor.reader.DescriptorReader(test_path) as reader: - read_descriptors = [str(desc) for desc in list(reader)] - self.assertEqual(expected_results, read_descriptors) - - def test_archived_gzip(self): - """ - Checks that we can read descriptors from a gzipped archive. - """ - - expected_results = _get_raw_tar_descriptors() - test_path = os.path.join(DESCRIPTOR_TEST_DATA, 'descriptor_archive.tar.gz') - - with stem.descriptor.reader.DescriptorReader(test_path) as reader: - read_descriptors = [str(desc) for desc in list(reader)] - self.assertEqual(expected_results, read_descriptors) - - def test_archived_bz2(self): - """ - Checks that we can read descriptors from an bzipped archive. - """ - - # when python's compiled it only optionally has bz2 support - - try: - import bz2 - except ImportError: - self.skipTest('(bz2 unsupported}') - - expected_results = _get_raw_tar_descriptors() - test_path = os.path.join(DESCRIPTOR_TEST_DATA, 'descriptor_archive.tar.bz2') - - with stem.descriptor.reader.DescriptorReader(test_path) as reader: - read_descriptors = [str(desc) for desc in list(reader)] - self.assertEqual(expected_results, read_descriptors) - - def test_stop(self): - """ - Runs a DescriptorReader over the root directory, then checks that calling - stop() makes it terminate in a timely fashion. 
- """ - - # Skip on windows since SIGALRM is unavailable - - if stem.util.system.is_windows(): - self.skipTest('(SIGALRM unavailable)') - - is_test_running = True - reader = stem.descriptor.reader.DescriptorReader('/usr') - - # Fails the test after a couple seconds if we don't finish successfully. - # Depending on what we're blocked on this might not work when the test - # fails, requiring that we give a manual kill to the test. - - def timeout_handler(signum, frame): - if is_test_running: - self.fail() - - signal.signal(signal.SIGALRM, timeout_handler) - signal.alarm(2) - - reader.start() - time.sleep(0.001) - reader.stop() - is_test_running = False - - def test_get_processed_files(self): - """ - Checks that get_processed_files() provides the expected results after - iterating over our test data. - """ - - desc_path = os.path.join(DESCRIPTOR_TEST_DATA, 'example_descriptor') - last_modified = int(os.stat(desc_path).st_mtime) - - reader = stem.descriptor.reader.DescriptorReader(desc_path) - - with reader: - list(reader) # iterates over all of the descriptors - - self.assertEqual({desc_path: last_modified}, reader.get_processed_files()) - - def test_skip_nondescriptor_contents(self): - """ - Checks that the reader properly reports when it skips both binary and - plaintext non-descriptor files. - """ - - skip_listener = SkipListener() - reader = stem.descriptor.reader.DescriptorReader(os.path.join(DESCRIPTOR_TEST_DATA, 'unparseable')) - reader.register_skip_listener(skip_listener.listener) - - expected_skip_files = ('riddle', 'tiny.png', 'vote', 'new_metrics_type', 'cached-microdesc-consensus_with_carriage_returns', 'extrainfo_nonascii_v3_reqs') - - with reader: - list(reader) # iterates over all of the descriptors - - # strip anything with a .swp suffix (vim tmp files) - - skip_listener.results = [(path, exc) for (path, exc) in skip_listener.results if not path.endswith('.swp')] - - if len(skip_listener.results) != len(expected_skip_files): - expected_label = ',\n '.join(expected_skip_files) - results_label = ',\n '.join(['%s (%s)' % (path, exc) for (path, exc) in skip_listener.results]) - - self.fail('Skipped files that we should have been able to parse.\n\nExpected:\n %s\n\nResult:\n %s' % (expected_label, results_label)) - - for skip_path, skip_exception in skip_listener.results: - if not os.path.basename(skip_path) in expected_skip_files: - self.fail('Unexpected non-descriptor content: %s' % skip_path) - - self.assertTrue(isinstance(skip_exception, stem.descriptor.reader.UnrecognizedType)) - - def test_skip_listener_already_read(self): - """ - Checks that calling set_processed_files() prior to reading makes us skip - those files. This also doubles for testing that skip listeners are notified - of files that we've already read. 
- """ - - # path that we want the DescriptorReader to skip - - test_path = os.path.join(DESCRIPTOR_TEST_DATA, 'example_descriptor') - initial_processed_files = {test_path: sys.maxsize} - - skip_listener = SkipListener() - reader = stem.descriptor.reader.DescriptorReader(test_path) - reader.register_skip_listener(skip_listener.listener) - reader.set_processed_files(initial_processed_files) - - self.assertEqual(initial_processed_files, reader.get_processed_files()) - - with reader: - list(reader) # iterates over all of the descriptors - - self.assertEqual(1, len(skip_listener.results)) - - skipped_path, skip_exception = skip_listener.results[0] - self.assertEqual(test_path, skipped_path) - self.assertTrue(isinstance(skip_exception, stem.descriptor.reader.AlreadyRead)) - self.assertEqual(sys.maxsize, skip_exception.last_modified_when_read) - - def test_skip_listener_unrecognized_type(self): - """ - Listens for a file that's skipped because its file type isn't recognized. - """ - - # types are solely based on file extensions so making something that looks - # like an png image - - test_path = os.path.join(self.temp_directory, 'test.png') - - try: - test_file = open(test_path, 'w') - test_file.write('test data for test_skip_listener_unrecognized_type()') - test_file.close() - - skip_listener = SkipListener() - reader = stem.descriptor.reader.DescriptorReader(test_path) - reader.register_skip_listener(skip_listener.listener) - - with reader: - list(reader) # iterates over all of the descriptors - - self.assertEqual(1, len(skip_listener.results)) - - skipped_path, skip_exception = skip_listener.results[0] - self.assertEqual(test_path, skipped_path) - self.assertTrue(isinstance(skip_exception, stem.descriptor.reader.UnrecognizedType)) - self.assertTrue(skip_exception.mime_type in (('image/png', None), ('image/x-png', None))) - finally: - if os.path.exists(test_path): - os.remove(test_path) - - def test_skip_listener_read_failure(self): - """ - Listens for a file that's skipped because we lack read permissions. - """ - - # test relies on being unable to read a file - - if getpass.getuser() == 'root': - self.skipTest('(running as root)') - elif stem.util.system.is_windows(): - self.skipTest('(chmod not functional)') - - test_path = os.path.join(self.temp_directory, 'secret_file') - - try: - test_file = open(test_path, 'w') - test_file.write('test data for test_skip_listener_unrecognized_type()') - test_file.close() - - os.chmod(test_path, 0o077) # remove read permissions - - skip_listener = SkipListener() - reader = stem.descriptor.reader.DescriptorReader(test_path) - reader.register_skip_listener(skip_listener.listener) - - with reader: - list(reader) # iterates over all of the descriptors - - self.assertEqual(1, len(skip_listener.results)) - - skipped_path, skip_exception = skip_listener.results[0] - self.assertEqual(test_path, skipped_path) - self.assertTrue(isinstance(skip_exception, stem.descriptor.reader.ReadFailed)) - self.assertTrue(isinstance(skip_exception.exception, IOError)) - finally: - if os.path.exists(test_path): - os.remove(test_path) - - def test_skip_listener_file_missing(self): - """ - Listens for a file that's skipped because the file doesn't exist. 
- """ - - test_path = '/non-existant/path' - - skip_listener = SkipListener() - reader = stem.descriptor.reader.DescriptorReader(test_path) - reader.register_skip_listener(skip_listener.listener) - - with reader: - list(reader) # iterates over all of the descriptors - - self.assertEqual(1, len(skip_listener.results)) - - skipped_path, skip_exception = skip_listener.results[0] - self.assertEqual(test_path, skipped_path) - self.assertTrue(isinstance(skip_exception, stem.descriptor.reader.FileMissing)) - - def test_unrecognized_metrics_type(self): - """ - Parses a file that has a valid metrics header, but an unrecognized type. - """ - - test_path = test.unit.descriptor.get_resource('unparseable/new_metrics_type') - - skip_listener = SkipListener() - reader = stem.descriptor.reader.DescriptorReader(test_path) - reader.register_skip_listener(skip_listener.listener) - - with reader: - list(reader) # iterates over all of the descriptors - - self.assertEqual(1, len(skip_listener.results)) - - skipped_path, skip_exception = skip_listener.results[0] - self.assertEqual(test_path, skipped_path) - self.assertTrue(isinstance(skip_exception, stem.descriptor.reader.UnrecognizedType)) - self.assertEqual((None, None), skip_exception.mime_type) - - def _make_processed_files_listing(self, contents): - """ - Writes the given 'processed file' listing to disk, returning the path where - it is located. - """ - - with open(self.test_listing_path, 'w') as test_listing_file: - test_listing_file.write(contents) - - return self.test_listing_path