[tor-commits] [stem/master] Revising descriptor exporter and associated tests

atagar at torproject.org
Sat Aug 4 19:19:49 UTC 2012


commit 7022021c4207a0066cb93261629f2ba020d307f6
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat Aug 4 12:13:47 2012 -0700

    Revising descriptor exporter and associated tests
    
    Several changes to the prior csv export functionality, especially its tests.
    Most notably this includes...
    
    * rewrite of the tests to be simpler and broken down by test case
    * unix newlines rather than windows style '\r\n' line endings
    * excluding private attributes (attributes that start with an underscore)
    * writing directly to a file in export_csv_file() rather than buffering to a string first
    * general refactoring to hopefully improve readability
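    
    For illustration, a minimal sketch of how the revised functions can be
    used (not part of the patch below; assumes 'desc' is an already parsed
    RelayDescriptor, and the output path is made up)...
    
      from stem.descriptor.export import export_csv, export_csv_file
      
      # newline separated csv with a header row, private attributes omitted
      print export_csv(desc)
      
      # pick the attributes and their order, and skip the header row
      print export_csv(desc, included_fields = ('nickname', 'address'), header = False)
      
      # write directly to a file rather than buffering the csv in a string first
      output_file = open('/tmp/descriptors.csv', 'w')
      export_csv_file(output_file, desc)
      output_file.close()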
---
 run_tests.py                     |    6 +-
 stem/descriptor/__init__.py      |    2 +-
 stem/descriptor/export.py        |  148 ++++++++++++++----------------
 test/unit/descriptor/__init__.py |    2 +-
 test/unit/descriptor/export.py   |  189 ++++++++++++++------------------------
 5 files changed, 142 insertions(+), 205 deletions(-)

diff --git a/run_tests.py b/run_tests.py
index bed8e50..4ad551a 100755
--- a/run_tests.py
+++ b/run_tests.py
@@ -16,6 +16,7 @@ import test.output
 import test.runner
 import test.check_whitespace
 import test.unit.connection.authentication
+import test.unit.descriptor.export
 import test.unit.descriptor.reader
 import test.unit.descriptor.server_descriptor
 import test.unit.descriptor.extrainfo_descriptor
@@ -50,8 +51,6 @@ import test.integ.util.proc
 import test.integ.util.system
 import test.integ.process
 import test.integ.version
-#TODO move this by other descriptor tests
-import test.unit.descriptor.export
 
 import stem.prereq
 import stem.util.conf
@@ -109,6 +108,7 @@ UNIT_TESTS = (
   test.unit.util.proc.TestProc,
   test.unit.util.system.TestSystem,
   test.unit.util.tor_tools.TestTorTools,
+  test.unit.descriptor.export.TestExport,
   test.unit.descriptor.reader.TestDescriptorReader,
   test.unit.descriptor.server_descriptor.TestServerDescriptor,
   test.unit.descriptor.extrainfo_descriptor.TestExtraInfoDescriptor,
@@ -123,9 +123,7 @@ UNIT_TESTS = (
   test.unit.response.protocolinfo.TestProtocolInfoResponse,
   test.unit.response.authchallenge.TestAuthChallengeResponse,
   test.unit.connection.authentication.TestAuthenticate,
-  test.unit.descriptor.export.TestExport,
 )
-#  TODO move export test to proper location
 
 INTEG_TESTS = (
   test.integ.util.conf.TestConf,
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index bb6f874..45cd016 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -13,7 +13,7 @@ Package for parsing and processing descriptor data.
 """
 
 __all__ = [
-  "descriptor",
+  "export",
   "reader",
   "extrainfo_descriptor",
   "server_descriptor",
diff --git a/stem/descriptor/export.py b/stem/descriptor/export.py
index 89a5644..16a148d 100644
--- a/stem/descriptor/export.py
+++ b/stem/descriptor/export.py
@@ -1,97 +1,89 @@
-import os, csv, sets, cStringIO
+"""
+Utilities for exporting descriptors to other formats.
+"""
 
+import csv
+import cStringIO
 
-def export_csv(descriptor, include_fields=(), exclude_fields=()):
+import stem.descriptor
+
+class _ExportDialect(csv.excel):
+  lineterminator = '\n'
+
+def export_csv(descriptors, included_fields = (), excluded_fields = (), header = True):
   """
-  Takes a single descriptor object, puts it in a list, and passes it to
-  descriptors_csv_exp to build a csv.
+  Provides a newline separated CSV for one or more descriptors. If simply
+  provided with descriptors then the CSV contains all of their attributes,
+  labelled with a header row. Either 'included_fields' or 'excluded_fields' can
+  be used for more granular control over the attributes and their order.
   
-  :param object descriptor: single descriptor whose attributes will be returned as a string.
-  :param list include_fields: list of attribute fields to include in the csv string.
-  :param list exclude_fields: list of attribute fields to exclude from csv string.
+  :param Descriptor,list descriptors: descriptor or list of descriptors to be exported
+  :param list included_fields: attributes to include in the csv
+  :param list excluded_fields: attributes to exclude from the csv
+  :param bool header: if True then the first line will be a comma separated list of the attribute names
   
-  :returns: single csv line as a string with one descriptor attribute per cell.
+  :returns: str of the CSV for the descriptors, one per line
+  :raises: ValueError if descriptors contain more than one descriptor type
   """
   
-  descr = (descriptor,)
-  return export_csvs(descr, include_fields=include_fields, exclude_fields=exclude_fields)
+  output_buffer = cStringIO.StringIO()
+  export_csv_file(output_buffer, descriptors, included_fields, excluded_fields, header)
+  return output_buffer.getvalue()
 
-
-def export_csvs(descriptors, include_fields=[], exclude_fields=[], header=False):
+def export_csv_file(output_file, descriptors, included_fields = (), excluded_fields = (), header = True):
   """
-  Takes an iterable of descriptors, returns a string with one line per descriptor
-  where each line is a comma separated list of descriptor attributes.
+  Similar to :func:`stem.descriptor.export.export_csv`, except that the CSV is
+  written directly to a file.
   
-  :param list descrs: List of descriptor objects whose attributes will be written.
-  :param list include_fields: list of attribute fields to include in the csv string.
-  :param list exclude_fields: list of attribute fields to exclude from csv string.
-  :param bool header: whether or not a header is requested.
+  :param file output_file: file to be written to
+  :param Descriptor,list descriptors: descriptor or list of descriptors to be exported
+  :param list included_fields: attributes to include in the csv
+  :param list excluded_fields: attributes to exclude from the csv
+  :param bool header: if True then the first line will be a comma separated list of the attribute names
   
-  :returns: csv string with one descriptor per line and one attribute per cell.
-  :raises: ValueError if more than one descriptor type (e.g. server_descriptor,
-    extrainfo_descriptor) is provided in the iterable.
+  :returns: str of the CSV for the descriptors, one per line
+  :raises: ValueError if descriptors contain more than one descriptor type
   """
   
-  # Need a file object to write to with DictWriter.
-  temp_file = cStringIO.StringIO()
+  if isinstance(descriptors, stem.descriptor.Descriptor):
+    descriptors = (descriptors,)
   
-  first = True
+  if not descriptors:
+    return
   
-  for desc in descriptors:
-    #umport sys
-    attr = vars(desc)
-    
-    # Defining incl_fields and the dwriter object requires having access
-    # to a descriptor object.
-    if first:
-      # All descriptor objects should be of the same type
-      # (i.e. server_descriptor.RelayDesrciptor)
-      desc_type = type(desc)
-      
-      # define incl_fields, 4 cases where final case is incl_fields already
-      # defined and excl_fields left blank, so no action is necessary.
-      if not include_fields and exclude_fields:
-        incl = set(attr.keys())
-        include_fields = list(incl.difference(exclude_fields))
-      
-      elif not include_fields and not exclude_fields:
-        include_fields = attr.keys()
-      
-      elif include_fields and exclude_fields:
-        incl = set(include_fields)
-        include_fields = list(incl.difference(exclude_fields))
-      
-      dwriter = csv.DictWriter(temp_file, include_fields, extrasaction='ignore')
-      
-      if header:
-        dwriter.writeheader()
-      first = False
-    
-    if desc_type == type(desc):
-      dwriter.writerow(attr)
-    else:
-      raise ValueError('More than one descriptor type provided. Started with a %s, and just got a %s' % (desc_type, type(desc)))
+  descriptor_type = type(descriptors[0])
+  descriptor_type_label = descriptor_type.__name__
+  included_fields = list(included_fields)
   
-  return temp_file.getvalue()
-  # cStringIO files are closed automatically when the current scope is exited.
-
-def export_csv_file(descriptors, document, include_fields=(), exclude_fields=(), header=True):
-  """
-  Writes descriptor attributes to a csv file on disk.
+  # If the user didn't specify the fields to include then export everything,
+  # ordered alphabetically. If they did specify fields then make sure that
+  # they exist.
   
-  Calls get_csv_lines with the given argument, then writes the returned string
-  to a file location specified by document_location.
-  Precondition that param document has a 'write' attribute.
+  desc_attr = sorted(vars(descriptors[0]).keys())
   
-  :param list descriptors: descriptor objects with attributes to export as csv file.
-  :param object document: File object to be written to.
-  :param bool header: defaults to true, determines if document will have a header row.
-  :param list include_fields: list of attribute fields to include in the csv line.
-  :param list exclude_fields: list of attribute fields to exclude from csv line.
-  """
+  if included_fields:
+    for field in included_fields:
+      if not field in desc_attr:
+        raise ValueError("%s does not have a '%s' attribute, valid fields are: %s" % (descriptor_type_label, field, ", ".join(desc_attr)))
+  else:
+    included_fields = [attr for attr in desc_attr if not attr.startswith('_')]
+  
+  for field in excluded_fields:
+    try:
+      included_fields.remove(field)
+    except ValueError:
+      pass
+  
+  writer = csv.DictWriter(output_file, included_fields, dialect = _ExportDialect(), extrasaction='ignore')
+  
+  if header:
+    writer.writeheader()
   
-  try:
-    document.write(export_csvs(descriptors, include_fields=include_fields, exclude_fields=exclude_fields, header=header))
-  except AttributeError:
-    print "Provided %r object does not have a write() method." % document
-    raise
+  for desc in descriptors:
+    if not isinstance(desc, stem.descriptor.Descriptor):
+      raise ValueError("Unable to export a descriptor CSV since %s is not a descriptor." % type(desc).__name__)
+    elif descriptor_type != type(desc):
+      raise ValueError("To export a descriptor CSV all of the descriptors must be of the same type. First descriptor was a %s but we later got a %s." % (descriptor_type_label, type(desc)))
+    
+    writer.writerow(vars(desc))
+
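
The _ExportDialect above exists just to swap the csv module's default '\r\n'
row terminator for plain newlines. A small standalone sketch of the DictWriter
behavior the exporter relies on (the field values here are made up)...

  import csv
  import cStringIO

  class _ExportDialect(csv.excel):
    lineterminator = '\n'

  buff = cStringIO.StringIO()
  writer = csv.DictWriter(buff, ['nickname', 'address'], dialect = _ExportDialect(), extrasaction = 'ignore')
  writer.writeheader()

  # keys that aren't among the listed fields (such as private '_digest' style
  # attributes) are silently dropped thanks to extrasaction = 'ignore'
  writer.writerow({'nickname': 'caerSidi', 'address': '71.35.133.197', '_digest': None})

  print repr(buff.getvalue())  # 'nickname,address\ncaerSidi,71.35.133.197\n'
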
diff --git a/test/unit/descriptor/__init__.py b/test/unit/descriptor/__init__.py
index cf4fceb..1837a07 100644
--- a/test/unit/descriptor/__init__.py
+++ b/test/unit/descriptor/__init__.py
@@ -2,5 +2,5 @@
 Unit tests for stem.descriptor.
 """
 
-__all__ = ["reader", "extrainfo_descriptor", "server_descriptor"]
+__all__ = ["export", "reader", "extrainfo_descriptor", "server_descriptor"]
 
diff --git a/test/unit/descriptor/export.py b/test/unit/descriptor/export.py
index 12d5ed3..6ef52cb 100644
--- a/test/unit/descriptor/export.py
+++ b/test/unit/descriptor/export.py
@@ -1,146 +1,93 @@
 """
-Unit testing code for the stem.descriptor.export module
+Unit tests for stem.descriptor.export.
 """
-import unittest
-import stem.descriptor.export as export
-import test.mocking as mocking
-
-from collections import namedtuple
-import stem.descriptor as desc
-import cStringIO
-
-# Create descriptor objects.
-DESCR_DICT = {'average_bandwidth': 5242880, 'onion_key': 'RSA PUB = JAIK', 'address': '79.139.135.90', '_digest': None, 'exit_policy': ['reject *:*'], 'fingerprint': 'AAAAAAAAAAAAAAAAAAA'}
-DESCR2_DICT = {'average_bandwidth': 5555555, 'onion_key': 'RSA PUB = GOUS', 'address': '100.1.1.1', '_digest': None, 'exit_policy': ['reject *:*'], 'fingerprint': 'BBBBBBBBBBBBBBBBBBBBB'}
-DESCR3_DICT = {'bandwidth':12345,'average_bandwidth': 6666666, 'address': '101.0.0.1','extra_info':None}
-RAW = 'router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon'
-
-descriptor = desc.Descriptor(RAW)
-descriptor.__dict__.update(DESCR_DICT)
-
-descriptor2 = desc.Descriptor(RAW)
-descriptor2.__dict__.update(DESCR2_DICT)
-
-descriptor3 = desc.server_descriptor.RelayDescriptor(RAW, validate=False)
-descriptor3.__dict__.update(DESCR3_DICT)
-
-# Expected return csv strings.
-SINGLE_ALL = '5242880,RSA PUB = JAIK,AAAAAAAAAAAAAAAAAAA,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,[\'reject *:*\'],79.139.135.90,'
-SINGLE_PART = '79.139.135.90,[\'reject *:*\']'
-SINGLE_PART2 = '5242880,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,[\'reject *:*\'],79.139.135.90,'
-SINGLE_PART3 = '79.139.135.90,AAAAAAAAAAAAAAAAAAA'
 
-DOUBLE_ALL = '5242880,RSA PUB = JAIK,AAAAAAAAAAAAAAAAAAA,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,[\'reject *:*\'],79.139.135.90,\r\n5555555,RSA PUB = GOUS,BBBBBBBBBBBBBBBBBBBBB,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,[\'reject *:*\'],100.1.1.1,\r\n'
-DOUBLE_PART = '79.139.135.90,[\'reject *:*\']\r\n100.1.1.1,[\'reject *:*\']\r\n'
-DOUBLE_PART2 = '5242880,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,[\'reject *:*\'],79.139.135.90,\r\n5555555,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,[\'reject *:*\'],100.1.1.1,\r\n'
+import StringIO
+import unittest
 
-SINGLE_ALL_HEAD = 'average_bandwidth,onion_key,fingerprint,_digest,_raw_contents,exit_policy,address,_path\r\n' + SINGLE_ALL + '\r\n'
-SINGLE_PART3_HEAD = 'address,fingerprint\r\n' + SINGLE_PART3
-DOUBLE_ALL_HEAD = 'average_bandwidth,onion_key,fingerprint,_digest,_raw_contents,exit_policy,address,_path\r\n' + DOUBLE_ALL
-DOUBLE_PART_HEAD = 'address,exit_policy\r\n' + DOUBLE_PART
-DOUBLE_PART2_HEAD = 'average_bandwidth,_digest,_raw_contents,exit_policy,address,_path\r\n' + DOUBLE_PART2
+from stem.descriptor.export import export_csv, export_csv_file
+from stem.descriptor.server_descriptor import RelayDescriptor, BridgeDescriptor
+from test.mocking import get_server_descriptor
 
 class TestExport(unittest.TestCase):
-  def tearDown(self):
-    mocking.revert_mocking()
-  
-  def test_export_csv(self):
+  def test_minimal_descriptor(self):
     """
-    Tests the export_csv function which takes a single descriptor object.
+    Exports a single minimal tor server descriptor.
     """
-    Fields = namedtuple('Fields', 'include_fields exclude_fields')
     
-    # Descriptors must be an iterable
-    # named tuples replace dictionaries as dict keys must immutable.
-    ret_vals = {((descriptor,), Fields(include_fields=(), exclude_fields=())):SINGLE_ALL,
-                ((descriptor,), Fields(include_fields=('address', 'exit_policy'),
-                  exclude_fields=())):SINGLE_PART,
-                ((descriptor,), Fields(include_fields=(),
-                  exclude_fields=('onion_key', 'fingerprint'))):SINGLE_PART2,
-                ((descriptor,), Fields(include_fields=('address', 'exit_policy', 'fingerprint'),
-                  exclude_fields=('fingerprint',))):SINGLE_PART,
-                ((descriptor,), Fields(include_fields=('address', 'fingerprint'),
-                  exclude_fields=('_digest',))):SINGLE_PART3
-                }
-    mocking.mock(export.export_csvs, mocking.return_for_args(ret_vals, kwarg_type=Fields))
+    desc = RelayDescriptor(get_server_descriptor())
     
-    # Used tuples for incl/exclude_fields for parameter matching with ret_vals dict.
-    self.assertEqual(SINGLE_ALL, export.export_csv(descriptor))
-    self.assertEqual(SINGLE_PART, export.export_csv(descriptor,
-      include_fields=('address', 'exit_policy')))
-    self.assertEqual(SINGLE_PART2, export.export_csv(descriptor,
-      exclude_fields=('onion_key', 'fingerprint')))
-    self.assertEqual(SINGLE_PART, export.export_csv(descriptor,
-      include_fields=('address', 'exit_policy', 'fingerprint'), exclude_fields=('fingerprint',)))
-    self.assertEqual(SINGLE_PART3, export.export_csv(descriptor,
-      include_fields=('address', 'fingerprint'), exclude_fields=('_digest',)))
-      
+    desc_csv = export_csv(desc, included_fields = ('nickname', 'address', 'published'), header = False)
+    expected = "caerSidi,71.35.133.197,2012-03-01 17:15:27\n"
+    self.assertEquals(expected, desc_csv)
+    
+    desc_csv = export_csv(desc, included_fields = ('nickname', 'address', 'published'), header = True)
+    expected = "nickname,address,published\n" + expected
+    self.assertEquals(expected, desc_csv)
   
-  def test_export_csvs(self):
+  def test_multiple_descriptors(self):
     """
-    Test the export_csvs function which takes a list of descriptor objects.
+    Exports multiple descriptors, making sure that we get them back in the same
+    order.
     """
     
-    # Single descriptor
-    self.assertEquals(SINGLE_ALL + "\r\n", export.export_csvs([descriptor]))
-    self.assertEqual(SINGLE_PART + "\r\n", export.export_csvs([descriptor],
-      include_fields=['address', 'exit_policy']))
-    self.assertEqual(SINGLE_PART2 + "\r\n", export.export_csvs([descriptor],
-      exclude_fields=['onion_key', 'fingerprint']))
-    self.assertEqual(SINGLE_PART + "\r\n", export.export_csvs([descriptor],
-      include_fields=['address', 'exit_policy', 'fingerprint'], exclude_fields=['fingerprint']))
-    
-    # Multiple descriptors
-    self.assertEqual(DOUBLE_ALL, export.export_csvs([descriptor, descriptor2]))
-    self.assertEqual(DOUBLE_PART, export.export_csvs([descriptor, descriptor2],
-      include_fields=['address', 'exit_policy']))
-    self.assertEqual(DOUBLE_PART2, export.export_csvs([descriptor, descriptor2],
-      exclude_fields=['onion_key', 'fingerprint']))
-    self.assertEqual(DOUBLE_PART, export.export_csvs([descriptor, descriptor2],
-      include_fields=['address', 'exit_policy', 'fingerprint'], exclude_fields=['fingerprint']))
+    nicknames = ('relay1', 'relay3', 'relay2', 'caerSidi', 'zeus')
+    descriptors = []
     
-    # Tests with headers
-    self.assertEqual(SINGLE_ALL_HEAD, export.export_csvs([descriptor], header=True))
-    self.assertEqual(SINGLE_PART3_HEAD + "\r\n", export.export_csvs([descriptor],
-      include_fields=['address', 'fingerprint'], exclude_fields=['_digest'], header=True))
-    self.assertEqual(DOUBLE_ALL_HEAD, export.export_csvs([descriptor, descriptor2], header=True))
-    self.assertEqual(DOUBLE_PART_HEAD, export.export_csvs([descriptor, descriptor2],
-      include_fields=['address', 'exit_policy'], header=True))
-    self.assertEqual(DOUBLE_PART2_HEAD, export.export_csvs([descriptor, descriptor2],
-      exclude_fields=['onion_key', 'fingerprint'], header=True))
+    for nickname in nicknames:
+      router_line = "%s 71.35.133.197 9001 0 0" % nickname
+      descriptors.append(RelayDescriptor(get_server_descriptor({'router': router_line})))
     
-    # Other tests
-    self.assertRaises(ValueError, export.export_csvs, [descriptor, descriptor3])
-    self.assertRaises(ValueError, export.export_csvs, [descriptor, descriptor3],
-      include_fields=['onion_key', 'address', 'fingerprint'], exclude_fields=['onion_key'])
-      
+    expected = "\n".join(nicknames) + "\n"
+    self.assertEqual(expected, export_csv(descriptors, included_fields = ('nickname',), header = False))
   
-  
-  def test_export_csv_file(self):
+  def test_file_output(self):
     """
-    Tests the export_csv_file function.
+    Basic test for the export_csv_file() function, checking that it provides
+    the same output as export_csv().
     """
-    sample_csv_string = 'This, is, a, sample, string.\r\nline, two.\r\n'
-    sample_csv_string2 = 'Another, sample\r\n,, second,\r\n'
-    sample_file = cStringIO.StringIO()
     
-    # Must use named tuples again for ret_vals dictionary.
-    Fields = namedtuple('Fields', 'include_fields exclude_fields header')
+    desc = RelayDescriptor(get_server_descriptor())
+    desc_csv = export_csv(desc)
     
-    ret_vals = {((descriptor,), Fields(include_fields=(), exclude_fields=(), header=True)):sample_csv_string,
-      ((descriptor,), Fields(include_fields=('address', 'onion_key'), exclude_fields=('address',), header=False)):sample_csv_string2}
-    # TODO Ask Danner: mock it once then do both tests (not including assertRaises), or do separate mockings.
-    #    the latter requires that we still include empty incl_fields and excl_fields parameters instead of
-    #    letting them default to [].  Same for header.
-    mocking.mock(export.export_csvs, mocking.return_for_args(ret_vals, kwarg_type=Fields))
+    csv_buffer = StringIO.StringIO()
+    export_csv_file(csv_buffer, desc)
     
-    export.export_csv_file((descriptor,), sample_file)
-    self.assertEqual(sample_csv_string, sample_file.getvalue())
+    self.assertEqual(desc_csv, csv_buffer.getvalue())
+  
+  def test_excludes_private_attr(self):
+    """
+    Checks that the default attributes for our csv output don't include private fields.
+    """
     
-    sample_file = cStringIO.StringIO()
+    desc = RelayDescriptor(get_server_descriptor())
+    desc_csv = export_csv(desc)
     
-    export.export_csv_file((descriptor,), sample_file, include_fields=('address', 'onion_key'), exclude_fields=('address',), header=False)
-    self.assertEqual(sample_csv_string2, sample_file.getvalue())
+    self.assertTrue(',signature' in desc_csv)
+    self.assertFalse(',_digest' in desc_csv)
+    self.assertFalse(',_annotation_lines' in desc_csv)
+  
+  def test_empty_input(self):
+    """
+    Exercises when we don't provide any descriptors.
+    """
     
-    # Make sure error is Raised when necessary.
-    self.assertRaises(AttributeError, export.export_csv_file, (descriptor,), sample_csv_string)
+    self.assertEquals("", export_csv([]))
+  
+  def test_invalid_attributes(self):
+    """
+    Attempts to make a csv with attributes that don't exist.
+    """
+    
+    desc = RelayDescriptor(get_server_descriptor())
+    self.assertRaises(ValueError, export_csv, desc, ('nickname', 'blarg!'))
+  
+  def test_multiple_descriptor_types(self):
+    """
+    Attempts to make a csv with multiple descriptor types.
+    """
+    
+    server_desc = RelayDescriptor(get_server_descriptor())
+    bridge_desc = BridgeDescriptor(get_server_descriptor(is_bridge = True))
+    self.assertRaises(ValueError, export_csv, (server_desc, bridge_desc))
+




