commit 7022021c4207a0066cb93261629f2ba020d307f6
Author: Damian Johnson <atagar@torproject.org>
Date:   Sat Aug 4 12:13:47 2012 -0700
Revising descriptor exporter and associated tests
Several changes to the prior csv export functionality, especially its tests. Most notably this includes...
* rewrite of the tests to be simpler and broken down by test case
* newline breaks rather than windows style '\r\n'
* excluding private attributes (attributes that start with an underscore)
* writing directly to a file in export_csv_file() rather than buffering to a string first
* general refactoring to hopefully improve readability
---
 run_tests.py                     |    6 +-
 stem/descriptor/__init__.py      |    2 +-
 stem/descriptor/export.py        |  148 ++++++++++++++----------------
 test/unit/descriptor/__init__.py |    2 +-
 test/unit/descriptor/export.py   |  189 ++++++++++++++------------------------
 5 files changed, 142 insertions(+), 205 deletions(-)
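For anyone skimming the patch, the revised interface is meant to be used roughly like this. This is only a sketch based on the new code below; 'descriptor_content' and the output filename are placeholders:

  import stem.descriptor.export
  from stem.descriptor.server_descriptor import RelayDescriptor

  # 'descriptor_content' stands in for the raw text of a single server descriptor
  desc = RelayDescriptor(descriptor_content, validate = False)

  # newline terminated csv with a header row and every public attribute
  # (fields starting with an underscore are skipped by default)
  csv_output = stem.descriptor.export.export_csv(desc)

  # ... or pick the attributes and write straight to a file rather than
  # buffering the whole thing as a string first
  with open('descriptors.csv', 'w') as output_file:
    stem.descriptor.export.export_csv_file(output_file, desc, included_fields = ('nickname', 'address', 'published'))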
diff --git a/run_tests.py b/run_tests.py
index bed8e50..4ad551a 100755
--- a/run_tests.py
+++ b/run_tests.py
@@ -16,6 +16,7 @@ import test.output
 import test.runner
 import test.check_whitespace
 import test.unit.connection.authentication
+import test.unit.descriptor.export
 import test.unit.descriptor.reader
 import test.unit.descriptor.server_descriptor
 import test.unit.descriptor.extrainfo_descriptor
@@ -50,8 +51,6 @@ import test.integ.util.proc
 import test.integ.util.system
 import test.integ.process
 import test.integ.version
-#TODO move this by other descriptor tests
-import test.unit.descriptor.export
 
 import stem.prereq
 import stem.util.conf
@@ -109,6 +108,7 @@ UNIT_TESTS = (
   test.unit.util.proc.TestProc,
   test.unit.util.system.TestSystem,
   test.unit.util.tor_tools.TestTorTools,
+  test.unit.descriptor.export.TestExport,
   test.unit.descriptor.reader.TestDescriptorReader,
   test.unit.descriptor.server_descriptor.TestServerDescriptor,
   test.unit.descriptor.extrainfo_descriptor.TestExtraInfoDescriptor,
@@ -123,9 +123,7 @@ UNIT_TESTS = (
   test.unit.response.protocolinfo.TestProtocolInfoResponse,
   test.unit.response.authchallenge.TestAuthChallengeResponse,
   test.unit.connection.authentication.TestAuthenticate,
-  test.unit.descriptor.export.TestExport,
 )
-# TODO move export test to proper location
 
 INTEG_TESTS = (
   test.integ.util.conf.TestConf,
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index bb6f874..45cd016 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -13,7 +13,7 @@ Package for parsing and processing descriptor data.
 """
 
 __all__ = [
-  "descriptor",
+  "export",
   "reader",
   "extrainfo_descriptor",
   "server_descriptor",
diff --git a/stem/descriptor/export.py b/stem/descriptor/export.py
index 89a5644..16a148d 100644
--- a/stem/descriptor/export.py
+++ b/stem/descriptor/export.py
@@ -1,97 +1,89 @@
-import os, csv, sets, cStringIO
+"""
+Utilities for exporting descriptors to other formats.
+"""
 
+import csv
+import cStringIO
 
-def export_csv(descriptor, include_fields=(), exclude_fields=()):
+import stem.descriptor
+
+class _ExportDialect(csv.excel):
+  lineterminator = '\n'
+
+def export_csv(descriptors, included_fields = (), excluded_fields = (), header = True):
   """
-  Takes a single descriptor object, puts it in a list, and passes it to
-  descriptors_csv_exp to build a csv.
+  Provides a newline separated CSV for one or more descriptors. If simply
+  provided with descriptors then the CSV contains all of its attributes,
+  labelled with a header row. Either 'included_fields' or 'excluded_fields' can
+  be used for more granular control over its attributes and the order.
 
-  :param object descriptor: single descriptor whose attributes will be returned as a string.
-  :param list include_fields: list of attribute fields to include in the csv string.
-  :param list exclude_fields: list of attribute fields to exclude from csv string.
+  :param Descriptor,list descriptors: descriptor or list of descriptors to be exported
+  :param list included_fields: attributes to include in the csv
+  :param list excluded_fields: attributes to exclude from the csv
+  :param bool header: if True then the first line will be a comma separated list of the attribute names
 
-  :returns: single csv line as a string with one descriptor attribute per cell.
+  :returns: str of the CSV for the descriptors, one per line
+  :raises: ValueError if descriptors contain more than one descriptor type
   """
 
-  descr = (descriptor,)
-  return export_csvs(descr, include_fields=include_fields, exclude_fields=exclude_fields)
+  output_buffer = cStringIO.StringIO()
+  export_csv_file(output_buffer, descriptors, included_fields, excluded_fields, header)
+  return output_buffer.getvalue()
 
-
-def export_csvs(descriptors, include_fields=[], exclude_fields=[], header=False):
+def export_csv_file(output_file, descriptors, included_fields = (), excluded_fields = (), header = True):
   """
-  Takes an iterable of descriptors, returns a string with one line per descriptor
-  where each line is a comma separated list of descriptor attributes.
+  Similar to :func:`stem.descriptor.export.export_csv`, except that the CSV is
+  written directly to a file.
 
-  :param list descrs: List of descriptor objects whose attributes will be written.
-  :param list include_fields: list of attribute fields to include in the csv string.
-  :param list exclude_fields: list of attribute fields to exclude from csv string.
-  :param bool header: whether or not a header is requested.
+  :param file output_file: file to be written to
+  :param Descriptor,list descriptors: descriptor or list of descriptors to be exported
+  :param list included_fields: attributes to include in the csv
+  :param list excluded_fields: attributes to exclude from the csv
+  :param bool header: if True then the first line will be a comma separated list of the attribute names
 
-  :returns: csv string with one descriptor per line and one attribute per cell.
-  :raises: ValueError if more than one descriptor type (e.g. server_descriptor,
-    extrainfo_descriptor) is provided in the iterable.
+  :returns: str of the CSV for the descriptors, one per line
+  :raises: ValueError if descriptors contain more than one descriptor type
   """
 
-  # Need a file object to write to with DictWriter.
-  temp_file = cStringIO.StringIO()
+  if isinstance(descriptors, stem.descriptor.Descriptor):
+    descriptors = (descriptors,)
 
-  first = True
+  if not descriptors:
+    return
 
-  for desc in descriptors:
-    #umport sys
-    attr = vars(desc)
-
-    # Defining incl_fields and the dwriter object requires having access
-    # to a descriptor object.
-    if first:
-      # All descriptor objects should be of the same type
-      # (i.e. server_descriptor.RelayDesrciptor)
-      desc_type = type(desc)
-
-      # define incl_fields, 4 cases where final case is incl_fields already
-      # defined and excl_fields left blank, so no action is necessary.
-      if not include_fields and exclude_fields:
-        incl = set(attr.keys())
-        include_fields = list(incl.difference(exclude_fields))
-
-      elif not include_fields and not exclude_fields:
-        include_fields = attr.keys()
-
-      elif include_fields and exclude_fields:
-        incl = set(include_fields)
-        include_fields = list(incl.difference(exclude_fields))
-
-      dwriter = csv.DictWriter(temp_file, include_fields, extrasaction='ignore')
-
-      if header:
-        dwriter.writeheader()
-      first = False
-
-    if desc_type == type(desc):
-      dwriter.writerow(attr)
-    else:
-      raise ValueError('More than one descriptor type provided. Started with a %s, and just got a %s' % (desc_type, type(desc)))
+  descriptor_type = type(descriptors[0])
+  descriptor_type_label = descriptor_type.__name__
+  included_fields = list(included_fields)
 
-  return temp_file.getvalue()
-  # cStringIO files are closed automatically when the current scope is exited.
-
-def export_csv_file(descriptors, document, include_fields=(), exclude_fields=(), header=True):
-  """
-  Writes descriptor attributes to a csv file on disk.
+  # If the user didn't specify the fields to include then export everything,
+  # ordered alphabetically. If they did specify fields then make sure that
+  # they exist.
 
-  Calls get_csv_lines with the given argument, then writes the returned string
-  to a file location specified by document_location.
-  Precondition that param document has a 'write' attribute.
+  desc_attr = sorted(vars(descriptors[0]).keys())
 
-  :param list descriptors: descriptor objects with attributes to export as csv file.
-  :param object document: File object to be written to.
-  :param bool header: defaults to true, determines if document will have a header row.
-  :param list include_fields: list of attribute fields to include in the csv line.
-  :param list exclude_fields: list of attribute fields to exclude from csv line.
-  """
+  if included_fields:
+    for field in included_fields:
+      if not field in desc_attr:
+        raise ValueError("%s does not have a '%s' attribute, valid fields are: %s" % (descriptor_type_label, field, ", ".join(desc_attr)))
+  else:
+    included_fields = [attr for attr in desc_attr if not attr.startswith('_')]
+
+  for field in excluded_fields:
+    try:
+      included_fields.remove(field)
+    except ValueError:
+      pass
+
+  writer = csv.DictWriter(output_file, included_fields, dialect = _ExportDialect(), extrasaction='ignore')
+
+  if header:
+    writer.writeheader()
 
-  try:
-    document.write(export_csvs(descriptors, include_fields=include_fields, exclude_fields=exclude_fields, header=header))
-  except AttributeError:
-    print "Provided %r object does not have a write() method." % document
-    raise
+  for desc in descriptors:
+    if not isinstance(desc, stem.descriptor.Descriptor):
+      raise ValueError("Unable to export a descriptor CSV since %s is not a descriptor." % type(desc).__name__)
+    elif descriptor_type != type(desc):
+      raise ValueError("To export a descriptor CSV all of the descriptors must be of the same type. First descriptor was a %s but we later got a %s." % (descriptor_type_label, type(desc)))
+
+    writer.writerow(vars(desc))
+
diff --git a/test/unit/descriptor/__init__.py b/test/unit/descriptor/__init__.py
index cf4fceb..1837a07 100644
--- a/test/unit/descriptor/__init__.py
+++ b/test/unit/descriptor/__init__.py
@@ -2,5 +2,5 @@
 Unit tests for stem.descriptor.
 """
-__all__ = ["reader", "extrainfo_descriptor", "server_descriptor"] +__all__ = ["export", "reader", "extrainfo_descriptor", "server_descriptor"]
 
diff --git a/test/unit/descriptor/export.py b/test/unit/descriptor/export.py
index 12d5ed3..6ef52cb 100644
--- a/test/unit/descriptor/export.py
+++ b/test/unit/descriptor/export.py
@@ -1,146 +1,93 @@
 """
-Unit testing code for the stem.descriptor.export module
+Unit tests for stem.descriptor.export.
 """
-import unittest
-import stem.descriptor.export as export
-import test.mocking as mocking
-
-from collections import namedtuple
-import stem.descriptor as desc
-import cStringIO
-
-# Create descriptor objects.
-DESCR_DICT = {'average_bandwidth': 5242880, 'onion_key': 'RSA PUB = JAIK', 'address': '79.139.135.90', '_digest': None, 'exit_policy': ['reject *:*'], 'fingerprint': 'AAAAAAAAAAAAAAAAAAA'}
-DESCR2_DICT = {'average_bandwidth': 5555555, 'onion_key': 'RSA PUB = GOUS', 'address': '100.1.1.1', '_digest': None, 'exit_policy': ['reject *:*'], 'fingerprint': 'BBBBBBBBBBBBBBBBBBBBB'}
-DESCR3_DICT = {'bandwidth':12345,'average_bandwidth': 6666666, 'address': '101.0.0.1','extra_info':None}
-RAW = 'router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon'
-
-descriptor = desc.Descriptor(RAW)
-descriptor.__dict__.update(DESCR_DICT)
-
-descriptor2 = desc.Descriptor(RAW)
-descriptor2.__dict__.update(DESCR2_DICT)
-
-descriptor3 = desc.server_descriptor.RelayDescriptor(RAW, validate=False)
-descriptor3.__dict__.update(DESCR3_DICT)
-
-# Expected return csv strings.
-SINGLE_ALL = '5242880,RSA PUB = JAIK,AAAAAAAAAAAAAAAAAAA,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,['reject *:*'],79.139.135.90,'
-SINGLE_PART = '79.139.135.90,['reject *:*']'
-SINGLE_PART2 = '5242880,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,['reject *:*'],79.139.135.90,'
-SINGLE_PART3 = '79.139.135.90,AAAAAAAAAAAAAAAAAAA'
 
-DOUBLE_ALL = '5242880,RSA PUB = JAIK,AAAAAAAAAAAAAAAAAAA,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,['reject *:*'],79.139.135.90,\r\n5555555,RSA PUB = GOUS,BBBBBBBBBBBBBBBBBBBBB,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,['reject *:*'],100.1.1.1,\r\n'
-DOUBLE_PART = '79.139.135.90,['reject *:*']\r\n100.1.1.1,['reject *:*']\r\n'
-DOUBLE_PART2 = '5242880,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,['reject *:*'],79.139.135.90,\r\n5555555,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,['reject *:*'],100.1.1.1,\r\n'
+import StringIO
+import unittest
 
-SINGLE_ALL_HEAD = 'average_bandwidth,onion_key,fingerprint,_digest,_raw_contents,exit_policy,address,_path\r\n' + SINGLE_ALL + '\r\n'
-SINGLE_PART3_HEAD = 'address,fingerprint\r\n' + SINGLE_PART3
-DOUBLE_ALL_HEAD = 'average_bandwidth,onion_key,fingerprint,_digest,_raw_contents,exit_policy,address,_path\r\n' + DOUBLE_ALL
-DOUBLE_PART_HEAD = 'address,exit_policy\r\n' + DOUBLE_PART
-DOUBLE_PART2_HEAD = 'average_bandwidth,_digest,_raw_contents,exit_policy,address,_path\r\n' + DOUBLE_PART2
+from stem.descriptor.export import export_csv, export_csv_file
+from stem.descriptor.server_descriptor import RelayDescriptor, BridgeDescriptor
+from test.mocking import get_server_descriptor
 
 class TestExport(unittest.TestCase):
-  def tearDown(self):
-    mocking.revert_mocking()
-
-  def test_export_csv(self):
+  def test_minimal_descriptor(self):
     """
-    Tests the export_csv function which takes a single descriptor object.
+    Exports a single minimal tor server descriptor.
     """
-    Fields = namedtuple('Fields', 'include_fields exclude_fields')
 
-    # Descriptors must be an iterable - named tuples replace dictionaries as dict keys must immutable.
-    ret_vals = {((descriptor,), Fields(include_fields=(), exclude_fields=())):SINGLE_ALL,
-      ((descriptor,), Fields(include_fields=('address', 'exit_policy'),
-      exclude_fields=())):SINGLE_PART,
-      ((descriptor,), Fields(include_fields=(),
-      exclude_fields=('onion_key', 'fingerprint'))):SINGLE_PART2,
-      ((descriptor,), Fields(include_fields=('address', 'exit_policy', 'fingerprint'),
-      exclude_fields=('fingerprint',))):SINGLE_PART,
-      ((descriptor,), Fields(include_fields=('address', 'fingerprint'),
-      exclude_fields=('_digest',))):SINGLE_PART3
-      }
-    mocking.mock(export.export_csvs, mocking.return_for_args(ret_vals, kwarg_type=Fields))
+    desc = RelayDescriptor(get_server_descriptor())
 
-    # Used tuples for incl/exclude_fields for parameter matching with ret_vals dict.
-    self.assertEqual(SINGLE_ALL, export.export_csv(descriptor))
-    self.assertEqual(SINGLE_PART, export.export_csv(descriptor,
-      include_fields=('address', 'exit_policy')))
-    self.assertEqual(SINGLE_PART2, export.export_csv(descriptor,
-      exclude_fields=('onion_key', 'fingerprint')))
-    self.assertEqual(SINGLE_PART, export.export_csv(descriptor,
-      include_fields=('address', 'exit_policy', 'fingerprint'), exclude_fields=('fingerprint',)))
-    self.assertEqual(SINGLE_PART3, export.export_csv(descriptor,
-      include_fields=('address', 'fingerprint'), exclude_fields=('_digest',)))
-
+    desc_csv = export_csv(desc, included_fields = ('nickname', 'address', 'published'), header = False)
+    expected = "caerSidi,71.35.133.197,2012-03-01 17:15:27\n"
+    self.assertEquals(expected, desc_csv)
+
+    desc_csv = export_csv(desc, included_fields = ('nickname', 'address', 'published'), header = True)
+    expected = "nickname,address,published\n" + expected
+    self.assertEquals(expected, desc_csv)
 
-  def test_export_csvs(self):
+  def test_multiple_descriptors(self):
     """
-    Test the export_csvs function which takes a list of descriptor objects.
+    Exports multiple descriptors, making sure that we get them back in the same
+    order.
     """
 
-    # Single descriptor
-    self.assertEquals(SINGLE_ALL + "\r\n", export.export_csvs([descriptor]))
-    self.assertEqual(SINGLE_PART + "\r\n", export.export_csvs([descriptor],
-      include_fields=['address', 'exit_policy']))
-    self.assertEqual(SINGLE_PART2 + "\r\n", export.export_csvs([descriptor],
-      exclude_fields=['onion_key', 'fingerprint']))
-    self.assertEqual(SINGLE_PART + "\r\n", export.export_csvs([descriptor],
-      include_fields=['address', 'exit_policy', 'fingerprint'], exclude_fields=['fingerprint']))
-
-    # Multiple descriptors
-    self.assertEqual(DOUBLE_ALL, export.export_csvs([descriptor, descriptor2]))
-    self.assertEqual(DOUBLE_PART, export.export_csvs([descriptor, descriptor2],
-      include_fields=['address', 'exit_policy']))
-    self.assertEqual(DOUBLE_PART2, export.export_csvs([descriptor, descriptor2],
-      exclude_fields=['onion_key', 'fingerprint']))
-    self.assertEqual(DOUBLE_PART, export.export_csvs([descriptor, descriptor2],
-      include_fields=['address', 'exit_policy', 'fingerprint'], exclude_fields=['fingerprint']))
+    nicknames = ('relay1', 'relay3', 'relay2', 'caerSidi', 'zeus')
+    descriptors = []
 
-    # Tests with headers
-    self.assertEqual(SINGLE_ALL_HEAD, export.export_csvs([descriptor], header=True))
-    self.assertEqual(SINGLE_PART3_HEAD + "\r\n", export.export_csvs([descriptor],
-      include_fields=['address', 'fingerprint'], exclude_fields=['_digest'], header=True))
-    self.assertEqual(DOUBLE_ALL_HEAD, export.export_csvs([descriptor, descriptor2], header=True))
-    self.assertEqual(DOUBLE_PART_HEAD, export.export_csvs([descriptor, descriptor2],
-      include_fields=['address', 'exit_policy'], header=True))
-    self.assertEqual(DOUBLE_PART2_HEAD, export.export_csvs([descriptor, descriptor2],
-      exclude_fields=['onion_key', 'fingerprint'], header=True))
+    for nickname in nicknames:
+      router_line = "%s 71.35.133.197 9001 0 0" % nickname
+      descriptors.append(RelayDescriptor(get_server_descriptor({'router': router_line})))
 
-    # Other tests
-    self.assertRaises(ValueError, export.export_csvs, [descriptor, descriptor3])
-    self.assertRaises(ValueError, export.export_csvs, [descriptor, descriptor3],
-      include_fields=['onion_key', 'address', 'fingerprint'], exclude_fields=['onion_key'])
-
+    expected = "\n".join(nicknames) + "\n"
+    self.assertEqual(expected, export_csv(descriptors, included_fields = ('nickname',), header = False))
 
-
-  def test_export_csv_file(self):
+  def test_file_output(self):
     """
-    Tests the export_csv_file function.
+    Basic test for the export_csv_file() function, checking that it provides
+    the same output as export_csv().
     """
-    sample_csv_string = 'This, is, a, sample, string.\r\nline, two.\r\n'
-    sample_csv_string2 = 'Another, sample\r\n,, second,\r\n'
-    sample_file = cStringIO.StringIO()
 
-    # Must use named tuples again for ret_vals dictionary.
-    Fields = namedtuple('Fields', 'include_fields exclude_fields header')
+    desc = RelayDescriptor(get_server_descriptor())
+    desc_csv = export_csv(desc)
 
-    ret_vals = {((descriptor,), Fields(include_fields=(), exclude_fields=(), header=True)):sample_csv_string,
-      ((descriptor,), Fields(include_fields=('address', 'onion_key'), exclude_fields=('address',), header=False)):sample_csv_string2}
-    # TODO Ask Danner: mock it once then do both tests (not including assertRaises), or do separate mockings.
-    # the latter requires that we still include empty incl_fields and excl_fields parameters instead of
-    # letting them default to []. Same for header.
-    mocking.mock(export.export_csvs, mocking.return_for_args(ret_vals, kwarg_type=Fields))
+    csv_buffer = StringIO.StringIO()
+    export_csv_file(csv_buffer, desc)
 
-    export.export_csv_file((descriptor,), sample_file)
-    self.assertEqual(sample_csv_string, sample_file.getvalue())
+    self.assertEqual(desc_csv, csv_buffer.getvalue())
+
+  def test_excludes_private_attr(self):
+    """
+    Checks that the default attributes for our csv output doesn't include private fields.
+    """
 
-    sample_file = cStringIO.StringIO()
+    desc = RelayDescriptor(get_server_descriptor())
+    desc_csv = export_csv(desc)
 
-    export.export_csv_file((descriptor,), sample_file, include_fields=('address', 'onion_key'), exclude_fields=('address',), header=False)
-    self.assertEqual(sample_csv_string2, sample_file.getvalue())
+    self.assertTrue(',signature' in desc_csv)
+    self.assertFalse(',_digest' in desc_csv)
+    self.assertFalse(',_annotation_lines' in desc_csv)
+
+  def test_empty_input(self):
+    """
+    Exercises when we don't provide any descriptors.
+    """
 
-    # Make sure error is Raised when necessary.
-    self.assertRaises(AttributeError, export.export_csv_file, (descriptor,), sample_csv_string)
+    self.assertEquals("", export_csv([]))
+
+  def test_invalid_attributes(self):
+    """
+    Attempts to make a csv with attributes that don't exist.
+    """
+
+    desc = RelayDescriptor(get_server_descriptor())
+    self.assertRaises(ValueError, export_csv, desc, ('nickname', 'blarg!'))
+
+  def test_multiple_descriptor_types(self):
+    """
+    Attempts to make a csv with multiple descriptor types.
+    """
+
+    server_desc = RelayDescriptor(get_server_descriptor())
+    bridge_desc = BridgeDescriptor(get_server_descriptor(is_bridge = True))
+    self.assertRaises(ValueError, export_csv, (server_desc, bridge_desc))
+
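The "newline breaks rather than windows style '\r\n'" bullet comes down to the small csv dialect introduced above. A standalone sketch of the same idea, with made-up field names and values, to show how keys outside the field list get dropped:

  import csv
  import StringIO

  class _ExportDialect(csv.excel):
    lineterminator = '\n'

  output_buffer = StringIO.StringIO()
  writer = csv.DictWriter(output_buffer, ['nickname', 'address'], dialect = _ExportDialect(), extrasaction = 'ignore')
  writer.writeheader()

  # keys that aren't listed as fields (like '_digest' here) are silently ignored
  writer.writerow({'nickname': 'caerSidi', 'address': '71.35.133.197', '_digest': 'ignored'})

  # output_buffer.getvalue() is now "nickname,address\ncaerSidi,71.35.133.197\n"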