commit f9a2ef8d0dd60a73c7cfe5a329b20329c4b4218c Author: Damian Johnson atagar@torproject.org Date: Sun Aug 19 11:54:29 2012 -0700
Hybrid approach between iterator and attribute
Ok, I changed my mind, at least partly. If the user is using parse_file() then we want to provide them with router entries and a bare document, as I was doing before. However, if they're calling the document constructors directly then they've already read the complete document into memory, so there's little reason not to parse it.
In short, documents now have a 'routers' attribute. That attribute is filled with the parsed routers if we use the constructor directly, and left empty if we use parse_file(). --- stem/descriptor/networkstatus.py | 75 +++++++++++++++++++++++++------- test/integ/descriptor/networkstatus.py | 8 ++-- 2 files changed, 63 insertions(+), 20 deletions(-)
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py index 5054628..a91dcc5 100644 --- a/stem/descriptor/networkstatus.py +++ b/stem/descriptor/networkstatus.py @@ -76,7 +76,9 @@ Flag = stem.util.enum.Enum(
def parse_file(document_file, validate = True, is_microdescriptor = False): """ - Parses a network status document and provides a NetworkStatusDocument object. + Parses a network status and iterates over the RouterDescriptor or + RouterMicrodescriptor in it. The document that these instances reference has + an empty 'routers' attribute to allow for limited memory usage.
:param file document_file: file with network status document content :param bool validate: checks the validity of the document's contents if True, skips these checks otherwise @@ -89,29 +91,31 @@ def parse_file(document_file, validate = True, is_microdescriptor = False): * IOError if the file can't be read """
+ header, footer, routers_end = _get_document_content(document_file, validate) + document_data = "".join(header + footer) + if not is_microdescriptor: - document_type, router_type = NetworkStatusDocument, RouterDescriptor + document = NetworkStatusDocument(document_data, validate) + router_type = RouterDescriptor else: - document_type, router_type = MicrodescriptorConsensus, RouterMicrodescriptor - - document, routers_start, routers_end = _get_document(document_file, validate, document_type) - document_file.seek(routers_start) + document = MicrodescriptorConsensus(document_data, validate) + router_type = RouterMicrodescriptor
- while document_file.tell() < routers_end: - desc_content = "".join(_read_until_keywords("r", document_file, ignore_first = True, end_position = routers_end)) - yield router_type(desc_content, document, validate) + for desc in _get_routers(document_file, validate, document, routers_end, router_type): + yield desc
-def _get_document(document_file, validate, document_type): +def _get_document_content(document_file, validate): """ Network status documents consist of three sections: header, router entries, and the footer. This provides back a tuple with the following... - (NetworkStatusDocument, routers_start, routers_end) + (header_lines, footer_lines, routers_end) + + This leaves the document_file at the start of the router entries.
:param file document_file: file with network status document content :param bool validate: checks the validity of the document's contents if True, skips these checks otherwise - :param object document_type: consensus document class to construct
- :returns: tuple with the network status document and range that has the routers + :returns: tuple with the network status document content and ending position of the routers
:raises: * ValueError if the contents is malformed and validate is True @@ -132,9 +136,31 @@ def _get_document(document_file, validate, document_type): routers_end = document_file.tell() footer = document_file.readlines()
- document_data = "".join(header + footer) + document_file.seek(routers_start) + return (header, footer, routers_end) + +def _get_routers(document_file, validate, document, end_position, router_type): + """ + Iterates over the router entries in a given document. The document_file is + expected to be at the start of the router section and the end_position + designates where that section ends. + + :param file document_file: file with network status document content + :param bool validate: checks the validity of the document's contents if True, skips these checks otherwise + :param object document: document the descriptors originate from + :param int end_position: location in the document_file where the router section ends + :param class router_type: router class to construct
- return (document_type(document_data, validate), routers_start, routers_end) + :returns: iterator over router_type instances + + :raises: + * ValueError if the contents is malformed and validate is True + * IOError if the file can't be read + """ + + while document_file.tell() < end_position: + desc_content = "".join(_read_until_keywords("r", document_file, ignore_first = True, end_position = end_position)) + yield router_type(desc_content, document, validate)
class NetworkStatusDocument(stem.descriptor.Descriptor): """ @@ -142,6 +168,8 @@ class NetworkStatusDocument(stem.descriptor.Descriptor):
This could be a v3 consensus or vote document.
+ :var tuple routers: RouterDescriptor contained in the document + :var bool validated: ***** whether the document is validated :var str network_status_version: ***** a document format version. For v3 documents this is "3" :var str vote_status: ***** status of the vote (is either "vote" or "consensus") @@ -198,7 +226,19 @@ class NetworkStatusDocument(stem.descriptor.Descriptor): self.params = {} self.bandwidth_weights = {}
- self._parse(raw_content) + document_file = StringIO(raw_content) + header, footer, routers_end = _get_document_content(document_file, validate) + + document_content = "".join(header + footer) + self._parse(document_content) + + if document_file.tell() < routers_end: + self.routers = tuple(_get_routers(document_file, validate, self, routers_end, self._get_router_type())) + else: + self.routers = () + + def _get_router_type(self): + return RouterDescriptor
def _validate_network_status_version(self): return self.network_status_version == "3" @@ -586,6 +626,9 @@ class MicrodescriptorConsensus(NetworkStatusDocument): | **~** attribute appears only in consensuses """
+ def _get_router_type(self): + return RouterMicrodescriptor + def _validate_network_status_version(self): return self.network_status_version == "3 microdesc"
diff --git a/test/integ/descriptor/networkstatus.py b/test/integ/descriptor/networkstatus.py index f95270b..d16929d 100644 --- a/test/integ/descriptor/networkstatus.py +++ b/test/integ/descriptor/networkstatus.py @@ -74,8 +74,8 @@ class TestNetworkStatusDocument(unittest.TestCase): descriptor_path = test.integ.descriptor.get_resource("cached-consensus")
descriptor_file = file(descriptor_path) - router1 = next(stem.descriptor.networkstatus.parse_file(descriptor_file)) - desc = router1.document + desc = stem.descriptor.networkstatus.NetworkStatusDocument(descriptor_file.read()) + router1 = desc.routers[0] descriptor_file.close()
self.assertEquals(True, desc.validated) @@ -167,8 +167,8 @@ I/TJmV928na7RLZe2mGHCAW3VQOvV+QkCfj05VZ8CsY= descriptor_path = test.integ.descriptor.get_resource("vote")
descriptor_file = file(descriptor_path) - router1 = next(stem.descriptor.networkstatus.parse_file(descriptor_file)) - desc = router1.document + desc = stem.descriptor.networkstatus.NetworkStatusDocument(descriptor_file.read()) + router1 = desc.routers[0] descriptor_file.close()
self.assertEquals(True, desc.validated)