commit 4819e2fc97f76424a3dead741d327be711f53a8d Author: Damian Johnson atagar@torproject.org Date: Tue Jun 5 21:15:37 2012 -0700
Converting stem.descriptor.* to reStructuredText
Fingers so sore... --- stem/descriptor/__init__.py | 74 ++++----- stem/descriptor/extrainfo_descriptor.py | 275 +++++++++++++++---------------- stem/descriptor/reader.py | 110 ++++++------- stem/descriptor/server_descriptor.py | 212 +++++++++++------------- 4 files changed, 318 insertions(+), 353 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index 1de1dac..37f9ec7 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -1,11 +1,15 @@ """ Package for parsing and processing descriptor data.
-parse_file - Iterates over the descriptors in a file. -Descriptor - Common parent for all descriptor file types. - |- get_path - location of the descriptor on disk if it came from a file - |- get_unrecognized_lines - unparsed descriptor content - +- __str__ - string that the descriptor was made from +**Module Overview:** + +:: + + parse_file - Iterates over the descriptors in a file. + Descriptor - Common parent for all descriptor file types. + |- get_path - location of the descriptor on disk if it came from a file + |- get_unrecognized_lines - unparsed descriptor content + +- __str__ - string that the descriptor was made from """
__all__ = ["descriptor", "reader", "extrainfo_descriptor", "server_descriptor", "parse_file", "Descriptor"] @@ -23,16 +27,14 @@ def parse_file(path, descriptor_file): """ Provides an iterator for the descriptors within a given file.
- Arguments: - path (str) - absolute path to the file's location on disk - descriptor_file (file) - opened file with the descriptor contents + :param str path: absolute path to the file's location on disk + :param file descriptor_file: opened file with the descriptor contents
- Returns: - iterator for Descriptor instances in the file + :returns: iterator for :class:`stem.descriptor.Descriptor` instances in the file
- Raises: - TypeError if we can't match the contents of the file to a descriptor type - IOError if unable to read from the descriptor_file + :raises: + * TypeError if we can't match the contents of the file to a descriptor type + * IOError if unable to read from the descriptor_file """
import stem.descriptor.server_descriptor @@ -93,8 +95,7 @@ class Descriptor: """ Provides the absolute path that we loaded this descriptor from.
- Returns: - str with the absolute path of the descriptor source + :returns: str with the absolute path of the descriptor source """
return self._path @@ -105,8 +106,7 @@ class Descriptor: not know how to process. This is most common due to new descriptor fields that this library does not yet know how to process. Patches welcome!
- Returns: - list of lines of unrecognized content + :returns: list of lines of unrecognized content """
raise NotImplementedError @@ -122,13 +122,11 @@ def _read_until_keyword(keyword, descriptor_file, inclusive = False): Reads from the descriptor file until we get to the given keyword or reach the end of the file.
- Arguments: - keyword (str) - keyword we want to read until - descriptor_file (file) - file with the descriptor content - inclusive (bool) - includes the line with the keyword if True + :param str keyword: keyword we want to read until + :param file descriptor_file: file with the descriptor content + :param bool inclusive: includes the line with the keyword if True
- Returns: - list with the lines until we find the keyword + :returns: list with the lines until we find the keyword """
content = [] @@ -156,15 +154,11 @@ def _get_pseudo_pgp_block(remaining_contents): Checks if given contents begins with a pseudo-Open-PGP-style block and, if so, pops it off and provides it back to the caller.
- Arguments: - remaining_contents (list) - lines to be checked for a public key block + :param list remaining_contents: lines to be checked for a public key block
- Returns: - str with the armor wrapped contents or None if it doesn't exist + :returns: str with the armor wrapped contents or None if it doesn't exist
- Raises: - ValueError if the contents starts with a key block but it's malformed (for - instance, if it lacks an ending line) + :raises: ValueError if the contents starts with a key block but it's malformed (for instance, if it lacks an ending line) """
if not remaining_contents: @@ -202,19 +196,17 @@ def _get_descriptor_components(raw_contents, validate, extra_keywords): entries because this influences the resulting exit policy, but for everything else in server descriptors the order does not matter.
- Arguments: - raw_contents (str) - descriptor content provided by the relay - validate (bool) - checks the validity of the descriptor's content if - True, skips these checks otherwise - extra_keywords (list) - entity keywords to put into a separate listing with - ordering intact + :param str raw_contents: descriptor content provided by the relay + :param bool validate: checks the validity of the descriptor's content if True, skips these checks otherwise + :param list extra_keywords: entity keywords to put into a separate listing with ordering intact
- Returns: + :returns: tuple with the following attributes... - entries (dict) - keyword => (value, pgp key) entries - first_keyword (str) - keyword of the first line - last_keyword (str) - keyword of the last line - extra_entries (list) - lines containing entries matching extra_keywords + + * **entries (dict)** - keyword => (value, pgp key) entries + * **first_keyword (str)** - keyword of the first line + * **last_keyword (str)** - keyword of the last line + * **extra_entries (list)** - lines containing entries matching extra_keywords """
entries = {} diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py index 1c00d34..ff37064 100644 --- a/stem/descriptor/extrainfo_descriptor.py +++ b/stem/descriptor/extrainfo_descriptor.py @@ -10,33 +10,39 @@ cannot be requested of bridges.
Extra-info descriptors are available from a few sources...
-- if you have 'DownloadExtraInfo 1' in your torrc... - - control port via 'GETINFO extra-info/digest/*' queries - - the 'cached-extrainfo' file in tor's data directory -- tor metrics, at https://metrics.torproject.org/data.html -- directory authorities and mirrors via their DirPort +* if you have 'DownloadExtraInfo 1' in your torrc...
-DirResponses - known statuses for ExtraInfoDescriptor's dir_*_responses - |- OK - network status requests that were answered - |- NOT_ENOUGH_SIGS - network status wasn't signed by enough authorities - |- UNAVAILABLE - requested network status was unavailable - |- NOT_FOUND - requested network status was not found - |- NOT_MODIFIED - network status unmodified since If-Modified-Since time - +- BUSY - directory was busy + * control port via 'GETINFO extra-info/digest/*' queries + * the 'cached-extrainfo' file in tor's data directory
-DirStats - known stats for ExtraInfoDescriptor's dir_*_direct_dl and dir_*_tunneled_dl - |- COMPLETE - requests that completed successfully - |- TIMEOUT - requests that didn't complete within a ten minute timeout - |- RUNNING - requests still in procress when measurement's taken - |- MIN - smallest rate at which a descriptor was downloaded in B/s - |- MAX - largest rate at which a descriptor was downloaded in B/s - |- D1-4 and D6-9 - rate of the slowest x/10 download rates in B/s - |- Q1 and Q3 - rate of the slowest and fastest querter download rates in B/s - +- MD - median download rate in B/s +* tor metrics, at https://metrics.torproject.org/data.html +* directory authorities and mirrors via their DirPort
-parse_file - Iterates over the extra-info descriptors in a file. -ExtraInfoDescriptor - Tor extra-info descriptor. - +- get_unrecognized_lines - lines with unrecognized content +**Module Overview:** + +:: + + DirResponses - known statuses for ExtraInfoDescriptor's dir_*_responses + |- OK - network status requests that were answered + |- NOT_ENOUGH_SIGS - network status wasn't signed by enough authorities + |- UNAVAILABLE - requested network status was unavailable + |- NOT_FOUND - requested network status was not found + |- NOT_MODIFIED - network status unmodified since If-Modified-Since time + +- BUSY - directory was busy + + DirStats - known stats for ExtraInfoDescriptor's dir_*_direct_dl and dir_*_tunneled_dl + |- COMPLETE - requests that completed successfully + |- TIMEOUT - requests that didn't complete within a ten minute timeout + |- RUNNING - requests still in process when measurement's taken + |- MIN - smallest rate at which a descriptor was downloaded in B/s + |- MAX - largest rate at which a descriptor was downloaded in B/s + |- D1-4 and D6-9 - rate of the slowest x/10 download rates in B/s + |- Q1 and Q3 - rate of the slowest and fastest quarter download rates in B/s + +- MD - median download rate in B/s + + parse_file - Iterates over the extra-info descriptors in a file. + ExtraInfoDescriptor - Tor extra-info descriptor. + +- get_unrecognized_lines - lines with unrecognized content """
import re @@ -112,17 +118,14 @@ def parse_file(descriptor_file, validate = True): """ Iterates over the extra-info descriptors in a file.
- Arguments: - descriptor_file (file) - file with descriptor content - validate (bool) - checks the validity of the descriptor's content if - True, skips these checks otherwise + :param file descriptor_file: file with descriptor content + :param bool validate: checks the validity of the descriptor's content if True, skips these checks otherwise
- Returns: - iterator for ExtraInfoDescriptor instances in the file + :returns: iterator for ExtraInfoDescriptor instances in the file
- Raises: - ValueError if the contents is malformed and validate is True - IOError if the file can't be read + :raises: + * ValueError if the contents is malformed and validate is True + * IOError if the file can't be read """
while True: @@ -140,16 +143,12 @@ def _parse_timestamp_and_interval(keyword, content): """ Parses a 'YYYY-MM-DD HH:MM:SS (NSEC s) *' entry.
- Arguments: - keyword (str) - line's keyword - content (str) - line content to be parsed + :param str keyword: line's keyword + :param str content: line content to be parsed
- Returns: - tuple of the form... - (timestamp (datetime), interval (int), remaining content (str)) + :returns: tuple of the form ``(timestamp (datetime), interval (int), remaining content (str))``
- Raises: - ValueError if the content is malformed + :raises: ValueError if the content is malformed """
line = "%s %s" % (keyword, content) @@ -174,92 +173,97 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor): """ Extra-info descriptor document.
- Attributes: - nickname (str) - relay's nickname (*) - fingerprint (str) - identity key fingerprint (*) - published (datetime) - time in GMT when this descriptor was made (*) - geoip_db_digest (str) - sha1 of geoIP database file - signature (str) - signature for this extrainfo descriptor (*) - - conn_bi_direct_end (datetime) - end of the sampling interval - conn_bi_direct_interval (int) - seconds per interval - conn_bi_direct_below (int) - connections that read/wrote less than 20 KiB - conn_bi_direct_read (int) - connections that read at least 10x more than wrote - conn_bi_direct_write (int) - connections that wrote at least 10x more than read - conn_bi_direct_both (int) - remaining connections - - Bytes read/written for relayed traffic: - read_history_end (datetime) - end of the sampling interval - read_history_interval (int) - seconds per interval - read_history_values (list) - bytes read during each interval - - write_history_end (datetime) - end of the sampling interval - write_history_interval (int) - seconds per interval - write_history_values (list) - bytes written during each interval - - Cell relaying statistics: - cell_stats_end (datetime) - end of the period when stats were gathered - cell_stats_interval (int) - length in seconds of the interval - cell_processed_cells (list) - measurement of processed cells per circuit - cell_queued_cells (list) - measurement of queued cells per circuit - cell_time_in_queue (list) - mean enqueued time in milliseconds for cells - cell_circuits_per_decile (int) - mean number of circuits in a deciles - - Directory Mirror Attributes: - dir_stats_end (datetime) - end of the period when stats were gathered - dir_stats_interval (int) - length in seconds of the interval - dir_v2_ips (dict) - mapping of locales to rounded count of requester ips - dir_v3_ips (dict) - mapping of locales to rounded count of requester ips - dir_v2_share (float) - percent of total directory traffic it expects to serve - dir_v3_share (float) - percent 
of total directory traffic it expects to serve - dir_v2_requests (dict) - mapping of locales to rounded count of requests - dir_v3_requests (dict) - mapping of locales to rounded count of requests - - dir_v2_responses (dict) - mapping of DirResponses to their rounded count - dir_v3_responses (dict) - mapping of DirResponses to their rounded count - dir_v2_responses_unknown (dict) - mapping of unrecognized statuses to their count - dir_v3_responses_unknown (dict) - mapping of unrecognized statuses to their count - - dir_v2_direct_dl (dict) - mapping of DirStats to measurement over DirPort - dir_v3_direct_dl (dict) - mapping of DirStats to measurement over DirPort - dir_v2_direct_dl_unknown (dict) - mapping of unrecognized stats to their measurement - dir_v3_direct_dl_unknown (dict) - mapping of unrecognized stats to their measurement - - dir_v2_tunneled_dl (dict) - mapping of DirStats to measurement over ORPort - dir_v3_tunneled_dl (dict) - mapping of DirStats to measurement over ORPort - dir_v2_tunneled_dl_unknown (dict) - mapping of unrecognized stats to their measurement - dir_v3_tunneled_dl_unknown (dict) - mapping of unrecognized stats to their measurement - - Bytes read/written for directory mirroring: - dir_read_history_end (datetime) - end of the sampling interval - dir_read_history_interval (int) - seconds per interval - dir_read_history_values (list) - bytes read during each interval - - dir_write_history_end (datetime) - end of the sampling interval - dir_write_history_interval (int) - seconds per interval - dir_write_history_values (list) - bytes read during each interval - - Guard Attributes: - entry_stats_end (datetime) - end of the period when stats were gathered - entry_stats_interval (int) - length in seconds of the interval - entry_ips (dict) - mapping of locales to rounded count of unique user ips - - Exit Attributes: - exit_stats_end (datetime) - end of the period when stats were gathered - exit_stats_interval (int) - length in seconds of the 
interval - exit_kibibytes_written (dict) - traffic per port (keys are ints or 'other') - exit_kibibytes_read (dict) - traffic per port (keys are ints or 'other') - exit_streams_opened (dict) - streams per port (keys are ints or 'other') - - Bridge Attributes: - bridge_stats_end (datetime) - end of the period when stats were gathered - bridge_stats_interval (int) - length in seconds of the interval - bridge_ips (dict) - mapping of locales to rounded count of unique user ips - geoip_start_time (datetime) - (deprecated) replaced by bridge_stats_end - geoip_client_origins (dict) - (deprecated) replaced by bridge_ips - - (*) attribute is either required when we're parsed with validation or has a - default value, others are left as None if undefined + :var str nickname: ***** relay's nickname + :var str fingerprint: ***** identity key fingerprint + :var datetime published: ***** time in GMT when this descriptor was made + :var str geoip_db_digest: sha1 of geoIP database file + :var str signature: ***** signature for this extrainfo descriptor + + :var datetime conn_bi_direct_end: end of the sampling interval + :var int conn_bi_direct_interval: seconds per interval + :var int conn_bi_direct_below: connections that read/wrote less than 20 KiB + :var int conn_bi_direct_read: connections that read at least 10x more than wrote + :var int conn_bi_direct_write: connections that wrote at least 10x more than read + :var int conn_bi_direct_both: remaining connections + + **Bytes read/written for relayed traffic:** + + :var datetime read_history_end: end of the sampling interval + :var int read_history_interval: seconds per interval + :var list read_history_values: bytes read during each interval + + :var datetime write_history_end: end of the sampling interval + :var int write_history_interval: seconds per interval + :var list write_history_values: bytes written during each interval + + **Cell relaying statistics:** + + :var datetime cell_stats_end: end of the period when stats 
were gathered + :var int cell_stats_interval: length in seconds of the interval + :var list cell_processed_cells: measurement of processed cells per circuit + :var list cell_queued_cells: measurement of queued cells per circuit + :var list cell_time_in_queue: mean enqueued time in milliseconds for cells + :var int cell_circuits_per_decile: mean number of circuits in a deciles + + **Directory Mirror Attributes:** + + :var datetime dir_stats_end: end of the period when stats were gathered + :var int dir_stats_interval: length in seconds of the interval + :var dict dir_v2_ips: mapping of locales to rounded count of requester ips + :var dict dir_v3_ips: mapping of locales to rounded count of requester ips + :var float dir_v2_share: percent of total directory traffic it expects to serve + :var float dir_v3_share: percent of total directory traffic it expects to serve + :var dict dir_v2_requests: mapping of locales to rounded count of requests + :var dict dir_v3_requests: mapping of locales to rounded count of requests + + :var dict dir_v2_responses: mapping of DirResponses to their rounded count + :var dict dir_v3_responses: mapping of DirResponses to their rounded count + :var dict dir_v2_responses_unknown: mapping of unrecognized statuses to their count + :var dict dir_v3_responses_unknown: mapping of unrecognized statuses to their count + + :var dict dir_v2_direct_dl: mapping of DirStats to measurement over DirPort + :var dict dir_v3_direct_dl: mapping of DirStats to measurement over DirPort + :var dict dir_v2_direct_dl_unknown: mapping of unrecognized stats to their measurement + :var dict dir_v3_direct_dl_unknown: mapping of unrecognized stats to their measurement + + :var dict dir_v2_tunneled_dl: mapping of DirStats to measurement over ORPort + :var dict dir_v3_tunneled_dl: mapping of DirStats to measurement over ORPort + :var dict dir_v2_tunneled_dl_unknown: mapping of unrecognized stats to their measurement + :var dict dir_v3_tunneled_dl_unknown: mapping of 
unrecognized stats to their measurement + + **Bytes read/written for directory mirroring:** + + :var datetime dir_read_history_end: end of the sampling interval + :var int dir_read_history_interval: seconds per interval + :var list dir_read_history_values: bytes read during each interval + + :var datetime dir_write_history_end: end of the sampling interval + :var int dir_write_history_interval: seconds per interval + :var list dir_write_history_values: bytes written during each interval + + **Guard Attributes:** + + :var datetime entry_stats_end: end of the period when stats were gathered + :var int entry_stats_interval: length in seconds of the interval + :var dict entry_ips: mapping of locales to rounded count of unique user ips + + **Exit Attributes:** + + :var datetime exit_stats_end: end of the period when stats were gathered + :var int exit_stats_interval: length in seconds of the interval + :var dict exit_kibibytes_written: traffic per port (keys are ints or 'other') + :var dict exit_kibibytes_read: traffic per port (keys are ints or 'other') + :var dict exit_streams_opened: streams per port (keys are ints or 'other') + + **Bridge Attributes:** + + :var datetime bridge_stats_end: end of the period when stats were gathered + :var int bridge_stats_interval: length in seconds of the interval + :var dict bridge_ips: mapping of locales to rounded count of unique user ips + :var datetime geoip_start_time: replaced by bridge_stats_end (deprecated) + :var dict geoip_client_origins: replaced by bridge_ips (deprecated) + + ***** attribute is either required when we're parsed with validation or has a default value, others are left as None if undefined """
def __init__(self, raw_contents, validate = True): @@ -272,13 +276,10 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor): validation can be disabled to either improve performance or be accepting of malformed data.
- Arguments: - raw_contents (str) - extra-info content provided by the relay - validate (bool) - checks the validity of the extra-info descriptor if - True, skips these checks otherwise + :param str raw_contents: extra-info content provided by the relay + :param bool validate: checks the validity of the extra-info descriptor if True, skips these checks otherwise
- Raises: - ValueError if the contents is malformed and validate is True + :raises: ValueError if the contents is malformed and validate is True """
stem.descriptor.Descriptor.__init__(self, raw_contents) @@ -385,12 +386,10 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor): Parses a series of 'keyword => (value, pgp block)' mappings and applies them as attributes.
- Arguments: - entries (dict) - descriptor contents to be applied - validate (bool) - checks the validity of descriptor content if True + :param dict entries: descriptor contents to be applied + :param bool validate: checks the validity of descriptor content if True
- Raises: - ValueError if an error occures in validation + :raises: ValueError if an error occurs in validation """
for keyword, values in entries.items(): diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py index 1110854..acc9f8f 100644 --- a/stem/descriptor/reader.py +++ b/stem/descriptor/reader.py @@ -3,6 +3,8 @@ Utilities for reading descriptors from local directories and archives. This is mostly done through the DescriptorReader class, which is an iterator for the descriptor data in a series of destinations. For example...
+:: + my_descriptors = [ "/tmp/server-descriptors-2012-03.tar.bz2", "/tmp/archived_descriptors/", @@ -15,7 +17,7 @@ descriptor data in a series of destinations. For example...
This ignores files that cannot be processed due to read errors or unparsable content. To be notified of skipped files you can register a listener with -register_skip_listener(). +:func:`stem.descriptor.reader.DescriptorReader.register_skip_listener`.
The DescriptorReader keeps track of the last modified timestamps for descriptor files that it has read so it can skip unchanged files if ran again. This @@ -24,6 +26,8 @@ DescriptorReaders. For instance, the following prints descriptors as they're changed over the course of a minute, and picks up where it left off if ran again...
+:: + reader = DescriptorReader(["/tmp/descriptor_data"])
try: @@ -43,25 +47,28 @@ again...
save_processed_files("/tmp/used_descriptors", reader.get_processed_files())
+**Module Overview:**
-load_processed_files - Loads a listing of processed files. -save_processed_files - Saves a listing of processed files. - -DescriptorReader - Iterator for descriptor data on the local file system. - |- get_processed_files - provides the listing of files that we've processed - |- set_processed_files - sets our tracking of the files we have processed - |- register_skip_listener - adds a listener that's notified of skipped files - |- start - begins reading descriptor data - |- stop - stops reading descriptor data - |- __enter__ / __exit__ - manages the descriptor reader thread in the context - +- __iter__ - iterates over descriptor data in unread files +::
-FileSkipped - Base exception for a file that was skipped. - |- AlreadyRead - We've already read a file with this last modified timestamp. - |- ParsingFailure - Contents can't be parsed as descriptor data. - |- UnrecognizedType - File extension indicates non-descriptor data. - +- ReadFailed - Wraps an error that was raised while reading the file. - +- FileMissing - File does not exist. + load_processed_files - Loads a listing of processed files. + save_processed_files - Saves a listing of processed files. + + DescriptorReader - Iterator for descriptor data on the local file system. + |- get_processed_files - provides the listing of files that we've processed + |- set_processed_files - sets our tracking of the files we have processed + |- register_skip_listener - adds a listener that's notified of skipped files + |- start - begins reading descriptor data + |- stop - stops reading descriptor data + |- __enter__ / __exit__ - manages the descriptor reader thread in the context + +- __iter__ - iterates over descriptor data in unread files + + FileSkipped - Base exception for a file that was skipped. + |- AlreadyRead - We've already read a file with this last modified timestamp. + |- ParsingFailure - Contents can't be parsed as descriptor data. + |- UnrecognizedType - File extension indicates non-descriptor data. + +- ReadFailed - Wraps an error that was raised while reading the file. + +- FileMissing - File does not exist. """
import os @@ -119,17 +126,16 @@ class FileMissing(ReadFailed): def load_processed_files(path): """ Loads a dictionary of 'path => last modified timestamp' mappings, as - persisted by save_processed_files(), from a file. + persisted by :func:`stem.descriptor.reader.save_processed_files`, from a + file.
- Arguments: - path (str) - location to load the processed files dictionary from + :param str path: location to load the processed files dictionary from
- Returns: - dict of 'path (str) => last modified unix timestamp (int)' mappings + :returns: dict of 'path (str) => last modified unix timestamp (int)' mappings
- Raises: - IOError if unable to read the file - TypeError if unable to parse the file's contents + :raises: + * IOError if unable to read the file + * TypeError if unable to parse the file's contents """
processed_files = {} @@ -160,13 +166,12 @@ def save_processed_files(path, processed_files): provided by the DescriptorReader's get_processed_files() method) so that they can be loaded later and applied to another DescriptorReader.
- Arguments: - path (str) - location to save the processed files dictionary to - processed_files (dict) - 'path => last modified' mappings + :param str path: location to save the processed files dictionary to + :param dict processed_files: 'path => last modified' mappings
- Raises: - IOError if unable to write to the file - TypeError if processed_files is of the wrong type + :raises: + * IOError if unable to write to the file + * TypeError if processed_files is of the wrong type """
# makes the parent directory if it doesn't already exist @@ -196,15 +201,10 @@ class DescriptorReader: handling. If you want that then use the load/save_processed_files functions instead.
- Arguments: - target (str, list) - path or list of paths for files or directories to be - read from - follow_links (bool) - determines if we'll follow symlinks when traversing - directories - buffer_size (int) - descriptors we'll buffer before waiting for some to - be read, this is unbounded if zero - persistence_path (str) - if set we will load and save processed file - listings from this path, errors are ignored + :param str,list target: path or list of paths for files or directories to be read from + :param bool follow_links: determines if we'll follow symlinks when traversing directories + :param int buffer_size: descriptors we'll buffer before waiting for some to be read, this is unbounded if zero + :param str persistence_path: if set we will load and save processed file listings from this path, errors are ignored """
def __init__(self, target, follow_links = False, buffer_size = 100, persistence_path = None): @@ -241,14 +241,14 @@ class DescriptorReader: For each file that we have read descriptor data from this provides a mapping of the form...
- absolute path (str) => last modified unix timestamp (int) + :: + + absolute path (str) => last modified unix timestamp (int)
This includes entries set through the set_processed_files() method. After each run is reset to only the files that were present during that run.
- Returns: - dict with the absolute paths and unix timestamp for the last modified - times of the files we have processed + :returns: dict with the absolute paths and unix timestamp for the last modified times of the files we have processed """
# make sure that we only provide back absolute paths @@ -260,9 +260,7 @@ class DescriptorReader: as a method for pre-populating the listing of descriptor files that we have seen.
- Arguments: - processed_files (dict) - mapping of absolute paths (str) to unix - timestamps for the last modified time (int) + :param dict processed_files: mapping of absolute paths (str) to unix timestamps for the last modified time (int) """
self._processed_files = dict(processed_files) @@ -272,12 +270,11 @@ class DescriptorReader: Registers a listener for files that are skipped. This listener is expected to be a functor of the form...
- my_listener(path, exception) + :: + + my_listener(path, exception)
- Arguments: - listener (functor) - functor to be notified of files that are skipped to - read errors or because they couldn't be parsed as - valid descriptor data + :param functor listener: functor to be notified of files that are skipped due to read errors or because they couldn't be parsed as valid descriptor data """
self._skip_listeners.append(listener) @@ -287,9 +284,7 @@ class DescriptorReader: Provides the number of descriptors that are waiting to be iterated over. This is limited to the buffer_size that we were constructed with.
- Returns: - int for the estimated number of currently enqueued descriptors, this is - not entirely reliable + :returns: int for the estimated number of currently enqueued descriptors, this is not entirely reliable """
return self._unreturned_descriptors.qsize() @@ -298,8 +293,7 @@ class DescriptorReader: """ Starts reading our descriptor files.
- Raises: - ValueError if we're already reading the descriptor files + :raises: ValueError if we're already reading the descriptor files """
with self._reader_thread_lock: diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py index b1b02e3..e19a3bd 100644 --- a/stem/descriptor/server_descriptor.py +++ b/stem/descriptor/server_descriptor.py @@ -3,24 +3,28 @@ Parsing for Tor server descriptors, which contains the infrequently changing information about a Tor relay (contact information, exit policy, public keys, etc). This information is provided from a few sources...
-- control port via 'GETINFO desc/*' queries -- the 'cached-descriptors' file in tor's data directory -- tor metrics, at https://metrics.torproject.org/data.html -- directory authorities and mirrors via their DirPort +* control port via 'GETINFO desc/*' queries +* the 'cached-descriptors' file in tor's data directory +* tor metrics, at https://metrics.torproject.org/data.html +* directory authorities and mirrors via their DirPort
-parse_file - Iterates over the server descriptors in a file. -ServerDescriptor - Tor server descriptor. - | |- RelayDescriptor - Server descriptor for a relay. - | | +- is_valid - checks the signature against the descriptor content - | | - | +- BridgeDescriptor - Scrubbed server descriptor for a bridge. - | |- is_scrubbed - checks if our content has been properly scrubbed - | +- get_scrubbing_issues - description of issues with our scrubbing - | - |- digest - calculates the digest value for our content - |- get_unrecognized_lines - lines with unrecognized content - |- get_annotations - dictionary of content prior to the descriptor entry - +- get_annotation_lines - lines that provided the annotations +**Module Overview:** + +:: + + parse_file - Iterates over the server descriptors in a file. + ServerDescriptor - Tor server descriptor. + | |- RelayDescriptor - Server descriptor for a relay. + | | +- is_valid - checks the signature against the descriptor content + | | + | +- BridgeDescriptor - Scrubbed server descriptor for a bridge. + | |- is_scrubbed - checks if our content has been properly scrubbed + | +- get_scrubbing_issues - description of issues with our scrubbing + | + |- digest - calculates the digest value for our content + |- get_unrecognized_lines - lines with unrecognized content + |- get_annotations - dictionary of content prior to the descriptor entry + +- get_annotation_lines - lines that provided the annotations """
import re @@ -39,7 +43,7 @@ try: import rsa IS_RSA_AVAILABLE = True except ImportError: - log.info("Unable to import the rsa module. Because of this we'll be unable to verify descriptor integrity.") + log.info("Unable to import the rsa module. Because of this we'll be unable to verify descriptor signature integrity.") IS_RSA_AVAILABLE = False
# relay descriptors must have exactly one of the following @@ -75,17 +79,14 @@ def parse_file(descriptor_file, validate = True): Iterates over the server descriptors in a file. This can read either relay or bridge server descriptors.
- Arguments: - descriptor_file (file) - file with descriptor content - validate (bool) - checks the validity of the descriptor's content if - True, skips these checks otherwise + :param file descriptor_file: file with descriptor content + :param bool validate: checks the validity of the descriptor's content if True, skips these checks otherwise
- Returns: - iterator for ServerDescriptor instances in the file + :returns: iterator for ServerDescriptor instances in the file
- Raises: - ValueError if the contents is malformed and validate is True - IOError if the file can't be read + :raises: + * ValueError if the contents is malformed and validate is True + * IOError if the file can't be read """
# Handler for relay descriptors @@ -134,48 +135,46 @@ class ServerDescriptor(stem.descriptor.Descriptor): """ Common parent for server descriptors.
- Attributes: - nickname (str) - relay's nickname (*) - fingerprint (str) - identity key fingerprint - published (datetime) - time in GMT when this descriptor was made (*) - - address (str) - IPv4 address of the relay (*) - or_port (int) - port used for relaying (*) - socks_port (int) - (deprecated, always zero) port used as client (*) - dir_port (int) - port used for descriptor mirroring (*) - - platform (str) - line with operating system and tor version - tor_version (stem.version.Version) - version of tor - operating_system (str) - operating system - uptime (int) - uptime when published in seconds - contact (str) - contact information - exit_policy (stem.exit_policy.ExitPolicy) - stated exit policy (*) - family (list) - nicknames or fingerprints of declared family (*) - - average_bandwidth (int) - averate rate it's willing to relay in bytes/s (*) - burst_bandwidth (int) - burst rate it's willing to relay in bytes/s (*) - observed_bandwidth (int) - estimated capacity based on usage in bytes/s (*) - - link_protocols (list) - link protocols supported by the relay - circuit_protocols (list) - circuit protocols supported by the relay - hibernating (bool) - hibernating when published (*) - allow_single_hop_exits (bool) - flag if single hop exiting is allowed (*) - extra_info_cache (bool) - flag if a mirror for extra-info documents (*) - extra_info_digest (str) - hex encoded digest of our extra-info document - hidden_service_dir (list) - hidden service descriptor versions it stores - eventdns (bool) - (deprecated, always unset) flag for evdns backend - - Deprecated, moved to extra-info descriptor... 
- read_history_end (datetime) - end of the sampling interval - read_history_interval (int) - seconds per interval - read_history_values (list) - bytes read during each interval - - write_history_end (datetime) - end of the sampling interval - write_history_interval (int) - seconds per interval - write_history_values (list) - bytes written during each interval - - (*) attribute is either required when we're parsed with validation or has a - default value, others are left as None if undefined + :var str nickname: ***** relay's nickname + :var str fingerprint: identity key fingerprint + :var datetime published: ***** time in GMT when this descriptor was made + + :var str address: ***** IPv4 address of the relay + :var int or_port: ***** port used for relaying + :var int socks_port: ***** port used as client (deprecated, always zero) + :var int dir_port: ***** port used for descriptor mirroring + + :var str platform: line with operating system and tor version + :var stem.version.Version tor_version: version of tor + :var str operating_system: operating system + :var int uptime: uptime when published in seconds + :var str contact: contact information + :var stem.exit_policy.ExitPolicy exit_policy: ***** stated exit policy + :var list family: ***** nicknames or fingerprints of declared family + + :var int average_bandwidth: ***** averate rate it's willing to relay in bytes/s + :var int burst_bandwidth: ***** burst rate it's willing to relay in bytes/s + :var int observed_bandwidth: ***** estimated capacity based on usage in bytes/s + + :var list link_protocols: link protocols supported by the relay + :var list circuit_protocols: circuit protocols supported by the relay + :var bool hibernating: ***** hibernating when published + :var bool allow_single_hop_exits: ***** flag if single hop exiting is allowed + :var bool extra_info_cache: ***** flag if a mirror for extra-info documents + :var str extra_info_digest: hex encoded digest of our extra-info document + :var bool 
eventdns: flag for evdns backend (deprecated, always unset) + + Deprecated, moved to extra-info descriptor... + + :var datetime read_history_end: end of the sampling interval + :var int read_history_interval: seconds per interval + :var list read_history_values: bytes read during each interval + + :var datetime write_history_end: end of the sampling interval + :var int write_history_interval: seconds per interval + :var list write_history_values: bytes written during each interval + + ***** attribute is either required when we're parsed with validation or has a default value, others are left as None if undefined """
def __init__(self, raw_contents, validate = True, annotations = None): @@ -188,14 +187,11 @@ class ServerDescriptor(stem.descriptor.Descriptor): validation can be disabled to either improve performance or be accepting of malformed data.
- Arguments: - raw_contents (str) - descriptor content provided by the relay - validate (bool) - checks the validity of the descriptor's content if - True, skips these checks otherwise - annotations (list) - lines that appeared prior to the descriptor + :param str raw_contents: descriptor content provided by the relay + :param bool validate: checks the validity of the descriptor's content if True, skips these checks otherwise + :param list annotations: lines that appeared prior to the descriptor
- Raises: - ValueError if the contents is malformed and validate is True + :raises: ValueError if the contents is malformed and validate is True """
stem.descriptor.Descriptor.__init__(self, raw_contents) @@ -262,11 +258,10 @@ class ServerDescriptor(stem.descriptor.Descriptor): server descriptor entry for this relay.
Note that network status entries exclude the padding, so you'll need to add - a '=' to it so they'll match... - https://en.wikipedia.org/wiki/Base64#Padding + a '=' to it so they'll match (`explanation + <https://en.wikipedia.org/wiki/Base64#Padding>`_).
- Returns: - str with the digest value for this server descriptor + :returns: str with the digest value for this server descriptor """
raise NotImplementedError("Unsupported Operation: this should be implemented by the ServerDescriptor subclass") @@ -279,11 +274,12 @@ class ServerDescriptor(stem.descriptor.Descriptor): Provides content that appeared prior to the descriptor. If this comes from the cached-descriptors file then this commonly contains content like...
+ :: + @downloaded-at 2012-03-18 21:18:29 @source "173.254.216.66"
- Returns: - dict with the key/value pairs in our annotations + :returns: dict with the key/value pairs in our annotations """
if self._annotation_dict is None: @@ -305,8 +301,7 @@ class ServerDescriptor(stem.descriptor.Descriptor): is the same as the get_annotations() results, but with the unparsed lines and ordering retained.
- Returns: - list with the lines of annotation that came before this descriptor + :returns: list with the lines of annotation that came before this descriptor """
return self._annotation_lines @@ -316,12 +311,10 @@ class ServerDescriptor(stem.descriptor.Descriptor): Parses a series of 'keyword => (value, pgp block)' mappings and applies them as attributes.
- Arguments: - entries (dict) - descriptor contents to be applied - validate (bool) - checks the validity of descriptor content if True + :param dict entries: descriptor contents to be applied + :param bool validate: checks the validity of descriptor content if True
- Raises: - ValueError if an error occures in validation + :raises: ValueError if an error occurs in validation """
for keyword, values in entries.items(): @@ -516,13 +509,11 @@ class ServerDescriptor(stem.descriptor.Descriptor): Does a basic check that the entries conform to this descriptor type's constraints.
- Arguments: - entries (dict) - keyword => (value, pgp key) entries - first_keyword (str) - keyword of the first line - last_keyword (str) - keyword of the last line + :param dict entries: keyword => (value, pgp key) entries + :param str first_keyword: keyword of the first line + :param str last_keyword: keyword of the last line
- Raises: - ValueError if an issue arises in validation + :raises: ValueError if an issue arises in validation """
required_fields = self._required_fields() @@ -558,16 +549,13 @@ class ServerDescriptor(stem.descriptor.Descriptor):
class RelayDescriptor(ServerDescriptor): """ - Server descriptor, as specified in... - https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt + Server descriptor (`specification <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_)
- Attributes: - onion_key (str) - key used to encrypt EXTEND cells (*) - signing_key (str) - relay's long-term identity key (*) - signature (str) - signature for this descriptor (*) + :var str onion_key: ***** key used to encrypt EXTEND cells + :var str signing_key: ***** relay's long-term identity key + :var str signature: ***** signature for this descriptor
- (*) attribute is either required when we're parsed with validation or has a - default value, others are left as None if undefined + ***** attribute is either required when we're parsed with validation or has a default value, others are left as None if undefined """
def __init__(self, raw_contents, validate = True, annotations = None): @@ -593,8 +581,7 @@ class RelayDescriptor(ServerDescriptor): """ Validates that our content matches our signature.
- Returns: - True if our signature matches our content, False otherwise + :returns: True if our signature matches our content, False otherwise """
raise NotImplementedError # TODO: finish implementing @@ -668,13 +655,9 @@ class RelayDescriptor(ServerDescriptor):
class BridgeDescriptor(ServerDescriptor): """ - Bridge descriptor, as specified in... - https://metrics.torproject.org/formats.html#bridgedesc + Bridge descriptor (`specification <https://metrics.torproject.org/formats.html#bridgedesc>`_)
- Attributes: - address_alt (list) - alternative for our address/or_port attributes, each - entry is a tuple of the form... - (address (str), port (int), is_ipv6 (bool)) + :var list address_alt: alternative for our address/or_port attributes, each entry is a tuple of the form ``(address (str), port (int), is_ipv6 (bool))`` """
def __init__(self, raw_contents, validate = True, annotations = None): @@ -737,8 +720,7 @@ class BridgeDescriptor(ServerDescriptor): descriptor specification. Validation is a moving target so this may not be fully up to date.
- Returns: - True if we're scrubbed, False otherwise + :returns: True if we're scrubbed, False otherwise """
return self.get_scrubbing_issues() == [] @@ -747,9 +729,7 @@ class BridgeDescriptor(ServerDescriptor): """ Provides issues with our scrubbing.
- Returns: - list of strings which describe issues we have with our scrubbing, this - list is empty if we're properly scrubbed + :returns: list of strings which describe issues we have with our scrubbing, this list is empty if we're properly scrubbed """
if self._scrubbing_issues == None:
tor-commits@lists.torproject.org