commit 8323b633dbccba0b3e06362cc8961a6f303557d8 Author: Harry Bock hbock@ele.uri.edu Date: Wed Oct 20 00:43:42 2010 -0400
Add metadata to CSV export format.
Add metadata to the CSV format specification and implementation (query and export). This lets torbel determine which version of the CSV format an export uses, so the format can change in the future while remaining backward compatible.
Not implemented yet for the JSON format. --- __init__.py | 2 ++ controller.py | 4 ++-- doc/data-spec.txt | 13 +++++++++++++ query.py | 24 ++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 2 deletions(-)
diff --git a/__init__.py b/__init__.py index e69de29..b5bf7e9 100644 --- a/__init__.py +++ b/__init__.py @@ -0,0 +1,2 @@ +# TorBEL export version format. +__export_version__ = 1 diff --git a/controller.py b/controller.py index 29beaff..807ce18 100644 --- a/controller.py +++ b/controller.py @@ -24,7 +24,7 @@ from twisted.internet import reactor
from TorCtl import TorCtl, TorUtil # torbel submodules -from torbel import scheduler, network, utils +from torbel import scheduler, network, utils, __export_version__ from torbel.logger import * from torbel.router import RouterRecord
@@ -598,7 +598,7 @@ class Controller(TorCtl.EventHandler): csv_file = open(fn_new, "w")
out = csv.writer(csv_file, dialect = csv.excel) - + out.writerow(["torbel", __export_version__]) # FIXME: Is it safe to just take the itervalues list? with self.consensus_cache_lock: for router in self.router_cache.itervalues(): diff --git a/doc/data-spec.txt b/doc/data-spec.txt index c5f4014..9c39d87 100644 --- a/doc/data-spec.txt +++ b/doc/data-spec.txt @@ -12,6 +12,8 @@ Status: Draft This document is a work-in-progress and the data format may change quickly during the summer in response to demand and implementation problems.
+ This document describes TorBEL export data format version 1. + 1. Exported Data
1.1. Records @@ -152,6 +154,17 @@ Status: Draft Fields that contain an escaped double quote are also enclosed in a set of double quotes.
+ Metadata indicating the CSV export format version must be present on the first + line in the format: + + torbel,VERSION + + Where VERSION is the export format version as an integer, and torbel is + the literal string 'torbel'. Additional metadata fields may be added + in future export versions and consumers MUST handle extraneous fields + on this line not specified in this document. + + Following the metadata line, all remaining lines are router data rows. The fields of each row are, in order:
ExitAddress, RouterID, Nickname, LastTestedTimestamp, InConsensus, diff --git a/query.py b/query.py index 69e9ec9..16a6e7a 100644 --- a/query.py +++ b/query.py @@ -10,6 +10,7 @@ import ipaddr import sys from socket import inet_aton, inet_ntoa from logger import * +from torbel import __export_version__
if sys.version_info >= (2,6): import json @@ -189,10 +190,14 @@ class ExitPolicyRule: return "reject " + ip + ":" + port
class ExitList: + class ImportError(ValueError): + pass + def __init__(self, filename, status_filename = None): self.cache_ip = {} self.cache_id = {}
+ self.version = None self.next_update = None self.last_update = None self.export_files = [] @@ -286,6 +291,25 @@ class ExitList: the TorBEL data-spec document. """ reader = csv.reader(infile, dialect = "excel") record = 1 + # Grab metadata row and export format version. + metadata = reader.next() + try: + self.version = int(metadata[1]) + if metadata[0] != "torbel": + raise self.ImportError("Invalid TorBEL export format.") + if self.version > __export_version__: + raise self.ImportError("Export version %d not supported!" % self.version) + + # ValueError will be raised if the version value (the second field) on the metadata line + # is not an integer. + # IndexError is raised if the metadata line has fewer than two fields (for example, if it is empty). Not quite sure + # if this is actually possible! + # StopIteration is raised if we try to read from an empty file. + # All of these indicate the TorBEL export file is not actually + # a valid export. + except (ValueError, IndexError, StopIteration): + raise self.ImportError("Invalid TorBEL export format.") + for r in reader: try: data = {
tor-commits@lists.torproject.org