[tor-commits] [torbel/master] Add metadata to CSV export format.

sebastian at torproject.org sebastian at torproject.org
Sun Sep 4 07:25:41 UTC 2011


commit 8323b633dbccba0b3e06362cc8961a6f303557d8
Author: Harry Bock <hbock at ele.uri.edu>
Date:   Wed Oct 20 00:43:42 2010 -0400

    Add metadata to CSV export format.
    
    Add metadata to CSV format specification and implementation
    (query and export).  This allows torbel to know what version
    of the CSV format to use, allowing for changes in the future
    and backward compatibility.
    
    Not implemented yet for the JSON format.
---
 __init__.py       |    2 ++
 controller.py     |    4 ++--
 doc/data-spec.txt |   13 +++++++++++++
 query.py          |   24 ++++++++++++++++++++++++
 4 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/__init__.py b/__init__.py
index e69de29..b5bf7e9 100644
--- a/__init__.py
+++ b/__init__.py
@@ -0,0 +1,2 @@
+# TorBEL export format version.
+__export_version__ = 1
diff --git a/controller.py b/controller.py
index 29beaff..807ce18 100644
--- a/controller.py
+++ b/controller.py
@@ -24,7 +24,7 @@ from twisted.internet import reactor
 
 from TorCtl import TorCtl, TorUtil
 # torbel submodules
-from torbel import scheduler, network, utils
+from torbel import scheduler, network, utils, __export_version__
 from torbel.logger import *
 from torbel.router import RouterRecord
 
@@ -598,7 +598,7 @@ class Controller(TorCtl.EventHandler):
                 csv_file = open(fn_new, "w")
                 
             out = csv.writer(csv_file, dialect = csv.excel)
-
+            out.writerow(["torbel", __export_version__])
             # FIXME: Is it safe to just take the itervalues list?
             with self.consensus_cache_lock:
                 for router in self.router_cache.itervalues():
diff --git a/doc/data-spec.txt b/doc/data-spec.txt
index c5f4014..9c39d87 100644
--- a/doc/data-spec.txt
+++ b/doc/data-spec.txt
@@ -12,6 +12,8 @@ Status: Draft
   This document is a work-in-progress and the data format may change quickly
   during the summer in response to demand and implementation problems.
 
+  This document describes TorBEL export data format version 1.
+
 1. Exported Data
 
 1.1. Records
@@ -152,6 +154,17 @@ Status: Draft
       Fields that contain an escaped double quote are also enclosed in a set
       of double quotes.
 
+  Metadata indicating the CSV export format version must be present on the first
+  line in the format:
+  
+    torbel,VERSION
+
+  Where VERSION is the export format version as an integer, and torbel is
+  the literal string 'torbel'. Additional metadata fields may be added
+  in future export versions and consumers MUST handle extraneous fields
+  on this line not specified in this document.
+
+  Following the metadata line, all remaining lines are router data rows.
   The fields of each row are, in order:
 
     ExitAddress, RouterID, Nickname, LastTestedTimestamp, InConsensus,
diff --git a/query.py b/query.py
index 69e9ec9..16a6e7a 100644
--- a/query.py
+++ b/query.py
@@ -10,6 +10,7 @@ import ipaddr
 import sys
 from socket import inet_aton, inet_ntoa
 from logger import *
+from torbel import __export_version__
 
 if sys.version_info >= (2,6):
     import json
@@ -189,10 +190,14 @@ class ExitPolicyRule:
             return "reject " + ip + ":" + port
             
 class ExitList:
+    class ImportError(ValueError):
+        pass
+    
     def __init__(self, filename, status_filename = None):
         self.cache_ip = {}
         self.cache_id = {}
 
+        self.version = None
         self.next_update = None
         self.last_update = None
         self.export_files = []
@@ -286,6 +291,25 @@ class ExitList:
             the TorBEL data-spec document. """
         reader = csv.reader(infile, dialect = "excel")
         record = 1
+        # Grab metadata row and export format version.
+        metadata = reader.next()
+        try:
+            self.version = int(metadata[1])
+            if metadata[0] != "torbel":
+                raise self.ImportError("Invalid TorBEL export format.")
+            if self.version > __export_version__:
+                raise self.ImportError("Export version %d not supported!" % self.version)
+
+        # ValueError will be raised if the second value on the metadata line
+        # (the version field) is not an integer.
+        # IndexError is raised if the metadata line is empty or contains
+        # only a single field, since the version field is then missing.
+        # StopIteration is raised if we try to read from an empty file.
+        # All of these indicate the TorBEL export file is not actually
+        # a valid export.
+        except (ValueError, IndexError, StopIteration):
+            raise self.ImportError("Invalid TorBEL export format.")
+
         for r in reader:
             try:
                 data = {





More information about the tor-commits mailing list