[tor-commits] [pyonionoo/master] Remove addresses, flags table; Add get_router_tuple

karsten at torproject.org karsten at torproject.org
Sun Sep 23 19:07:05 UTC 2012


commit 1ec3848f1385ee18598b3d07ab3c25871e83d8db
Author: Sathyanarayanan Gunasekaran <gsathya.ceg at gmail.com>
Date:   Sun Sep 23 09:38:35 2012 +0530

    Remove addresses, flags table; Add get_router_tuple
    
    Use summary table to handle all the data. Add a search, flags,
    and addresses column to the summary table.
    
    get_router_tuple(fields) returns the values corresponding to the
    fields
---
 pyonionoo/database.py |   78 +++++++++++++++----------------------------------
 pyonionoo/parser.py   |   38 ++++++++++++++++++++----
 2 files changed, 56 insertions(+), 60 deletions(-)

diff --git a/pyonionoo/database.py b/pyonionoo/database.py
index be70578..18eb4bc 100644
--- a/pyonionoo/database.py
+++ b/pyonionoo/database.py
@@ -45,30 +45,21 @@ TOTAL_ROWS = 0
 # will be made into an alias for rowid.
 summary_tbl_name = 'summary'
 summary_schema = """
-id INTEGER PRIMARY KEY, 
-type CHARACTER, 
-nickname STRING, 
-fingerprint STRING, 
-running BOOLEAN, 
-time_published STRING, 
-OR_port STRING, 
-dir_port STRING, 
-consensus_weight INTEGER, 
-country_code STRING, 
-hostname STRING, 
-time_lookup STRING
-"""
-
-addresses_tbl_name = 'addresses'
-addresses_schema = """
-id_of_row INTEGER, 
-address STRING
-"""
-
-flags_tbl_name = 'flags'
-flags_schema = """
-id_of_row INTEGER, 
-flag STRING
+id INTEGER PRIMARY KEY,
+type CHARACTER,
+nickname STRING,
+fingerprint STRING,
+running BOOLEAN,
+time_published STRING,
+or_port STRING,
+dir_port STRING,
+consensus_weight INTEGER,
+country_code STRING,
+hostname STRING,
+time_lookup STRING,
+flags STRING,
+addresses STRING,
+search STRING
 """
 
 def _create_table(conn, tbl_name, schema):
@@ -113,8 +104,6 @@ def bootstrap_database(metrics_out, summary_file):
 
     # Create the tables.
     _create_table(conn, summary_tbl_name, summary_schema)
-    _create_table(conn, flags_tbl_name, flags_schema)
-    _create_table(conn, addresses_tbl_name, addresses_schema)
 
     conn.commit()
 
@@ -122,13 +111,16 @@ def bootstrap_database(metrics_out, summary_file):
 
 def update_databases(summary_file=None):
     """
-    Updates all three databases information.
+    Updates the database.
 
     This operation operates as a single transaction.  Therefore,
     the database can be read by other requests while it is being
     performed, and those reads will correspond to the "un-updated"
     database.  Once this function completes, all future reads will be
     from the "updated" database.
+
+    @type summary_file: string
+    @param summary_file: full path to the summary file
     """
     global DB_CREATION_TIME, TABLE_ROWS
 
@@ -145,8 +137,6 @@ def update_databases(summary_file=None):
     logging.info("Deleting data from databases")
     CURSOR = conn.cursor()
     CURSOR.execute('DELETE FROM %s' % summary_tbl_name)
-    CURSOR.execute('DELETE FROM %s' % flags_tbl_name)
-    CURSOR.execute('DELETE FROM %s' %addresses_tbl_name)
     TABLE_ROWS = 0
 
     # Create the summary database.  We could accumulate all the router tuples
@@ -158,11 +148,9 @@ def update_databases(summary_file=None):
     # field in the flags/addresses table.  Here we can avoid all those
     # selects, because the rowid attribute of the cursor is set to that
     # id field right after we execute the (individual) insert statements.
-    summary_fields = ('type', 'nickname', 'fingerprint', 'running',
-                      'time_published', 'OR_port', 'dir_port', 'consensus_weight',
-                      'country_code', 'hostname', 'time_lookup')
-    addresses_fields = ('id_of_row', 'address')
-    flags_fields = ('id_of_row', 'flag')
+    summary_fields = ('type', 'nickname', 'fingerprint', 'running', 'time_published',
+                      'or_port', 'dir_port', 'consensus_weight', 'country_code',
+                      'hostname', 'time_lookup', 'flags', 'addresses', 'search')
 
     insert_stmt = 'insert into %s (%s) values (%s)'
 
@@ -170,14 +158,6 @@ def update_databases(summary_file=None):
     summary_insert_stmt = (insert_stmt % (summary_tbl_name, ','.join(summary_fields),
                                           ','.join(['?']*len(summary_fields))))
 
-    # create insertion statement for addresses table
-    addresses_insert_stmt = (insert_stmt % (addresses_tbl_name, ','.join(addresses_fields),
-                                            ','.join(['?']*len(addresses_fields))))
-
-    # create insertion statement for flags table
-    flags_insert_stmt = (insert_stmt % (flags_tbl_name, ','.join(flags_fields),
-                                        ','.join(['?']*len(flags_fields))))
-
     if not summary_file:
         # raise Exception?
         return
@@ -187,10 +167,7 @@ def update_databases(summary_file=None):
             router = Router()
             router.parse(line)
 
-            router_tuple = (router.type, router.nickname, router.fingerprint,
-                            router.running, router.time_published, router.orport,
-                            router.dirport, router.consensus_weight, router.country_code,
-                            router.hostname, router.time_of_lookup)
+            router_tuple = router.get_router_tuple(summary_fields)
 
             # TODO: Determine whether sqlite3 optimizes by remembering
             # this insert command and not parsing it every time it sees
@@ -201,13 +178,6 @@ def update_databases(summary_file=None):
             id_num = CURSOR.lastrowid
             TABLE_ROWS += 1
 
-            address_values = (id_num, router.address)
-            CURSOR.execute(addresses_insert_stmt, address_values)
-
-            for flag in router.flags:
-                flag_values = (id_num, flag)
-                CURSOR.execute(flags_insert_stmt, flag_values)
-
     conn.commit()
     logging.info("Table updated")
     DB_CREATION_TIME = time.time()
@@ -227,7 +197,7 @@ def query_summary_tbl(running_filter=None, type_filter=None, hex_fingerprint_fil
                       order_asc=True, offset_value=None, limit_value=None,
                       fields=('fingerprint',)):
     conn = get_database_conn()
-
+    cursor = conn.cursor()
     # Build up a WHERE clause based on the request parameters.  We only
     # consider the case in which the client specifies 'search' or
     # some subset (possibly empty) of {'running', 'type', 'lookup', 'country'}.
diff --git a/pyonionoo/parser.py b/pyonionoo/parser.py
index 5105782..b917a92 100644
--- a/pyonionoo/parser.py
+++ b/pyonionoo/parser.py
@@ -10,14 +10,14 @@ class Router:
         self.exit_addresses = None
         self.or_addresses = None
         self.time_published = None
-        self.orport = None
-        self.dirport = None
+        self.or_port = None
+        self.dir_port = None
         self.flags = None
         self.running = False
         self.consensus_weight = None
         self.country_code = None
         self.hostname = None
-        self.time_of_lookup = None
+        self.time_lookup = None
         self.type = None
 
     def parse(self, raw_content):
@@ -46,8 +46,8 @@ class Router:
         
         self.time_published = self._parse_timestamp(values[4] + ' ' + values[5])
         
-        self.orport = int(values[6])
-        self.dirport = int(values[7])
+        self.or_port = int(values[6])
+        self.dir_port = int(values[7])
         self.flags = values[8].split(',')
         for flag in self.flags:
             if flag == "Running":
@@ -55,7 +55,7 @@ class Router:
         self.consensus_weight = int(values[9])
         self.country_code = values[10]
         if values[11] != "null" : self.hostname = values[11]
-        self.time_of_lookup = int(values[12])
+        self.time_lookup = int(values[12])
     
     def _parse_timestamp(self, content):
       """
@@ -67,3 +67,29 @@ class Router:
         return timestamp
       except ValueError:
         raise ValueError("Timestamp wasn't parseable: %s" % line)
+
+    def get_router_tuple(self, fields):
+        """
+        Returns a tuple of values.
+
+        @param type: list/tuple
+        @param fields: attributes of Router for which the values must be returned
+
+        @rtype: tuple
+        @return: list of values corresponding to the fields
+        """
+
+        router_list = []
+        for field in fields:
+            if field == "search":
+                value = ' %s %s %s' % (self.fingerprint, self.nickname,
+                                       self.address)
+            elif field == "flags":
+                value = ' '.join(self.flags)
+                # add leading space
+                value = ' %s' % value
+            else:
+                value = getattr(self, field)
+            router_list.append(value)
+
+        return tuple(router_list)





More information about the tor-commits mailing list