commit 1ec3848f1385ee18598b3d07ab3c25871e83d8db
Author: Sathyanarayanan Gunasekaran <gsathya.ceg@gmail.com>
Date:   Sun Sep 23 09:38:35 2012 +0530
Remove addresses and flags tables; add get_router_tuple
Use the summary table to handle all the data. Add search, flags, and addresses columns to the summary table.
get_router_tuple(fields) returns the values corresponding to the fields.
---
 pyonionoo/database.py | 78 +++++++++++++++----------------------------------
 pyonionoo/parser.py   | 38 ++++++++++++++++++++----
 2 files changed, 56 insertions(+), 60 deletions(-)
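For orientation, here is a minimal sketch (not part of the patch) of how the consolidated summary table is meant to work after this change: each router becomes a single row, with flags, addresses, and a search string stored as space-joined text columns, so a substring match can serve search requests without joining auxiliary tables. The LIKE-based lookup and the example values below are illustrative assumptions, not code from this repository.

    # Sketch only: in-memory SQLite mirror of the new single-table layout.
    import sqlite3

    conn = sqlite3.connect(':memory:')
    conn.execute("""CREATE TABLE summary (
        id INTEGER PRIMARY KEY, type CHARACTER, nickname STRING,
        fingerprint STRING, running BOOLEAN, time_published STRING,
        or_port STRING, dir_port STRING, consensus_weight INTEGER,
        country_code STRING, hostname STRING, time_lookup STRING,
        flags STRING, addresses STRING, search STRING)""")

    # One row per router; flags/addresses/search are space-joined strings,
    # so no separate flags or addresses tables are needed.
    conn.execute(
        "INSERT INTO summary (nickname, fingerprint, flags, addresses, search) "
        "VALUES (?, ?, ?, ?, ?)",
        ('moria1', 'ABCD1234', ' Running Fast', ' 128.31.0.34',
         ' ABCD1234 moria1 128.31.0.34'))

    # Hypothetical search: a substring filter over the denormalized column.
    print(conn.execute("SELECT nickname FROM summary WHERE search LIKE ?",
                       ('%moria%',)).fetchall())   # [('moria1',)]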
diff --git a/pyonionoo/database.py b/pyonionoo/database.py
index be70578..18eb4bc 100644
--- a/pyonionoo/database.py
+++ b/pyonionoo/database.py
@@ -45,30 +45,21 @@ TOTAL_ROWS = 0
 # will be made into an alias for rowid.
 summary_tbl_name = 'summary'
 summary_schema = """
-id INTEGER PRIMARY KEY,
-type CHARACTER,
-nickname STRING,
-fingerprint STRING,
-running BOOLEAN,
-time_published STRING,
-OR_port STRING,
-dir_port STRING,
-consensus_weight INTEGER,
-country_code STRING,
-hostname STRING,
-time_lookup STRING
-"""
-
-addresses_tbl_name = 'addresses'
-addresses_schema = """
-id_of_row INTEGER,
-address STRING
-"""
-
-flags_tbl_name = 'flags'
-flags_schema = """
-id_of_row INTEGER,
-flag STRING
+id INTEGER PRIMARY KEY,
+type CHARACTER,
+nickname STRING,
+fingerprint STRING,
+running BOOLEAN,
+time_published STRING,
+or_port STRING,
+dir_port STRING,
+consensus_weight INTEGER,
+country_code STRING,
+hostname STRING,
+time_lookup STRING,
+flags STRING,
+addresses STRING,
+search STRING
 """
 
 def _create_table(conn, tbl_name, schema):
@@ -113,8 +104,6 @@ def bootstrap_database(metrics_out, summary_file):
 
     # Create the tables.
     _create_table(conn, summary_tbl_name, summary_schema)
-    _create_table(conn, flags_tbl_name, flags_schema)
-    _create_table(conn, addresses_tbl_name, addresses_schema)
 
     conn.commit()
@@ -122,13 +111,16 @@ def bootstrap_database(metrics_out, summary_file):
 
 def update_databases(summary_file=None):
     """
-    Updates all three databases information.
+    Updates the database.
 
     This operation operates as a single transaction.  Therefore, the
     database can be read by other requests while it is being performed,
    and those reads will correspond to the "un-updated" database.  Once
    this function completes, all future reads will be from the "updated"
    database.
+
+    @type summary_file: string
+    @param summary_file: full path to the summary file
     """
 
     global DB_CREATION_TIME, TABLE_ROWS
@@ -145,8 +137,6 @@ def update_databases(summary_file=None):
     logging.info("Deleting data from databases")
     CURSOR = conn.cursor()
     CURSOR.execute('DELETE FROM %s' % summary_tbl_name)
-    CURSOR.execute('DELETE FROM %s' % flags_tbl_name)
-    CURSOR.execute('DELETE FROM %s' %addresses_tbl_name)
     TABLE_ROWS = 0
 
     # Create the summary database.  We could accumulate all the router tuples
@@ -158,11 +148,9 @@ def update_databases(summary_file=None):
     # field in the flags/addresses table.  Here we can avoid all those
     # selects, because the rowid attribute of the cursor is set to that
     # id field right after we execute the (individual) insert statements.
-    summary_fields = ('type', 'nickname', 'fingerprint', 'running',
-        'time_published', 'OR_port', 'dir_port', 'consensus_weight',
-        'country_code', 'hostname', 'time_lookup')
-    addresses_fields = ('id_of_row', 'address')
-    flags_fields = ('id_of_row', 'flag')
+    summary_fields = ('type', 'nickname', 'fingerprint', 'running', 'time_published',
+                      'or_port', 'dir_port', 'consensus_weight', 'country_code',
+                      'hostname', 'time_lookup', 'flags', 'addresses', 'search')
 
     insert_stmt = 'insert into %s (%s) values (%s)'
@@ -170,14 +158,6 @@ def update_databases(summary_file=None):
     summary_insert_stmt = (insert_stmt % (summary_tbl_name, ','.join(summary_fields),
                            ','.join(['?']*len(summary_fields))))
 
-    # create insertion statement for addresses table
-    addresses_insert_stmt = (insert_stmt % (addresses_tbl_name, ','.join(addresses_fields),
-                           ','.join(['?']*len(addresses_fields))))
-
-    # create insertion statement for flags table
-    flags_insert_stmt = (insert_stmt % (flags_tbl_name, ','.join(flags_fields),
-                           ','.join(['?']*len(flags_fields))))
-
     if not summary_file:
         # raise Exception?
         return
@@ -187,10 +167,7 @@ def update_databases(summary_file=None):
         router = Router()
         router.parse(line)
 
-        router_tuple = (router.type, router.nickname, router.fingerprint,
-                router.running, router.time_published, router.orport,
-                router.dirport, router.consensus_weight, router.country_code,
-                router.hostname, router.time_of_lookup)
+        router_tuple = router.get_router_tuple(summary_fields)
 
         # TODO: Determine whether sqlite3 optimizes by remembering
         # this insert command and not parsing it every time it sees
@@ -201,13 +178,6 @@ def update_databases(summary_file=None):
         id_num = CURSOR.lastrowid
         TABLE_ROWS += 1
 
-        address_values = (id_num, router.address)
-        CURSOR.execute(addresses_insert_stmt, address_values)
-
-        for flag in router.flags:
-            flag_values = (id_num, flag)
-            CURSOR.execute(flags_insert_stmt, flag_values)
-
     conn.commit()
     logging.info("Table updated")
     DB_CREATION_TIME = time.time()
@@ -227,7 +197,7 @@ def query_summary_tbl(running_filter=None, type_filter=None, hex_fingerprint_fil
         order_asc=True, offset_value=None, limit_value=None, fields=('fingerprint',)):
     conn = get_database_conn()
-
+    cursor = conn.cursor()
 
     # Build up a WHERE clause based on the request parameters.  We only
     # consider the case in which the client specifies 'search' or
     # some subset (possibly empty) of {'running', 'type', 'lookup', 'country'}.
diff --git a/pyonionoo/parser.py b/pyonionoo/parser.py
index 5105782..b917a92 100644
--- a/pyonionoo/parser.py
+++ b/pyonionoo/parser.py
@@ -10,14 +10,14 @@ class Router:
         self.exit_addresses = None
         self.or_addresses = None
         self.time_published = None
-        self.orport = None
-        self.dirport = None
+        self.or_port = None
+        self.dir_port = None
         self.flags = None
         self.running = False
         self.consensus_weight = None
         self.country_code = None
         self.hostname = None
-        self.time_of_lookup = None
+        self.time_lookup = None
         self.type = None
 
     def parse(self, raw_content):
@@ -46,8 +46,8 @@ class Router:
 
         self.time_published = self._parse_timestamp(values[4] + ' ' + values[5])
 
-        self.orport = int(values[6])
-        self.dirport = int(values[7])
+        self.or_port = int(values[6])
+        self.dir_port = int(values[7])
         self.flags = values[8].split(',')
         for flag in self.flags:
             if flag == "Running":
@@ -55,7 +55,7 @@ class Router:
         self.consensus_weight = int(values[9])
         self.country_code = values[10]
         if values[11] != "null" : self.hostname = values[11]
-        self.time_of_lookup = int(values[12])
+        self.time_lookup = int(values[12])
 
     def _parse_timestamp(self, content):
         """
@@ -67,3 +67,29 @@ class Router:
             return timestamp
         except ValueError:
             raise ValueError("Timestamp wasn't parseable: %s" % line)
+
+    def get_router_tuple(self, fields):
+        """
+        Returns a tuple of values.
+
+        @type fields: list/tuple
+        @param fields: attributes of Router for which the values must be returned
+
+        @rtype: tuple
+        @return: tuple of values corresponding to the fields
+        """
+
+        router_list = []
+        for field in fields:
+            if field == "search":
+                value = ' %s %s %s' % (self.fingerprint, self.nickname,
+                                       self.address)
+            elif field == "flags":
+                value = ' '.join(self.flags)
+                # add leading space
+                value = ' %s' % value
+            else:
+                value = getattr(self, field)
+            router_list.append(value)
+
+        return tuple(router_list)
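As a quick illustration of the new helper, here is a hedged usage sketch (attribute values are invented for the example): get_router_tuple lines the Router attributes up with the summary_fields tuple used in update_databases, synthesizing the flags and search column values on the fly.

    # Hypothetical example; the attribute values below are made up.
    from pyonionoo.parser import Router

    router = Router()
    router.nickname = 'moria1'
    router.fingerprint = 'ABCD1234'
    router.address = '128.31.0.34'
    router.flags = ['Running', 'Fast']

    print(router.get_router_tuple(('nickname', 'flags', 'search')))
    # ('moria1', ' Running Fast', ' ABCD1234 moria1 128.31.0.34')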