commit 0dabab48c6792579498692503afd8076b219662e Author: Karsten Loesing karsten.loesing@gmx.net Date: Fri Jun 28 19:39:12 2013 +0200
Refactor rDNS code and add more execution stats. --- src/org/torproject/onionoo/DetailsDataWriter.java | 117 +++---------- src/org/torproject/onionoo/DocumentStore.java | 4 + src/org/torproject/onionoo/LookupService.java | 20 +++ src/org/torproject/onionoo/Main.java | 48 +++--- src/org/torproject/onionoo/NodeDataWriter.java | 31 ++++ .../onionoo/ReverseDomainNameResolver.java | 178 ++++++++++++++++++++ 6 files changed, 273 insertions(+), 125 deletions(-)
diff --git a/src/org/torproject/onionoo/DetailsDataWriter.java b/src/org/torproject/onionoo/DetailsDataWriter.java index 3d49169..e4ccb78 100644 --- a/src/org/torproject/onionoo/DetailsDataWriter.java +++ b/src/org/torproject/onionoo/DetailsDataWriter.java @@ -2,8 +2,6 @@ * See LICENSE for licensing information */ package org.torproject.onionoo;
-import java.net.InetAddress; -import java.net.UnknownHostException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -38,6 +36,8 @@ public class DetailsDataWriter {
private DescriptorSource descriptorSource;
+ private ReverseDomainNameResolver reverseDomainNameResolver; + private DocumentStore documentStore;
private SortedMap<String, NodeStatus> relays; @@ -45,8 +45,10 @@ public class DetailsDataWriter { private SortedMap<String, NodeStatus> bridges;
public DetailsDataWriter(DescriptorSource descriptorSource, + ReverseDomainNameResolver reverseDomainNameResolver, DocumentStore documentStore) { this.descriptorSource = descriptorSource; + this.reverseDomainNameResolver = reverseDomainNameResolver; this.documentStore = documentStore; }
@@ -63,109 +65,28 @@ public class DetailsDataWriter { } }
- private static final long RDNS_LOOKUP_MAX_REQUEST_MILLIS = 10L * 1000L; - private static final long RDNS_LOOKUP_MAX_DURATION_MILLIS = 5L * 60L - * 1000L; - private static final long RDNS_LOOKUP_MAX_AGE_MILLIS = 12L * 60L * 60L - * 1000L; - private static final int RDNS_LOOKUP_WORKERS_NUM = 5; - private Set<String> rdnsLookupJobs; - private Map<String, String> rdnsLookupResults; - private long startedRdnsLookups; - private List<RdnsLookupWorker> rdnsLookupWorkers; public void startReverseDomainNameLookups() { - this.startedRdnsLookups = System.currentTimeMillis(); - this.rdnsLookupJobs = new HashSet<String>(); + Map<String, Long> addressLastLookupTimes = + new HashMap<String, Long>(); for (NodeStatus relay : relays.values()) { - if (relay.getLastRdnsLookup() < this.startedRdnsLookups - - RDNS_LOOKUP_MAX_AGE_MILLIS) { - this.rdnsLookupJobs.add(relay.getAddress()); - } - } - this.rdnsLookupResults = new HashMap<String, String>(); - this.rdnsLookupWorkers = new ArrayList<RdnsLookupWorker>(); - for (int i = 0; i < RDNS_LOOKUP_WORKERS_NUM; i++) { - RdnsLookupWorker rdnsLookupWorker = new RdnsLookupWorker(); - this.rdnsLookupWorkers.add(rdnsLookupWorker); - rdnsLookupWorker.setDaemon(true); - rdnsLookupWorker.start(); + addressLastLookupTimes.put(relay.getAddress(), + relay.getLastRdnsLookup()); } + this.reverseDomainNameResolver.setAddresses(addressLastLookupTimes); + this.reverseDomainNameResolver.startReverseDomainNameLookups(); }
public void finishReverseDomainNameLookups() { - for (RdnsLookupWorker rdnsLookupWorker : this.rdnsLookupWorkers) { - try { - rdnsLookupWorker.join(); - } catch (InterruptedException e) { - /* This is not something that we can take care of. Just leave the - * worker thread alone. */ - } - } - synchronized (this.rdnsLookupResults) { - for (NodeStatus relay : relays.values()) { - if (this.rdnsLookupResults.containsKey(relay.getAddress())) { - relay.setHostName(this.rdnsLookupResults.get( - relay.getAddress())); - relay.setLastRdnsLookup(this.startedRdnsLookups); - } - } - } - } - - private class RdnsLookupWorker extends Thread { - public void run() { - while (System.currentTimeMillis() - RDNS_LOOKUP_MAX_DURATION_MILLIS - <= startedRdnsLookups) { - String rdnsLookupJob = null; - synchronized (rdnsLookupJobs) { - for (String job : rdnsLookupJobs) { - rdnsLookupJob = job; - rdnsLookupJobs.remove(job); - break; - } - } - if (rdnsLookupJob == null) { - break; - } - RdnsLookupRequest request = new RdnsLookupRequest(this, - rdnsLookupJob); - request.setDaemon(true); - request.start(); - try { - Thread.sleep(RDNS_LOOKUP_MAX_REQUEST_MILLIS); - } catch (InterruptedException e) { - /* Getting interrupted should be the default case. */ - } - String hostName = request.getHostName(); - if (hostName != null) { - synchronized (rdnsLookupResults) { - rdnsLookupResults.put(rdnsLookupJob, hostName); - } - } - } - } - } - - private class RdnsLookupRequest extends Thread { - RdnsLookupWorker parent; - String address, hostName; - public RdnsLookupRequest(RdnsLookupWorker parent, String address) { - this.parent = parent; - this.address = address; - } - public void run() { - try { - String result = InetAddress.getByName(this.address).getHostName(); - synchronized (this) { - this.hostName = result; - } - } catch (UnknownHostException e) { - /* We'll try again the next time. */ + this.reverseDomainNameResolver.finishReverseDomainNameLookups(); + Map<String, String> lookupResults = + this.reverseDomainNameResolver.getLookupResults(); + long startedRdnsLookups = + this.reverseDomainNameResolver.getLookupStartMillis(); + for (NodeStatus relay : relays.values()) { + if (lookupResults.containsKey(relay.getAddress())) { + relay.setHostName(lookupResults.get(relay.getAddress())); + relay.setLastRdnsLookup(startedRdnsLookups); } - this.parent.interrupt(); - } - public synchronized String getHostName() { - return hostName; } }
diff --git a/src/org/torproject/onionoo/DocumentStore.java b/src/org/torproject/onionoo/DocumentStore.java index 829f35f..11d7d1b 100644 --- a/src/org/torproject/onionoo/DocumentStore.java +++ b/src/org/torproject/onionoo/DocumentStore.java @@ -374,6 +374,10 @@ public class DocumentStore { }
public void flushDocumentCache() { + /* Write cached node statuses to disk, and write update file + * containing current time. It's important to write the update file + * now, not earlier, because the front-end should not read new node + * statuses until all details, bandwidths, and weights are ready. */ if (this.listedArchivedNodeStatuses) { this.writeNodeStatuses(false); this.writeNodeStatuses(true); diff --git a/src/org/torproject/onionoo/LookupService.java b/src/org/torproject/onionoo/LookupService.java index 4071e26..3791443 100644 --- a/src/org/torproject/onionoo/LookupService.java +++ b/src/org/torproject/onionoo/LookupService.java @@ -379,6 +379,10 @@ public class LookupService { lookupResults.put(addressString, lookupResult); }
+ /* Keep statistics. */ + this.addressesLookedUp += addressStrings.size(); + this.addressesResolved += lookupResults.size(); + return lookupResults; }
@@ -392,4 +396,20 @@ public class LookupService { String aSNumber; String aSName; } + + int addressesLookedUp = 0, addressesResolved = 0; + + public String getStatsString() { + StringBuilder sb = new StringBuilder(); + sb.append(" " + formatDecimalNumber(addressesLookedUp) + + " addresses looked up\n"); + sb.append(" " + formatDecimalNumber(addressesResolved) + + " addresses resolved\n"); + return sb.toString(); + } + + //TODO This method should go into a utility class. + private static String formatDecimalNumber(long decimalNumber) { + return String.format("%,d", decimalNumber); + } } diff --git a/src/org/torproject/onionoo/Main.java b/src/org/torproject/onionoo/Main.java index 2232d81..3fca6f0 100644 --- a/src/org/torproject/onionoo/Main.java +++ b/src/org/torproject/onionoo/Main.java @@ -10,19 +10,17 @@ import java.util.SortedMap; public class Main { public static void main(String[] args) {
- printStatus("Initializing descriptor source."); + printStatus("Initializing."); DescriptorSource dso = new DescriptorSource(new File("in"), new File("status")); printStatusTime("Initialized descriptor source"); - - printStatus("Initializing document store."); DocumentStore ds = new DocumentStore(new File("status"), new File("out")); printStatusTime("Initialized document store"); - - printStatus("Initializing lookup service."); LookupService ls = new LookupService(new File("geoip")); printStatusTime("Initialized Geoip lookup service"); + ReverseDomainNameResolver rdnr = new ReverseDomainNameResolver(); + printStatusTime("Initialized reverse domain name resolver");
printStatus("Updating internal node list."); NodeDataWriter ndw = new NodeDataWriter(dso, ls, ds); @@ -38,13 +36,14 @@ public class Main { printStatusTime("Set running bits"); ndw.writeStatusSummary(); printStatusTime("Wrote status summary"); + ndw.writeOutSummary(); + printStatusTime("Wrote out summary"); SortedMap<String, NodeStatus> currentNodes = ndw.getCurrentNodes(); SortedMap<String, Integer> lastBandwidthWeights = ndw.getLastBandwidthWeights(); - // TODO Could write statistics here, too.
printStatus("Updating detail data."); - DetailsDataWriter ddw = new DetailsDataWriter(dso, ds); + DetailsDataWriter ddw = new DetailsDataWriter(dso, rdnr, ds); // TODO Instead of using ndw's currentNodes and lastBandwidthWeights, // parse statuses once again, keeping separate parse history. Allows // us to run ndw and ddw in parallel in the future. Alternatively, @@ -67,7 +66,6 @@ public class Main { printStatusTime("Finished reverse domain name lookups"); ddw.writeOutDetails(); printStatusTime("Wrote detail data files"); - // TODO Could write statistics here, too.
printStatus("Updating bandwidth data."); BandwidthDataWriter bdw = new BandwidthDataWriter(dso, ds); @@ -81,7 +79,6 @@ public class Main { // future. bdw.deleteObsoleteBandwidthFiles(); printStatusTime("Deleted obsolete bandwidth files"); - // TODO Could write statistics here, too.
printStatus("Updating weights data."); WeightsDataWriter wdw = new WeightsDataWriter(dso, ds); @@ -98,27 +95,24 @@ public class Main { // which allows us to run ndw and wdw in parallel in the future. wdw.deleteObsoleteWeightsDataFiles(); printStatusTime("Deleted obsolete weights files"); - // TODO Could write statistics here, too. - - printStatus("Updating summary data."); - ndw.writeOutSummary(); - printStatusTime("Wrote out summary"); - // TODO Could write statistics here, too. - - // TODO "Shut down" lookup service and write statistics about number - // of (successfully) looked up addresses.
- printStatus("Shutting down descriptor source."); + printStatus("Shutting down."); dso.writeHistoryFiles(); printStatusTime("Wrote parse histories"); - printStatistics(dso.getStatsString()); - printStatusTime("Shut down descriptor source"); - - printStatus("Shutting down document store."); ds.flushDocumentCache(); printStatusTime("Flushed document cache"); - printStatistics(ds.getStatsString()); - printStatusTime("Shut down document store"); + + printStatus("Gathering statistics."); + printStatistics("Node data writer", ndw.getStatsString()); + /* TODO Add statistics to remaining *Writers. */ + //printStatistics("Details data writer", ddw.getStatsString()); + //printStatistics("Bandwidth data writer", bdw.getStatsString()); + //printStatistics("Weights data writer", wdw.getStatsString()); + printStatistics("Descriptor source", dso.getStatsString()); + printStatistics("Document store", ds.getStatsString()); + printStatistics("GeoIP lookup service", ls.getStatsString()); + printStatistics("Reverse domain name resolver", + rdnr.getStatsString());
printStatus("Terminating."); } @@ -131,8 +125,8 @@ public class Main { printedLastStatusMessage = System.currentTimeMillis(); }
- private static void printStatistics(String message) { - System.out.print(" Statistics:\n" + message); + private static void printStatistics(String component, String message) { + System.out.print(" " + component + " statistics:\n" + message); }
private static void printStatusTime(String message) { diff --git a/src/org/torproject/onionoo/NodeDataWriter.java b/src/org/torproject/onionoo/NodeDataWriter.java index 9cfbb21..4afc595 100644 --- a/src/org/torproject/onionoo/NodeDataWriter.java +++ b/src/org/torproject/onionoo/NodeDataWriter.java @@ -33,6 +33,9 @@ public class NodeDataWriter {
private SortedMap<String, Integer> lastBandwidthWeights = null;
+ private int relaysUpdated = 0, relaysAdded = 0, + relayConsensusesProcessed = 0, bridgesUpdated = 0, + bridgesAdded = 0, bridgeStatusesProcessed = 0; public NodeDataWriter(DescriptorSource descriptorSource, LookupService lookupService, DocumentStore documentStore) { this.descriptorSource = descriptorSource; @@ -115,10 +118,13 @@ public class NodeDataWriter { validAfterMillis, null); if (this.knownNodes.containsKey(fingerprint)) { this.knownNodes.get(fingerprint).update(newNodeStatus); + this.relaysUpdated++; } else { this.knownNodes.put(fingerprint, newNodeStatus); + this.relaysAdded++; } } + this.relayConsensusesProcessed++; if (this.relaysLastValidAfterMillis == validAfterMillis) { this.lastBandwidthWeights = consensus.getBandwidthWeights(); } @@ -195,10 +201,13 @@ public class NodeDataWriter { -1L, null, null, publishedMillis, -1L, null); if (this.knownNodes.containsKey(fingerprint)) { this.knownNodes.get(fingerprint).update(newNodeStatus); + this.bridgesUpdated++; } else { this.knownNodes.put(fingerprint, newNodeStatus); + this.bridgesAdded++; } } + this.bridgeStatusesProcessed++; }
public void writeStatusSummary() { @@ -233,5 +242,27 @@ public class NodeDataWriter { public SortedMap<String, Integer> getLastBandwidthWeights() { return this.lastBandwidthWeights; } + + public String getStatsString() { + StringBuilder sb = new StringBuilder(); + sb.append(" " + formatDecimalNumber(relayConsensusesProcessed) + + " relay consensuses processed\n"); + sb.append(" " + formatDecimalNumber(relaysUpdated) + + " relays updated\n"); + sb.append(" " + formatDecimalNumber(relaysAdded) + + " relays added\n"); + sb.append(" " + formatDecimalNumber(bridgeStatusesProcessed) + + " bridge statuses processed\n"); + sb.append(" " + formatDecimalNumber(bridgesUpdated) + + " bridges updated\n"); + sb.append(" " + formatDecimalNumber(bridgesAdded) + + " bridges added\n"); + return sb.toString(); + } + + //TODO This method should go into a utility class. + private static String formatDecimalNumber(long decimalNumber) { + return String.format("%,d", decimalNumber); + } }
diff --git a/src/org/torproject/onionoo/ReverseDomainNameResolver.java b/src/org/torproject/onionoo/ReverseDomainNameResolver.java new file mode 100644 index 0000000..b04ad0a --- /dev/null +++ b/src/org/torproject/onionoo/ReverseDomainNameResolver.java @@ -0,0 +1,178 @@ +/* Copyright 2013 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.onionoo; + +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class ReverseDomainNameResolver { + + private class RdnsLookupWorker extends Thread { + public void run() { + while (System.currentTimeMillis() - RDNS_LOOKUP_MAX_DURATION_MILLIS + <= startedRdnsLookups) { + String rdnsLookupJob = null; + synchronized (rdnsLookupJobs) { + for (String job : rdnsLookupJobs) { + rdnsLookupJob = job; + rdnsLookupJobs.remove(job); + break; + } + } + if (rdnsLookupJob == null) { + break; + } + RdnsLookupRequest request = new RdnsLookupRequest(this, + rdnsLookupJob); + request.setDaemon(true); + request.start(); + try { + Thread.sleep(RDNS_LOOKUP_MAX_REQUEST_MILLIS); + } catch (InterruptedException e) { + /* Getting interrupted should be the default case. */ + } + String hostName = request.getHostName(); + if (hostName != null) { + synchronized (rdnsLookupResults) { + rdnsLookupResults.put(rdnsLookupJob, hostName); + } + } + long lookupMillis = request.getLookupMillis(); + if (lookupMillis >= 0L) { + synchronized (rdnsLookupMillis) { + rdnsLookupMillis.add(lookupMillis); + } + } + } + } + } + + private class RdnsLookupRequest extends Thread { + RdnsLookupWorker parent; + String address, hostName; + long lookupStartedMillis = -1L, lookupCompletedMillis = -1L; + public RdnsLookupRequest(RdnsLookupWorker parent, String address) { + this.parent = parent; + this.address = address; + } + public void run() { + this.lookupStartedMillis = System.currentTimeMillis(); + try { + String result = InetAddress.getByName(this.address).getHostName(); + synchronized (this) { + this.hostName = result; + } + } catch (UnknownHostException e) { + /* We'll try again the next time. */ + } + this.lookupCompletedMillis = System.currentTimeMillis(); + this.parent.interrupt(); + } + public synchronized String getHostName() { + return hostName; + } + public synchronized long getLookupMillis() { + return this.lookupCompletedMillis - this.lookupStartedMillis; + } + } + + private static final long RDNS_LOOKUP_MAX_REQUEST_MILLIS = 10L * 1000L; + private static final long RDNS_LOOKUP_MAX_DURATION_MILLIS = 5L * 60L + * 1000L; + private static final long RDNS_LOOKUP_MAX_AGE_MILLIS = 12L * 60L * 60L + * 1000L; + private static final int RDNS_LOOKUP_WORKERS_NUM = 5; + + private Map<String, Long> addressLastLookupTimes; + + private Set<String> rdnsLookupJobs; + + private Map<String, String> rdnsLookupResults; + + private List<Long> rdnsLookupMillis; + + private long startedRdnsLookups; + + private List<RdnsLookupWorker> rdnsLookupWorkers; + + public void setAddresses(Map<String, Long> addressLastLookupTimes) { + this.addressLastLookupTimes = addressLastLookupTimes; + } + + public void startReverseDomainNameLookups() { + this.startedRdnsLookups = System.currentTimeMillis(); + this.rdnsLookupJobs = new HashSet<String>(); + for (Map.Entry<String, Long> e : + this.addressLastLookupTimes.entrySet()) { + if (e.getValue() < this.startedRdnsLookups + - RDNS_LOOKUP_MAX_AGE_MILLIS) { + this.rdnsLookupJobs.add(e.getKey()); + } + } + this.rdnsLookupResults = new HashMap<String, String>(); + this.rdnsLookupMillis = new ArrayList<Long>(); + this.rdnsLookupWorkers = new ArrayList<RdnsLookupWorker>(); + for (int i = 0; i < RDNS_LOOKUP_WORKERS_NUM; i++) { + RdnsLookupWorker rdnsLookupWorker = new RdnsLookupWorker(); + this.rdnsLookupWorkers.add(rdnsLookupWorker); + rdnsLookupWorker.setDaemon(true); + rdnsLookupWorker.start(); + } + } + + public void finishReverseDomainNameLookups() { + for (RdnsLookupWorker rdnsLookupWorker : this.rdnsLookupWorkers) { + try { + rdnsLookupWorker.join(); + } catch (InterruptedException e) { + /* This is not something that we can take care of. Just leave the + * worker thread alone. */ + } + } + } + + public Map<String, String> getLookupResults() { + synchronized (this.rdnsLookupResults) { + return new HashMap<String, String>(this.rdnsLookupResults); + } + } + + public long getLookupStartMillis() { + return this.startedRdnsLookups; + } + + public String getStatsString() { + StringBuilder sb = new StringBuilder(); + sb.append(" " + formatDecimalNumber(rdnsLookupMillis.size()) + + " lookups performed\n"); + if (rdnsLookupMillis.size() > 0) { + Collections.sort(rdnsLookupMillis); + sb.append(" " + formatMillis(rdnsLookupMillis.get(0)) + + " minimum lookup time\n"); + sb.append(" " + formatMillis(rdnsLookupMillis.get( + rdnsLookupMillis.size() / 2)) + " median lookup time\n"); + sb.append(" " + formatMillis(rdnsLookupMillis.get( + rdnsLookupMillis.size() - 1)) + " maximum lookup time\n"); + } + return sb.toString(); + } + + //TODO This method should go into a utility class. + private static String formatDecimalNumber(long decimalNumber) { + return String.format("%,d", decimalNumber); + } + + // TODO This method should go into a utility class. + private static String formatMillis(long millis) { + return String.format("%02d:%02d.%03d minutes", + millis / (1000L * 60L), (millis / 1000L) % 60L, millis % 1000L); + } +} +