commit 73e5a6989df19923775978428cd9bb21e0a96dc4
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date: Wed Jun 18 11:38:07 2014 +0200
Avoid parsing descriptor contents to Maps.
Extra-info descriptors contain lots of comma-separated key=value lists that we store in SortedMap instances. But those occupy a lot of memory, and it's not certain that we'll ever want to use the contained keys or values.
New approach: when parsing a descriptor, use regular expressions to check if lines are valid, and delay parsing into maps until needed.
---
 .../descriptor/impl/ExtraInfoDescriptorImpl.java | 90 ++++++++++----------
 .../torproject/descriptor/impl/ParseHelper.java | 59 ++++++++-----
 2 files changed, 81 insertions(+), 68 deletions(-)
diff --git a/src/org/torproject/descriptor/impl/ExtraInfoDescriptorImpl.java b/src/org/torproject/descriptor/impl/ExtraInfoDescriptorImpl.java index 13fdfa8..836551a 100644 --- a/src/org/torproject/descriptor/impl/ExtraInfoDescriptorImpl.java +++ b/src/org/torproject/descriptor/impl/ExtraInfoDescriptorImpl.java @@ -712,28 +712,28 @@ public class ExtraInfoDescriptorImpl extends DescriptorImpl return this.dirreqStatsIntervalLength; }
- private SortedMap<String, Integer> dirreqV2Ips; + private String dirreqV2Ips; public SortedMap<String, Integer> getDirreqV2Ips() { - return this.dirreqV2Ips == null ? null : - new TreeMap<String, Integer>(this.dirreqV2Ips); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.dirreqV2Ips); }
- private SortedMap<String, Integer> dirreqV3Ips; + private String dirreqV3Ips; public SortedMap<String, Integer> getDirreqV3Ips() { - return this.dirreqV3Ips == null ? null : - new TreeMap<String, Integer>(this.dirreqV3Ips); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.dirreqV3Ips); }
- private SortedMap<String, Integer> dirreqV2Reqs; + private String dirreqV2Reqs; public SortedMap<String, Integer> getDirreqV2Reqs() { - return this.dirreqV2Reqs == null ? null : - new TreeMap<String, Integer>(this.dirreqV2Reqs); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.dirreqV2Reqs); }
- private SortedMap<String, Integer> dirreqV3Reqs; + private String dirreqV3Reqs; public SortedMap<String, Integer> getDirreqV3Reqs() { - return this.dirreqV3Reqs == null ? null : - new TreeMap<String, Integer>(this.dirreqV3Reqs); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.dirreqV3Reqs); }
private double dirreqV2Share = -1.0; @@ -746,40 +746,40 @@ public class ExtraInfoDescriptorImpl extends DescriptorImpl return this.dirreqV3Share; }
- private SortedMap<String, Integer> dirreqV2Resp; + private String dirreqV2Resp; public SortedMap<String, Integer> getDirreqV2Resp() { - return this.dirreqV2Resp == null ? null : - new TreeMap<String, Integer>(this.dirreqV2Resp); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.dirreqV2Resp); }
- private SortedMap<String, Integer> dirreqV3Resp; + private String dirreqV3Resp; public SortedMap<String, Integer> getDirreqV3Resp() { - return this.dirreqV3Resp == null ? null : - new TreeMap<String, Integer>(this.dirreqV3Resp); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.dirreqV3Resp); }
- private SortedMap<String, Integer> dirreqV2DirectDl; + private String dirreqV2DirectDl; public SortedMap<String, Integer> getDirreqV2DirectDl() { - return this.dirreqV2DirectDl == null ? null : - new TreeMap<String, Integer>(this.dirreqV2DirectDl); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.dirreqV2DirectDl); }
- private SortedMap<String, Integer> dirreqV3DirectDl; + private String dirreqV3DirectDl; public SortedMap<String, Integer> getDirreqV3DirectDl() { - return this.dirreqV3DirectDl == null ? null : - new TreeMap<String, Integer>(this.dirreqV3DirectDl); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.dirreqV3DirectDl); }
- private SortedMap<String, Integer> dirreqV2TunneledDl; + private String dirreqV2TunneledDl; public SortedMap<String, Integer> getDirreqV2TunneledDl() { - return this.dirreqV2TunneledDl == null ? null : - new TreeMap<String, Integer>(this.dirreqV2TunneledDl); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.dirreqV2TunneledDl); }
- private SortedMap<String, Integer> dirreqV3TunneledDl; + private String dirreqV3TunneledDl; public SortedMap<String, Integer> getDirreqV3TunneledDl() { - return this.dirreqV3TunneledDl == null ? null : - new TreeMap<String, Integer>(this.dirreqV3TunneledDl); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.dirreqV3TunneledDl); }
private BandwidthHistory dirreqReadHistory; @@ -802,10 +802,10 @@ public class ExtraInfoDescriptorImpl extends DescriptorImpl return this.entryStatsIntervalLength; }
- private SortedMap<String, Integer> entryIps; + private String entryIps; public SortedMap<String, Integer> getEntryIps() { - return this.entryIps == null ? null : - new TreeMap<String, Integer>(this.entryIps); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.entryIps); }
private long cellStatsEndMillis = -1L; @@ -904,10 +904,10 @@ public class ExtraInfoDescriptorImpl extends DescriptorImpl return this.geoipStartTimeMillis; }
- private SortedMap<String, Integer> geoipClientOrigins; + private String geoipClientOrigins; public SortedMap<String, Integer> getGeoipClientOrigins() { - return this.geoipClientOrigins == null ? null : - new TreeMap<String, Integer>(this.geoipClientOrigins); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.geoipClientOrigins); }
private long bridgeStatsEndMillis = -1L; @@ -920,22 +920,22 @@ public class ExtraInfoDescriptorImpl extends DescriptorImpl return this.bridgeStatsIntervalLength; }
- private SortedMap<String, Integer> bridgeIps; + private String bridgeIps; public SortedMap<String, Integer> getBridgeIps() { - return this.bridgeIps == null ? null : - new TreeMap<String, Integer>(this.bridgeIps); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.bridgeIps); }
- private SortedMap<String, Integer> bridgeIpVersions; + private String bridgeIpVersions; public SortedMap<String, Integer> getBridgeIpVersions() { - return this.bridgeIpVersions == null ? null : - new TreeMap<String, Integer>(this.bridgeIpVersions); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.bridgeIpVersions); }
- private SortedMap<String, Integer> bridgeIpTransports; + private String bridgeIpTransports; public SortedMap<String, Integer> getBridgeIpTransports() { - return this.bridgeIpTransports == null ? null : - new TreeMap<String, Integer>(this.bridgeIpTransports); + return ParseHelper.convertCommaSeparatedKeyIntegerValueList( + this.bridgeIpTransports); }
private List<String> transports = new ArrayList<String>(); diff --git a/src/org/torproject/descriptor/impl/ParseHelper.java b/src/org/torproject/descriptor/impl/ParseHelper.java index 4fbe34a..048225c 100644 --- a/src/org/torproject/descriptor/impl/ParseHelper.java +++ b/src/org/torproject/descriptor/impl/ParseHelper.java @@ -286,11 +286,13 @@ public class ParseHelper { toUpperCase(); }
- public static SortedMap<String, Integer> - parseCommaSeparatedKeyIntegerValueList(String line, - String[] partsNoOpt, int index, int keyLength) + private static Map<Integer, Pattern> + commaSeparatedKeyValueListPatterns = + new HashMap<Integer, Pattern>(); + public static String parseCommaSeparatedKeyIntegerValueList( + String line, String[] partsNoOpt, int index, int keyLength) throws DescriptorParseException { - SortedMap<String, Integer> result = new TreeMap<String, Integer>(); + String result = ""; if (partsNoOpt.length < index) { throw new DescriptorParseException("Line '" + line + "' does not " + "contain a key-value list at index " + index + "."); @@ -299,26 +301,37 @@ public class ParseHelper { + "unrecognized values beyond the expected key-value list at " + "index " + index + "."); } else if (partsNoOpt.length > index) { - String[] listElements = partsNoOpt[index].split(",", -1); - for (String listElement : listElements) { - String[] keyAndValue = listElement.split("="); - String key = null; - int value = -1; - if (keyAndValue.length == 2 && (keyLength == 0 || - keyAndValue[0].length() == keyLength)) { - try { - value = Integer.parseInt(keyAndValue[1]); - key = keyAndValue[0]; - } catch (NumberFormatException e) { - /* Handle below. */ - } - } - if (key == null) { - throw new DescriptorParseException("Line '" + line + "' " - + "contains an illegal key or value in list element '" - + listElement + "'."); + if (!commaSeparatedKeyValueListPatterns.containsKey(keyLength)) { + String keyPattern = "[0-9a-zA-Z?<>-]" + + (keyLength == 0 ? 
"+" : "{" + keyLength + "}"); + String valuePattern = "\\-?[0-9]{1,9}"; + String patternString = String.format("^%s=%s(,%s=%s)*$", + keyPattern, valuePattern, keyPattern, valuePattern); + commaSeparatedKeyValueListPatterns.put(keyLength, + Pattern.compile(patternString)); + } + Pattern pattern = commaSeparatedKeyValueListPatterns.get( + keyLength); + if (pattern.matcher(partsNoOpt[index]).matches()) { + result = partsNoOpt[index]; + } else { + throw new DescriptorParseException("Line '" + line + "' " + "contains an illegal key or value."); + } + } + return result; + } + + public static SortedMap<String, Integer> + convertCommaSeparatedKeyIntegerValueList(String validatedString) { + SortedMap<String, Integer> result = null; + if (validatedString != null) { + result = new TreeMap<String, Integer>(); + if (validatedString.contains("=")) { + for (String listElement : validatedString.split(",", -1)) { + String[] keyAndValue = listElement.split("="); + result.put(keyAndValue[0], Integer.parseInt(keyAndValue[1])); } - result.put(key, value); } } return result;
tor-commits@lists.torproject.org