commit 45403096a550186cfbecc035cd54c125e3b0237c Author: Damian Johnson atagar@torproject.org Date: Thu Dec 6 22:44:17 2012 -0800
Reducing ExitPolicyRule's memory requirements
Frequently there's several ExitPolicyRule entries per an exit policy, so when we pull the consensus we get quite a few instances of this class. Making the following changes to reduce the memory requirements...
* Dropping the rule attribute. The string representation of the rule should be good enough, if not better for callers.
* Replacing the address_type attribute with a method for getting it. This lets us store the address type as an integer within our class.
* Replacing the mask attribute with a method for getting it. The ip mask representation is very rarely useful, so there's little reason to store it unless it's requested.
* Lazily loading the integer reprentation of our address and mask, both speeding up our constructor and avoiding it entirely if our caller never uses is_match()
This lowers the memory requirement for loading the full consensus on my netbook from 5.5% to 5.1% (a 7% drop). This is a drop in the bucket compared to the prior commit, but between the faster constructor runtime and squeezing out a little more performance it's still worth it. --- stem/exit_policy.py | 156 +++++++++++++++++++++++++++----------- stem/util/connection.py | 4 + test/unit/exit_policy/policy.py | 9 +-- test/unit/exit_policy/rule.py | 14 ++-- 4 files changed, 124 insertions(+), 59 deletions(-)
diff --git a/stem/exit_policy.py b/stem/exit_policy.py index a6ae028..f9f0eb8 100644 --- a/stem/exit_policy.py +++ b/stem/exit_policy.py @@ -33,7 +33,9 @@ exiting to a destination is permissible or not. For instance... ExitPolicyRule - Single rule of an exit policy chain |- is_address_wildcard - checks if we'll accept any address |- is_port_wildcard - checks if we'll accept any port + |- get_address_type - provides the protocol our ip address belongs to |- is_match - checks if we match a given destination + |- get_mask - provides the address representation of our mask +- __str__ - string representation for this rule
.. data:: AddressType (enum) @@ -309,6 +311,7 @@ class MicrodescriptorExitPolicy(ExitPolicy): raise ValueError(exc_msg)
super(MicrodescriptorExitPolicy, self).__init__(*rules) + self.set_default_allowed(not self.is_accept)
def __str__(self): return self._policy @@ -334,12 +337,9 @@ class ExitPolicyRule(object):
This should be treated as an immutable object.
- :var str rule: rule that we were originally created from :var bool is_accept: indicates if exiting is allowed or disallowed
- :var stem.exit_policy.AddressType address_type: type of address that we have :var str address: address that this rule is for - :var str mask: subnet mask for the address (ex. "255.255.255.0") :var int masked_bits: number of bits the subnet mask represents, **None** if the mask can't have a bit representation
@@ -352,8 +352,6 @@ class ExitPolicyRule(object): """
def __init__(self, rule): - self.rule = rule - # policy ::= "accept" exitpattern | "reject" exitpattern # exitpattern ::= addrspec ":" portspec
@@ -375,24 +373,27 @@ class ExitPolicyRule(object): raise ValueError("An exitpattern must be of the form 'addrspec:portspec': %s" % rule)
self.address = None - self.address_type = None - self.mask = self.masked_bits = None + self._address_type = None + self.masked_bits = None self.min_port = self.max_port = None
+ # Our mask in ip notation (ex. "255.255.255.0"). This is only set if we + # either have a custom mask that can't be represented by a number of bits, + # or the user has called mask(), lazily loading this. + + self._mask = None + addrspec, portspec = exitpattern.rsplit(":", 1) - self._apply_addrspec(addrspec) - self._apply_portspec(portspec) + self._apply_addrspec(rule, addrspec) + self._apply_portspec(rule, portspec)
- # Pre-calculating the integer representation of our mask and masked - # address. These are used by our is_match() method to compare ourselves to + # The integer representation of our mask and masked address. These are + # lazily loaded and used by our is_match() method to compare ourselves to # other addresses.
- if self.is_address_wildcard(): - # is_match() will short circuit so these are unused - self._mask_bin = self._addr_bin = None - else: - self._mask_bin = int(stem.util.connection.get_address_binary(self.mask), 2) - self._addr_bin = int(stem.util.connection.get_address_binary(self.address), 2) & self._mask_bin + self._mask_bin = self._addr_bin = None + + # Lazily loaded string representation of our policy.
self._str_representation = None
@@ -406,7 +407,7 @@ class ExitPolicyRule(object): :returns: **bool** for if our address matching is a wildcard """
- return self.address_type == AddressType.WILDCARD + return self._address_type == _address_type_to_int(AddressType.WILDCARD)
def is_port_wildcard(self): """ @@ -432,10 +433,12 @@ class ExitPolicyRule(object):
# validate our input and check if the argument doesn't match our address type if address is not None: + address_type = self.get_address_type() + if stem.util.connection.is_valid_ip_address(address): - if self.address_type == AddressType.IPv6: return False + if address_type == AddressType.IPv6: return False elif stem.util.connection.is_valid_ipv6_address(address, allow_brackets = True): - if self.address_type == AddressType.IPv4: return False + if address_type == AddressType.IPv4: return False
address = address.lstrip("[").rstrip("]") else: @@ -453,8 +456,8 @@ class ExitPolicyRule(object): return False else: comparison_addr_bin = int(stem.util.connection.get_address_binary(address), 2) - comparison_addr_bin &= self._mask_bin - if self._addr_bin != comparison_addr_bin: return False + comparison_addr_bin &= self._get_mask_bin() + if self._get_address_bin() != comparison_addr_bin: return False
if not self.is_port_wildcard(): if port is None: @@ -464,6 +467,43 @@ class ExitPolicyRule(object):
return True
+ def get_address_type(self): + """ + Provides the :data:`~stem.exit_policy.AddressType: for our policy. + + :returns: :data:`~stem.exit_policy.AddressType: for the type of address that we have + """ + + return _int_to_address_type(self._address_type) + + def get_mask(self, cache = True): + """ + Provides the address represented by our mask. This is **None** if our + address type is a wildcard. + + :param bool cache: caches the result if **True** + + :returns: str of our subnet mask for the address (ex. "255.255.255.0") + """ + + # Lazy loading our mask because it very infrequently requested. There's + # no reason to usually usse memory for it. + + address_type = self.get_address_type() + + if not self._mask: + if address_type == AddressType.WILDCARD: + mask = None + elif address_type == AddressType.IPv4: + mask = stem.util.connection.get_mask(self.masked_bits) + elif address_type == AddressType.IPv6: + mask = stem.util.connection.get_mask_ipv6(self.masked_bits) + + if not cache: return mask + self._mask = mask + + return self._mask + def __str__(self): """ Provides the string representation of our policy. This does not @@ -479,7 +519,9 @@ class ExitPolicyRule(object): if self.is_address_wildcard(): label += "*:" else: - if self.address_type == AddressType.IPv4: + address_type = self.get_address_type() + + if address_type == AddressType.IPv4: label += self.address else: label += "[%s]" % self.address @@ -489,12 +531,13 @@ class ExitPolicyRule(object): # - use our masked bit count if we can # - use the mask itself otherwise
- if self.mask in (stem.util.connection.FULL_IPv4_MASK, stem.util.connection.FULL_IPv6_MASK): + if (address_type == AddressType.IPv4 and self.masked_bits == 32) or \ + (address_type == AddressType.IPv6 and self.masked_bits == 128): label += ":" - elif not self.masked_bits is None: + elif self.masked_bits is not None: label += "/%i:" % self.masked_bits else: - label += "/%s:" % self.mask + label += "/%s:" % self.get_mask()
if self.is_port_wildcard(): label += "*" @@ -507,7 +550,23 @@ class ExitPolicyRule(object):
return self._str_representation
- def _apply_addrspec(self, addrspec): + def _get_mask_bin(self): + # provides an integer representation of our mask + + if self._mask_bin is None: + self._mask_bin = int(stem.util.connection.get_address_binary(self.get_mask(False)), 2) + + return self._mask_bin + + def _get_address_bin(self): + # provides an integer representation of our address + + if self._addr_bin is None: + self._addr_bin = int(stem.util.connection.get_address_binary(self.address), 2) & self._mask_bin + + return self._addr_bin + + def _apply_addrspec(self, rule, addrspec): # Parses the addrspec... # addrspec ::= "*" | ip4spec | ip6spec
@@ -517,34 +576,34 @@ class ExitPolicyRule(object): self.address, addr_extra = addrspec, None
if addrspec == "*": - self.address_type = AddressType.WILDCARD - self.address = self.mask = self.masked_bits = None + self._address_type = _address_type_to_int(AddressType.WILDCARD) + self.address = self.masked_bits = None elif stem.util.connection.is_valid_ip_address(self.address): # ipv4spec ::= ip4 | ip4 "/" num_ip4_bits | ip4 "/" ip4mask # ip4 ::= an IPv4 address in dotted-quad format # ip4mask ::= an IPv4 mask in dotted-quad format # num_ip4_bits ::= an integer between 0 and 32
- self.address_type = AddressType.IPv4 + self._address_type = _address_type_to_int(AddressType.IPv4)
if addr_extra is None: - self.mask = stem.util.connection.FULL_IPv4_MASK self.masked_bits = 32 elif stem.util.connection.is_valid_ip_address(addr_extra): # provided with an ip4mask - self.mask = addr_extra - try: self.masked_bits = stem.util.connection.get_masked_bits(addr_extra) except ValueError: # mask can't be represented as a number of bits (ex. "255.255.0.255") + self._mask = addr_extra self.masked_bits = None elif addr_extra.isdigit(): # provided with a num_ip4_bits - self.mask = stem.util.connection.get_mask(int(addr_extra)) self.masked_bits = int(addr_extra) + + if self.masked_bits < 0 or self.masked_bits > 32: + raise ValueError("IPv4 masks must be in the range of 0-32 bits") else: - raise ValueError("The '%s' isn't a mask nor number of bits: %s" % (addr_extra, self.rule)) + raise ValueError("The '%s' isn't a mask nor number of bits: %s" % (addr_extra, rule)) elif self.address.startswith("[") and self.address.endswith("]") and \ stem.util.connection.is_valid_ipv6_address(self.address[1:-1]): # ip6spec ::= ip6 | ip6 "/" num_ip6_bits @@ -552,21 +611,22 @@ class ExitPolicyRule(object): # num_ip6_bits ::= an integer between 0 and 128
self.address = stem.util.connection.expand_ipv6_address(self.address[1:-1].upper()) - self.address_type = AddressType.IPv6 + self._address_type = _address_type_to_int(AddressType.IPv6)
if addr_extra is None: - self.mask = stem.util.connection.FULL_IPv6_MASK self.masked_bits = 128 elif addr_extra.isdigit(): # provided with a num_ip6_bits - self.mask = stem.util.connection.get_mask_ipv6(int(addr_extra)) self.masked_bits = int(addr_extra) + + if self.masked_bits < 0 or self.masked_bits > 128: + raise ValueError("IPv6 masks must be in the range of 0-128 bits") else: - raise ValueError("The '%s' isn't a number of bits: %s" % (addr_extra, self.rule)) + raise ValueError("The '%s' isn't a number of bits: %s" % (addr_extra, rule)) else: - raise ValueError("Address isn't a wildcard, IPv4, or IPv6 address: %s" % self.rule) + raise ValueError("Address isn't a wildcard, IPv4, or IPv6 address: %s" % rule)
- def _apply_portspec(self, portspec): + def _apply_portspec(self, rule, portspec): # Parses the portspec... # portspec ::= "*" | port | port "-" port # port ::= an integer between 1 and 65535, inclusive. @@ -581,7 +641,7 @@ class ExitPolicyRule(object): if stem.util.connection.is_valid_port(portspec, allow_zero = True): self.min_port = self.max_port = int(portspec) else: - raise ValueError("'%s' isn't within a valid port range: %s" % (portspec, self.rule)) + raise ValueError("'%s' isn't within a valid port range: %s" % (portspec, rule)) elif "-" in portspec: # provided with a port range port_comp = portspec.split("-", 1) @@ -591,11 +651,11 @@ class ExitPolicyRule(object): self.max_port = int(port_comp[1])
if self.min_port > self.max_port: - raise ValueError("Port range has a lower bound that's greater than its upper bound: %s" % self.rule) + raise ValueError("Port range has a lower bound that's greater than its upper bound: %s" % rule) else: - raise ValueError("Malformed port range: %s" % self.rule) + raise ValueError("Malformed port range: %s" % rule) else: - raise ValueError("Port value isn't a wildcard, integer, or range: %s" % self.rule) + raise ValueError("Port value isn't a wildcard, integer, or range: %s" % rule)
def __eq__(self, other): if isinstance(other, ExitPolicyRule): @@ -608,3 +668,9 @@ class ExitPolicyRule(object): else: return False
+def _address_type_to_int(address_type): + return AddressType.index_of(address_type) + +def _int_to_address_type(address_type_int): + return AddressType[AddressType.keys()[address_type_int]] + diff --git a/stem/util/connection.py b/stem/util/connection.py index a21cde8..0417c87 100644 --- a/stem/util/connection.py +++ b/stem/util/connection.py @@ -165,6 +165,8 @@ def get_mask(bits):
if bits > 32 or bits < 0: raise ValueError("A mask can only be 0-32 bits, got %i" % bits) + elif bits == 32: + return FULL_IPv4_MASK
# get the binary representation of the mask mask_bin = get_binary(2 ** bits - 1, 32)[::-1] @@ -213,6 +215,8 @@ def get_mask_ipv6(bits):
if bits > 128 or bits < 0: raise ValueError("A mask can only be 0-128 bits, got %i" % bits) + elif bits == 128: + return FULL_IPv6_MASK
# get the binary representation of the mask mask_bin = get_binary(2 ** bits - 1, 128)[::-1] diff --git a/test/unit/exit_policy/policy.py b/test/unit/exit_policy/policy.py index 0714ef9..67fbf68 100644 --- a/test/unit/exit_policy/policy.py +++ b/test/unit/exit_policy/policy.py @@ -154,22 +154,19 @@ class TestExitPolicy(unittest.TestCase): if expect_success: self.fail()
def test_microdescriptor_attributes(self): - # checks that its is_accept and ports attributes are properly set + # checks that its is_accept attribute is properly set
# single port policy = MicrodescriptorExitPolicy('accept 443') self.assertTrue(policy.is_accept) - self.assertEquals(set([443]), policy.ports)
# multiple ports policy = MicrodescriptorExitPolicy('accept 80,443') self.assertTrue(policy.is_accept) - self.assertEquals(set([80, 443]), policy.ports)
# port range policy = MicrodescriptorExitPolicy('reject 1-1024') self.assertFalse(policy.is_accept) - self.assertEquals(set(range(1, 1025)), policy.ports)
def test_microdescriptor_can_exit_to(self): test_inputs = { @@ -188,6 +185,6 @@ class TestExitPolicy(unittest.TestCase): # address argument should be ignored policy = MicrodescriptorExitPolicy('accept 80,443')
- self.assertFalse(policy.can_exit_to('blah', 79)) - self.assertTrue(policy.can_exit_to('blah', 80)) + self.assertFalse(policy.can_exit_to('127.0.0.1', 79)) + self.assertTrue(policy.can_exit_to('127.0.0.1', 80))
diff --git a/test/unit/exit_policy/rule.py b/test/unit/exit_policy/rule.py index 55e0c76..994b899 100644 --- a/test/unit/exit_policy/rule.py +++ b/test/unit/exit_policy/rule.py @@ -46,7 +46,6 @@ class TestExitPolicyRule(unittest.TestCase):
for rule_arg in test_inputs: rule = ExitPolicyRule(rule_arg) - self.assertEquals(rule_arg, rule.rule) self.assertEquals(rule_arg, str(rule))
def test_str_changed(self): @@ -60,7 +59,6 @@ class TestExitPolicyRule(unittest.TestCase):
for rule_arg, expected_str in test_inputs.items(): rule = ExitPolicyRule(rule_arg) - self.assertEquals(rule_arg, rule.rule) self.assertEquals(expected_str, str(rule))
def test_valid_wildcard(self): @@ -103,9 +101,9 @@ class TestExitPolicyRule(unittest.TestCase):
def test_wildcard_attributes(self): rule = ExitPolicyRule("reject *:*") - self.assertEquals(AddressType.WILDCARD, rule.address_type) + self.assertEquals(AddressType.WILDCARD, rule.get_address_type()) self.assertEquals(None, rule.address) - self.assertEquals(None, rule.mask) + self.assertEquals(None, rule.get_mask()) self.assertEquals(None, rule.masked_bits) self.assertEquals(1, rule.min_port) self.assertEquals(65535, rule.max_port) @@ -122,9 +120,9 @@ class TestExitPolicyRule(unittest.TestCase): address, mask, masked_bits = attr
rule = ExitPolicyRule("accept %s:*" % rule_addr) - self.assertEquals(AddressType.IPv4, rule.address_type) + self.assertEquals(AddressType.IPv4, rule.get_address_type()) self.assertEquals(address, rule.address) - self.assertEquals(mask, rule.mask) + self.assertEquals(mask, rule.get_mask()) self.assertEquals(masked_bits, rule.masked_bits)
def test_invalid_ipv4_addresses(self): @@ -161,9 +159,9 @@ class TestExitPolicyRule(unittest.TestCase): address, mask, masked_bits = attr
rule = ExitPolicyRule("accept %s:*" % rule_addr) - self.assertEquals(AddressType.IPv6, rule.address_type) + self.assertEquals(AddressType.IPv6, rule.get_address_type()) self.assertEquals(address, rule.address) - self.assertEquals(mask, rule.mask) + self.assertEquals(mask, rule.get_mask()) self.assertEquals(masked_bits, rule.masked_bits)
def test_invalid_ipv6_addresses(self):